cloud-hypervisor/virtio-devices/src/vsock/packet.rs
Rob Bradford 2a6eb31d5b vm-virtio, virtio-devices: Split device implementation from virt queues
Split the generic virtio code (queues and device type) from the
VirtioDevice trait, transport and device implementations.

This also simplifies the feature handling in vhost_user_backend as the
vm-virtio crate no longer has any features.

Signed-off-by: Rob Bradford <robert.bradford@intel.com>
2020-07-02 17:09:28 +01:00

655 lines
23 KiB
Rust

// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
/// `VsockPacket` provides a thin wrapper over the buffers exchanged via virtio queues.
/// There are two components to a vsock packet, each using its own descriptor in a
/// virtio queue:
/// - the packet header; and
/// - the packet data/buffer.
/// There is a 1:1 relation between descriptor chains and packets: the first (chain head) holds
/// the header, and an optional second descriptor holds the data. The second descriptor is only
/// present for data packets (VSOCK_OP_RW).
///
/// `VsockPacket` wraps these two buffers and provides direct access to the data stored
/// in guest memory. This is done to avoid unnecessarily copying data from guest memory
/// to temporary buffers, before passing it on to the vsock backend.
///
use byteorder::{ByteOrder, LittleEndian};
use super::super::DescriptorChain;
use super::defs;
use super::{Result, VsockError};
use vfio_ioctls::get_host_address_range;
// The vsock packet header is defined by the C struct:
//
// ```C
// struct virtio_vsock_hdr {
// le64 src_cid;
// le64 dst_cid;
// le32 src_port;
// le32 dst_port;
// le32 len;
// le16 type;
// le16 op;
// le32 flags;
// le32 buf_alloc;
// le32 fwd_cnt;
// };
// ```
//
// This struct will occupy the buffer pointed to by the head descriptor. We'll be accessing it
// as a byte slice. To that end, we define below the offsets for each struct field, as well as the
// packed struct size, as a bunch of `usize` consts.
// Note that these offsets are only used privately by the `VsockPacket` struct, the public interface
// consisting of getter and setter methods, for each struct field, that will also handle the correct
// endianness.
// Byte offsets of the `virtio_vsock_hdr` fields inside the (packed) header. Every offset is
// expressed as "previous field offset + previous field width", so the list below follows the
// declaration order of the C struct: two le64 fields, three le32, two le16, three le32.

// Source CID (le64).
const HDROFF_SRC_CID: usize = 0;
// Destination CID (le64).
const HDROFF_DST_CID: usize = HDROFF_SRC_CID + 8;
// Source port (le32).
const HDROFF_SRC_PORT: usize = HDROFF_DST_CID + 8;
// Destination port (le32).
const HDROFF_DST_PORT: usize = HDROFF_SRC_PORT + 4;
// Data length, in bytes (le32) - may be 0, if there is no data buffer.
const HDROFF_LEN: usize = HDROFF_DST_PORT + 4;
// Socket type (le16). Currently, only connection-oriented streams are defined by the vsock
// protocol.
const HDROFF_TYPE: usize = HDROFF_LEN + 4;
// Operation ID (le16) - one of the VSOCK_OP_* values; e.g.
// - VSOCK_OP_RW: a data packet;
// - VSOCK_OP_REQUEST: connection request;
// - VSOCK_OP_RST: forceful connection termination;
// etc (see `super::defs::uapi` for the full list).
const HDROFF_OP: usize = HDROFF_TYPE + 2;
// Additional options (flags) associated with the current operation (`op`) (le32).
// Currently, only used with shutdown requests (VSOCK_OP_SHUTDOWN).
const HDROFF_FLAGS: usize = HDROFF_OP + 2;
// Size, in bytes, of the packet sender receive buffer (for the connection to which this packet
// belongs) (le32).
const HDROFF_BUF_ALLOC: usize = HDROFF_FLAGS + 4;
// Number of bytes the sender has received and consumed (for the connection to which this packet
// belongs) (le32). For instance, for our Unix backend, this counter would be the total number of
// bytes we have successfully written to a backing Unix socket.
const HDROFF_FWD_CNT: usize = HDROFF_BUF_ALLOC + 4;

/// The vsock packet header struct size (when packed).
pub const VSOCK_PKT_HDR_SIZE: usize = HDROFF_FWD_CNT + 4;
/// A vsock packet, viewed in place over a virtq descriptor chain:
/// - the chain head holds the packet header; and
/// - an optional second descriptor holds the data buffer; it is only present for data
///   packets (VSOCK_OP_RW).
///
/// The struct stores raw pointers rather than copies, so header and data are read and written
/// directly where the descriptors point.
pub struct VsockPacket {
    /// Start of the packet header (`VSOCK_PKT_HDR_SIZE` bytes).
    hdr: *mut u8,
    /// Start of the data buffer, or `None` when the packet carries no data descriptor.
    buf: Option<*mut u8>,
    /// Size, in bytes, of the area `buf` points to.
    buf_size: usize,
}
impl VsockPacket {
    /// Create the packet wrapper from a TX virtq chain head.
    ///
    /// The chain head is expected to hold valid packet header data. A following packet buffer
    /// descriptor can optionally end the chain. Bounds and pointer checks are performed when
    /// creating the wrapper.
    ///
    /// # Errors
    ///
    /// - `VsockError::UnreadableDescriptor` if the header or data descriptor is write-only;
    /// - `VsockError::HdrDescTooSmall` if the head descriptor cannot hold the packet header;
    /// - `VsockError::InvalidPktLen` if the header `len` exceeds `defs::MAX_PKT_BUF_SIZE`;
    /// - `VsockError::BufDescMissing` if `len` is non-zero but there is no data descriptor;
    /// - `VsockError::BufDescTooSmall` if the data descriptor is shorter than `len`;
    /// - `VsockError::GuestMemory` if a descriptor address range cannot be resolved.
    pub fn from_tx_virtq_head(head: &DescriptorChain) -> Result<Self> {
        // All buffers in the TX queue must be readable.
        if head.is_write_only() {
            return Err(VsockError::UnreadableDescriptor);
        }

        // The packet header should fit inside the head descriptor.
        if head.len < VSOCK_PKT_HDR_SIZE as u32 {
            return Err(VsockError::HdrDescTooSmall(head.len));
        }

        let mut pkt = Self {
            hdr: get_host_address_range(head.mem, head.addr, VSOCK_PKT_HDR_SIZE)
                .ok_or(VsockError::GuestMemory)? as *mut u8,
            buf: None,
            buf_size: 0,
        };

        // The header lives in memory shared with the guest, so fetch the advertised data length
        // a single time: every check below (and the reported error value) then refers to the
        // same number.
        let pkt_len = pkt.len();

        // No point looking for a data/buffer descriptor, if the packet is zero-length.
        if pkt_len == 0 {
            return Ok(pkt);
        }

        // Reject weirdly-sized packets.
        if pkt_len > defs::MAX_PKT_BUF_SIZE as u32 {
            return Err(VsockError::InvalidPktLen(pkt_len));
        }

        // If the packet header showed a non-zero length, there should be a data descriptor here.
        let buf_desc = head.next_descriptor().ok_or(VsockError::BufDescMissing)?;

        // TX data should be read-only.
        if buf_desc.is_write_only() {
            return Err(VsockError::UnreadableDescriptor);
        }

        // The data buffer should be large enough to fit the size of the data, as described by
        // the header descriptor.
        if buf_desc.len < pkt_len {
            return Err(VsockError::BufDescTooSmall);
        }

        // Note: the recorded buffer size is the descriptor length, which may exceed `len`.
        pkt.buf_size = buf_desc.len as usize;
        pkt.buf = Some(
            get_host_address_range(buf_desc.mem, buf_desc.addr, pkt.buf_size)
                .ok_or(VsockError::GuestMemory)? as *mut u8,
        );

        Ok(pkt)
    }

    /// Create the packet wrapper from an RX virtq chain head.
    ///
    /// There must be two descriptors in the chain, both writable: a header descriptor and a data
    /// descriptor. Bounds and pointer checks are performed when creating the wrapper.
    ///
    /// # Errors
    ///
    /// - `VsockError::UnwritableDescriptor` if the header or data descriptor is not write-only;
    /// - `VsockError::HdrDescTooSmall` if the head descriptor cannot hold the packet header;
    /// - `VsockError::BufDescMissing` if the chain has no data descriptor;
    /// - `VsockError::GuestMemory` if a descriptor address range cannot be resolved.
    pub fn from_rx_virtq_head(head: &DescriptorChain) -> Result<Self> {
        // All RX buffers must be writable.
        if !head.is_write_only() {
            return Err(VsockError::UnwritableDescriptor);
        }

        // The packet header should fit inside the head descriptor.
        if head.len < VSOCK_PKT_HDR_SIZE as u32 {
            return Err(VsockError::HdrDescTooSmall(head.len));
        }

        // All RX descriptor chains should have a header and a data descriptor.
        if !head.has_next() {
            return Err(VsockError::BufDescMissing);
        }
        let buf_desc = head.next_descriptor().ok_or(VsockError::BufDescMissing)?;

        // The data descriptor gets written to through `buf_mut()`, so it has to be writable
        // too; this mirrors the per-descriptor check done on the TX path.
        if !buf_desc.is_write_only() {
            return Err(VsockError::UnwritableDescriptor);
        }

        let buf_size = buf_desc.len as usize;

        Ok(Self {
            hdr: get_host_address_range(head.mem, head.addr, VSOCK_PKT_HDR_SIZE)
                .ok_or(VsockError::GuestMemory)? as *mut u8,
            buf: Some(
                get_host_address_range(buf_desc.mem, buf_desc.addr, buf_size)
                    .ok_or(VsockError::GuestMemory)? as *mut u8,
            ),
            buf_size,
        })
    }

    /// Provides in-place, byte-slice, access to the vsock packet header.
    ///
    /// The returned slice is always `VSOCK_PKT_HDR_SIZE` bytes long.
    pub fn hdr(&self) -> &[u8] {
        // SAFETY: `self.hdr` was resolved for exactly `VSOCK_PKT_HDR_SIZE` bytes when the packet
        // was created from the virtq descriptor, so the bounds have already been checked.
        unsafe { std::slice::from_raw_parts(self.hdr as *const u8, VSOCK_PKT_HDR_SIZE) }
    }

    /// Provides in-place, byte-slice, mutable access to the vsock packet header.
    ///
    /// The returned slice is always `VSOCK_PKT_HDR_SIZE` bytes long.
    pub fn hdr_mut(&mut self) -> &mut [u8] {
        // SAFETY: `self.hdr` was resolved for exactly `VSOCK_PKT_HDR_SIZE` bytes when the packet
        // was created from the virtq descriptor, so the bounds have already been checked.
        unsafe { std::slice::from_raw_parts_mut(self.hdr, VSOCK_PKT_HDR_SIZE) }
    }

    /// Provides in-place, byte-slice access to the vsock packet data buffer.
    ///
    /// Note: control packets (e.g. connection request or reset) have no data buffer associated.
    ///       For those packets, this method will return `None`.
    /// Also note: calling `len()` on the returned slice will yield the buffer size, which may be
    ///            (and often is) larger than the length of the packet data. The packet data
    ///            length is stored in the packet header, and accessible via
    ///            `VsockPacket::len()`.
    pub fn buf(&self) -> Option<&[u8]> {
        self.buf.map(|ptr| {
            // SAFETY: `ptr` was resolved for exactly `self.buf_size` bytes when the packet was
            // created from the virtq descriptor, so the bounds have already been checked.
            unsafe { std::slice::from_raw_parts(ptr as *const u8, self.buf_size) }
        })
    }

    /// Provides in-place, byte-slice, mutable access to the vsock packet data buffer.
    ///
    /// Note: control packets (e.g. connection request or reset) have no data buffer associated.
    ///       For those packets, this method will return `None`.
    /// Also note: calling `len()` on the returned slice will yield the buffer size, which may be
    ///            (and often is) larger than the length of the packet data. The packet data
    ///            length is stored in the packet header, and accessible via
    ///            `VsockPacket::len()`.
    pub fn buf_mut(&mut self) -> Option<&mut [u8]> {
        self.buf.map(|ptr| {
            // SAFETY: `ptr` was resolved for exactly `self.buf_size` bytes when the packet was
            // created from the virtq descriptor, so the bounds have already been checked.
            unsafe { std::slice::from_raw_parts_mut(ptr, self.buf_size) }
        })
    }

    /// Returns the source CID, decoded from the little-endian header field.
    pub fn src_cid(&self) -> u64 {
        LittleEndian::read_u64(&self.hdr()[HDROFF_SRC_CID..])
    }

    /// Stores the source CID in the header (little-endian); returns `self` for chaining.
    pub fn set_src_cid(&mut self, cid: u64) -> &mut Self {
        LittleEndian::write_u64(&mut self.hdr_mut()[HDROFF_SRC_CID..], cid);
        self
    }

    /// Returns the destination CID, decoded from the little-endian header field.
    pub fn dst_cid(&self) -> u64 {
        LittleEndian::read_u64(&self.hdr()[HDROFF_DST_CID..])
    }

    /// Stores the destination CID in the header (little-endian); returns `self` for chaining.
    pub fn set_dst_cid(&mut self, cid: u64) -> &mut Self {
        LittleEndian::write_u64(&mut self.hdr_mut()[HDROFF_DST_CID..], cid);
        self
    }

    /// Returns the source port, decoded from the little-endian header field.
    pub fn src_port(&self) -> u32 {
        LittleEndian::read_u32(&self.hdr()[HDROFF_SRC_PORT..])
    }

    /// Stores the source port in the header (little-endian); returns `self` for chaining.
    pub fn set_src_port(&mut self, port: u32) -> &mut Self {
        LittleEndian::write_u32(&mut self.hdr_mut()[HDROFF_SRC_PORT..], port);
        self
    }

    /// Returns the destination port, decoded from the little-endian header field.
    pub fn dst_port(&self) -> u32 {
        LittleEndian::read_u32(&self.hdr()[HDROFF_DST_PORT..])
    }

    /// Stores the destination port in the header (little-endian); returns `self` for chaining.
    pub fn set_dst_port(&mut self, port: u32) -> &mut Self {
        LittleEndian::write_u32(&mut self.hdr_mut()[HDROFF_DST_PORT..], port);
        self
    }

    /// Returns the packet data length (in bytes), as stored in the header `len` field.
    ///
    /// This is the header value, not the size of the slice returned by `buf()`.
    pub fn len(&self) -> u32 {
        LittleEndian::read_u32(&self.hdr()[HDROFF_LEN..])
    }

    /// Returns `true` if the header `len` field is 0.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Stores the packet data length in the header (little-endian); returns `self` for chaining.
    pub fn set_len(&mut self, len: u32) -> &mut Self {
        LittleEndian::write_u32(&mut self.hdr_mut()[HDROFF_LEN..], len);
        self
    }

    /// Returns the socket type, decoded from the little-endian header field.
    pub fn type_(&self) -> u16 {
        LittleEndian::read_u16(&self.hdr()[HDROFF_TYPE..])
    }

    /// Stores the socket type in the header (little-endian); returns `self` for chaining.
    pub fn set_type(&mut self, type_: u16) -> &mut Self {
        LittleEndian::write_u16(&mut self.hdr_mut()[HDROFF_TYPE..], type_);
        self
    }

    /// Returns the operation ID (one of the VSOCK_OP_* values), decoded from the header.
    pub fn op(&self) -> u16 {
        LittleEndian::read_u16(&self.hdr()[HDROFF_OP..])
    }

    /// Stores the operation ID in the header (little-endian); returns `self` for chaining.
    pub fn set_op(&mut self, op: u16) -> &mut Self {
        LittleEndian::write_u16(&mut self.hdr_mut()[HDROFF_OP..], op);
        self
    }

    /// Returns the operation flags, decoded from the little-endian header field.
    pub fn flags(&self) -> u32 {
        LittleEndian::read_u32(&self.hdr()[HDROFF_FLAGS..])
    }

    /// Overwrites the operation flags in the header (little-endian); returns `self` for chaining.
    pub fn set_flags(&mut self, flags: u32) -> &mut Self {
        LittleEndian::write_u32(&mut self.hdr_mut()[HDROFF_FLAGS..], flags);
        self
    }

    /// ORs `flag` into the current operation flags, leaving the bits that are already set
    /// untouched; returns `self` for chaining.
    pub fn set_flag(&mut self, flag: u32) -> &mut Self {
        self.set_flags(self.flags() | flag);
        self
    }

    /// Returns the size (in bytes) of the sender's receive buffer, decoded from the header.
    pub fn buf_alloc(&self) -> u32 {
        LittleEndian::read_u32(&self.hdr()[HDROFF_BUF_ALLOC..])
    }

    /// Stores the sender's receive buffer size in the header (little-endian); returns `self` for
    /// chaining.
    pub fn set_buf_alloc(&mut self, buf_alloc: u32) -> &mut Self {
        LittleEndian::write_u32(&mut self.hdr_mut()[HDROFF_BUF_ALLOC..], buf_alloc);
        self
    }

    /// Returns the number of bytes the sender has received and consumed, decoded from the header.
    pub fn fwd_cnt(&self) -> u32 {
        LittleEndian::read_u32(&self.hdr()[HDROFF_FWD_CNT..])
    }

    /// Stores the forwarded bytes counter in the header (little-endian); returns `self` for
    /// chaining.
    pub fn set_fwd_cnt(&mut self, fwd_cnt: u32) -> &mut Self {
        LittleEndian::write_u32(&mut self.hdr_mut()[HDROFF_FWD_CNT..], fwd_cnt);
        self
    }
}
#[cfg(test)]
mod tests {
    use super::super::tests::TestContext;
    use super::*;
    use crate::vsock::defs::MAX_PKT_BUF_SIZE;
    use vm_memory::{GuestAddress, GuestMemoryMmap};
    use vm_virtio::queue::testing::VirtqDesc as GuestQDesc;
    use vm_virtio::queue::VIRTQ_DESC_F_WRITE;

    // Indices, inside `handler.queues`, of the queues the RX and TX packets are assembled from.
    const RXQ_INDEX: usize = 0;
    const TXQ_INDEX: usize = 1;

    /// Binds a fresh `TestContext` and its epoll handler context to the two given identifiers,
    /// and makes the first TX descriptor advertise a valid (1024 bytes) packet length.
    macro_rules! create_context {
        ($test_ctx:ident, $handler_ctx:ident) => {
            let $test_ctx = TestContext::new();
            let mut $handler_ctx = $test_ctx.create_epoll_handler_context();
            // For TX packets, hdr.len should be set to a valid value.
            set_pkt_len(1024, &$handler_ctx.guest_txvq.dtable[0], &$test_ctx.mem);
        };
    }

    /// Assembles a packet from the first available chain of the `tx` or `rx` queue and panics
    /// unless assembly fails with an error matching the given pattern.
    macro_rules! expect_asm_error {
        (tx, $test_ctx:expr, $handler_ctx:expr, $err:pat) => {
            expect_asm_error!($test_ctx, $handler_ctx, $err, from_tx_virtq_head, TXQ_INDEX);
        };
        (rx, $test_ctx:expr, $handler_ctx:expr, $err:pat) => {
            expect_asm_error!($test_ctx, $handler_ctx, $err, from_rx_virtq_head, RXQ_INDEX);
        };
        ($test_ctx:expr, $handler_ctx:expr, $err:pat, $ctor:ident, $vq:expr) => {
            match VsockPacket::$ctor(
                &$handler_ctx.handler.queues[$vq]
                    .iter(&$test_ctx.mem)
                    .next()
                    .unwrap(),
            ) {
                Err($err) => (),
                Ok(_) => panic!("Packet assembly should've failed!"),
                Err(other) => panic!("Packet assembly failed with: {:?}", other),
            }
        };
    }

    /// Writes `len` into the `len` field of the packet header that `guest_desc` points to.
    fn set_pkt_len(len: u32, guest_desc: &GuestQDesc, mem: &GuestMemoryMmap) {
        let hdr_gpa = guest_desc.addr.get();
        let hdr_ptr = get_host_address_range(mem, GuestAddress(hdr_gpa), VSOCK_PKT_HDR_SIZE)
            .unwrap() as *mut u8;
        // SAFETY: the range was resolved for `VSOCK_PKT_HDR_SIZE` bytes just above.
        let hdr = unsafe { std::slice::from_raw_parts_mut(hdr_ptr, VSOCK_PKT_HDR_SIZE) };
        LittleEndian::write_u32(&mut hdr[HDROFF_LEN..], len);
    }

    #[test]
    #[allow(clippy::cognitive_complexity)]
    fn test_tx_packet_assembly() {
        // Test case: successful TX packet assembly.
        {
            create_context!(test_ctx, handler_ctx);

            let pkt = VsockPacket::from_tx_virtq_head(
                &handler_ctx.handler.queues[TXQ_INDEX]
                    .iter(&test_ctx.mem)
                    .next()
                    .unwrap(),
            )
            .unwrap();
            assert_eq!(pkt.hdr().len(), VSOCK_PKT_HDR_SIZE);
            assert_eq!(
                pkt.buf().unwrap().len(),
                handler_ctx.guest_txvq.dtable[1].len.get() as usize
            );
        }

        // Test case: error on write-only hdr descriptor.
        {
            create_context!(test_ctx, handler_ctx);
            handler_ctx.guest_txvq.dtable[0]
                .flags
                .set(VIRTQ_DESC_F_WRITE);
            expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::UnreadableDescriptor);
        }

        // Test case: header descriptor has insufficient space to hold the packet header.
        {
            create_context!(test_ctx, handler_ctx);
            handler_ctx.guest_txvq.dtable[0]
                .len
                .set(VSOCK_PKT_HDR_SIZE as u32 - 1);
            expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::HdrDescTooSmall(_));
        }

        // Test case: zero-length TX packet.
        {
            create_context!(test_ctx, handler_ctx);
            set_pkt_len(0, &handler_ctx.guest_txvq.dtable[0], &test_ctx.mem);
            let mut pkt = VsockPacket::from_tx_virtq_head(
                &handler_ctx.handler.queues[TXQ_INDEX]
                    .iter(&test_ctx.mem)
                    .next()
                    .unwrap(),
            )
            .unwrap();
            assert!(pkt.buf().is_none());
            assert!(pkt.buf_mut().is_none());
        }

        // Test case: TX packet has more data than we can handle.
        {
            create_context!(test_ctx, handler_ctx);
            set_pkt_len(
                MAX_PKT_BUF_SIZE as u32 + 1,
                &handler_ctx.guest_txvq.dtable[0],
                &test_ctx.mem,
            );
            expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::InvalidPktLen(_));
        }

        // Test case:
        // - packet header advertises some data length; and
        // - the data descriptor is missing.
        {
            create_context!(test_ctx, handler_ctx);
            set_pkt_len(1024, &handler_ctx.guest_txvq.dtable[0], &test_ctx.mem);
            // Clearing all the flags also drops the link to the next descriptor.
            handler_ctx.guest_txvq.dtable[0].flags.set(0);
            expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::BufDescMissing);
        }

        // Test case: error on write-only buf descriptor.
        {
            create_context!(test_ctx, handler_ctx);
            handler_ctx.guest_txvq.dtable[1]
                .flags
                .set(VIRTQ_DESC_F_WRITE);
            expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::UnreadableDescriptor);
        }

        // Test case: the buffer descriptor cannot fit all the data advertised by the
        // packet header `len` field.
        {
            create_context!(test_ctx, handler_ctx);
            set_pkt_len(8 * 1024, &handler_ctx.guest_txvq.dtable[0], &test_ctx.mem);
            handler_ctx.guest_txvq.dtable[1].len.set(4 * 1024);
            expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::BufDescTooSmall);
        }
    }

    #[test]
    fn test_rx_packet_assembly() {
        // Test case: successful RX packet assembly.
        {
            create_context!(test_ctx, handler_ctx);
            let pkt = VsockPacket::from_rx_virtq_head(
                &handler_ctx.handler.queues[RXQ_INDEX]
                    .iter(&test_ctx.mem)
                    .next()
                    .unwrap(),
            )
            .unwrap();
            assert_eq!(pkt.hdr().len(), VSOCK_PKT_HDR_SIZE);
            assert_eq!(
                pkt.buf().unwrap().len(),
                handler_ctx.guest_rxvq.dtable[1].len.get() as usize
            );
        }

        // Test case: read-only RX packet header.
        {
            create_context!(test_ctx, handler_ctx);
            handler_ctx.guest_rxvq.dtable[0].flags.set(0);
            expect_asm_error!(rx, test_ctx, handler_ctx, VsockError::UnwritableDescriptor);
        }

        // Test case: RX descriptor head cannot fit the entire packet header.
        {
            create_context!(test_ctx, handler_ctx);
            handler_ctx.guest_rxvq.dtable[0]
                .len
                .set(VSOCK_PKT_HDR_SIZE as u32 - 1);
            expect_asm_error!(rx, test_ctx, handler_ctx, VsockError::HdrDescTooSmall(_));
        }

        // Test case: RX descriptor chain is missing the packet buffer descriptor.
        {
            create_context!(test_ctx, handler_ctx);
            // Only the write flag is left set, so the head no longer links to a next descriptor.
            handler_ctx.guest_rxvq.dtable[0]
                .flags
                .set(VIRTQ_DESC_F_WRITE);
            expect_asm_error!(rx, test_ctx, handler_ctx, VsockError::BufDescMissing);
        }
    }

    #[test]
    #[allow(clippy::cognitive_complexity)]
    fn test_packet_hdr_accessors() {
        const SRC_CID: u64 = 1;
        const DST_CID: u64 = 2;
        const SRC_PORT: u32 = 3;
        const DST_PORT: u32 = 4;
        const LEN: u32 = 5;
        const TYPE: u16 = 6;
        const OP: u16 = 7;
        const FLAGS: u32 = 8;
        const BUF_ALLOC: u32 = 9;
        const FWD_CNT: u32 = 10;
        // A flag bit that is deliberately NOT part of `FLAGS`, so that setting it has an
        // observable effect.
        const EXTRA_FLAG: u32 = 0b0100;

        create_context!(test_ctx, handler_ctx);
        let mut pkt = VsockPacket::from_rx_virtq_head(
            &handler_ctx.handler.queues[RXQ_INDEX]
                .iter(&test_ctx.mem)
                .next()
                .unwrap(),
        )
        .unwrap();

        // Test field accessors.
        pkt.set_src_cid(SRC_CID)
            .set_dst_cid(DST_CID)
            .set_src_port(SRC_PORT)
            .set_dst_port(DST_PORT)
            .set_len(LEN)
            .set_type(TYPE)
            .set_op(OP)
            .set_flags(FLAGS)
            .set_buf_alloc(BUF_ALLOC)
            .set_fwd_cnt(FWD_CNT);

        assert_eq!(pkt.src_cid(), SRC_CID);
        assert_eq!(pkt.dst_cid(), DST_CID);
        assert_eq!(pkt.src_port(), SRC_PORT);
        assert_eq!(pkt.dst_port(), DST_PORT);
        assert_eq!(pkt.len(), LEN);
        assert_eq!(pkt.type_(), TYPE);
        assert_eq!(pkt.op(), OP);
        assert_eq!(pkt.flags(), FLAGS);
        assert_eq!(pkt.buf_alloc(), BUF_ALLOC);
        assert_eq!(pkt.fwd_cnt(), FWD_CNT);

        // Test packet header as-slice access.
        assert_eq!(pkt.hdr().len(), VSOCK_PKT_HDR_SIZE);
        assert_eq!(
            SRC_CID,
            LittleEndian::read_u64(&pkt.hdr()[HDROFF_SRC_CID..])
        );
        assert_eq!(
            DST_CID,
            LittleEndian::read_u64(&pkt.hdr()[HDROFF_DST_CID..])
        );
        assert_eq!(
            SRC_PORT,
            LittleEndian::read_u32(&pkt.hdr()[HDROFF_SRC_PORT..])
        );
        assert_eq!(
            DST_PORT,
            LittleEndian::read_u32(&pkt.hdr()[HDROFF_DST_PORT..])
        );
        assert_eq!(LEN, LittleEndian::read_u32(&pkt.hdr()[HDROFF_LEN..]));
        assert_eq!(TYPE, LittleEndian::read_u16(&pkt.hdr()[HDROFF_TYPE..]));
        assert_eq!(OP, LittleEndian::read_u16(&pkt.hdr()[HDROFF_OP..]));
        assert_eq!(FLAGS, LittleEndian::read_u32(&pkt.hdr()[HDROFF_FLAGS..]));
        assert_eq!(
            BUF_ALLOC,
            LittleEndian::read_u32(&pkt.hdr()[HDROFF_BUF_ALLOC..])
        );
        assert_eq!(
            FWD_CNT,
            LittleEndian::read_u32(&pkt.hdr()[HDROFF_FWD_CNT..])
        );

        // Test individual flag setting. This runs after the raw header checks above (which
        // expect the flags field to still equal `FLAGS`) and uses a bit that is not yet set:
        // the new bit must show up, and the pre-existing ones must survive.
        assert_eq!(FLAGS & EXTRA_FLAG, 0);
        pkt.set_flag(EXTRA_FLAG);
        assert_eq!(pkt.flags(), FLAGS | EXTRA_FLAG);

        // Zeroing the header through the mutable slice must be visible through every getter.
        assert_eq!(pkt.hdr_mut().len(), VSOCK_PKT_HDR_SIZE);
        for b in pkt.hdr_mut() {
            *b = 0;
        }
        assert_eq!(pkt.src_cid(), 0);
        assert_eq!(pkt.dst_cid(), 0);
        assert_eq!(pkt.src_port(), 0);
        assert_eq!(pkt.dst_port(), 0);
        assert_eq!(pkt.len(), 0);
        assert_eq!(pkt.type_(), 0);
        assert_eq!(pkt.op(), 0);
        assert_eq!(pkt.flags(), 0);
        assert_eq!(pkt.buf_alloc(), 0);
        assert_eq!(pkt.fwd_cnt(), 0);
    }

    #[test]
    fn test_packet_buf() {
        create_context!(test_ctx, handler_ctx);
        let mut pkt = VsockPacket::from_rx_virtq_head(
            &handler_ctx.handler.queues[RXQ_INDEX]
                .iter(&test_ctx.mem)
                .next()
                .unwrap(),
        )
        .unwrap();

        // Both views must span the whole data descriptor.
        assert_eq!(
            pkt.buf().unwrap().len(),
            handler_ctx.guest_rxvq.dtable[1].len.get() as usize
        );
        assert_eq!(
            pkt.buf_mut().unwrap().len(),
            handler_ctx.guest_rxvq.dtable[1].len.get() as usize
        );

        // Every byte written through the mutable view must be readable through the shared one.
        for i in 0..pkt.buf().unwrap().len() {
            pkt.buf_mut().unwrap()[i] = (i % 0x100) as u8;
            assert_eq!(pkt.buf().unwrap()[i], (i % 0x100) as u8);
        }
    }
}