vfio_user: Implement client side support

Implement most of the client side (i.e. the VMM side) of the vfio-user
protocol:

https://github.com/nutanix/libvfio-user/blob/master/docs/vfio-user.rst

Items that are not implemented (because they are optimisations or unused
due to alternative solutions):

* VFIO_USER_DMA_READ/WRITE - this is a way for the server to access guest
  memory when that memory is not shared by fd, e.g. because the client
  doesn't support doing so. However, since we do support sharing the
  memory by fd, this is not required.
* VFIO_USER_GET_REGION_IO_FDS - an optimisation to bypass the VMM by
  having KVM talk directly to the backend using ioregionfd
* VFIO_USER_DIRTY_PAGES - for the implementation of live migration

Signed-off-by: Rob Bradford <robert.bradford@intel.com>
This commit is contained in:
Rob Bradford 2021-06-04 11:42:36 +00:00
parent 77e147f333
commit 2e236c53c8
4 changed files with 743 additions and 20 deletions

67
Cargo.lock generated
View File

@ -6,7 +6,7 @@ version = 3
name = "acpi_tables"
version = "0.1.0"
dependencies = [
"vm-memory",
"vm-memory 0.6.0",
]
[[package]]
@ -78,7 +78,7 @@ dependencies = [
"versionize",
"versionize_derive",
"vm-fdt",
"vm-memory",
"vm-memory 0.6.0",
"vm-migration",
"vmm-sys-util",
]
@ -142,7 +142,7 @@ dependencies = [
"versionize",
"versionize_derive",
"virtio-bindings",
"vm-memory",
"vm-memory 0.6.0",
"vm-virtio",
"vmm-sys-util",
]
@ -209,7 +209,7 @@ dependencies = [
"signal-hook",
"test_infra",
"thiserror",
"vm-memory",
"vm-memory 0.6.0",
"vmm",
"vmm-sys-util",
"wait-timeout",
@ -255,7 +255,7 @@ dependencies = [
"versionize",
"versionize_derive",
"vm-device",
"vm-memory",
"vm-memory 0.6.0",
"vm-migration",
"vmm-sys-util",
]
@ -397,7 +397,7 @@ dependencies = [
"serde_derive",
"serde_json",
"thiserror",
"vm-memory",
"vm-memory 0.6.0",
"vmm-sys-util",
]
@ -501,7 +501,7 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c819cc8275b0f2c1ed9feec455ca288b45d82932384a6a5f7a86812ee3427459"
dependencies = [
"vm-memory",
"vm-memory 0.6.0",
]
[[package]]
@ -592,7 +592,7 @@ dependencies = [
"versionize",
"versionize_derive",
"virtio-bindings",
"vm-memory",
"vm-memory 0.6.0",
"vm-virtio",
"vmm-sys-util",
]
@ -663,7 +663,7 @@ dependencies = [
"vfio-ioctls",
"vm-allocator",
"vm-device",
"vm-memory",
"vm-memory 0.6.0",
"vm-migration",
"vmm-sys-util",
]
@ -1116,7 +1116,23 @@ dependencies = [
"kvm-ioctls",
"log",
"vfio-bindings",
"vm-memory",
"vm-memory 0.6.0",
"vmm-sys-util",
]
[[package]]
name = "vfio_user"
version = "0.1.0"
dependencies = [
"anyhow",
"libc",
"log",
"serde",
"serde_derive",
"serde_json",
"thiserror",
"vfio-bindings",
"vm-memory 0.5.0",
"vmm-sys-util",
]
@ -1128,7 +1144,7 @@ checksum = "8a6b90237e10f1a61b35fba73885c3567e1a5a8c40d44daae335f7710210a7dc"
dependencies = [
"bitflags",
"libc",
"vm-memory",
"vm-memory 0.6.0",
"vmm-sys-util",
]
@ -1141,7 +1157,7 @@ dependencies = [
"log",
"vhost",
"virtio-bindings",
"vm-memory",
"vm-memory 0.6.0",
"vm-virtio",
"vmm-sys-util",
]
@ -1161,7 +1177,7 @@ dependencies = [
"vhost",
"vhost_user_backend",
"virtio-bindings",
"vm-memory",
"vm-memory 0.6.0",
"vmm-sys-util",
]
@ -1179,7 +1195,7 @@ dependencies = [
"vhost",
"vhost_user_backend",
"virtio-bindings",
"vm-memory",
"vm-memory 0.6.0",
"vmm-sys-util",
]
@ -1216,7 +1232,7 @@ dependencies = [
"virtio-bindings",
"vm-allocator",
"vm-device",
"vm-memory",
"vm-memory 0.6.0",
"vm-migration",
"vm-virtio",
"vmm-sys-util",
@ -1228,7 +1244,7 @@ version = "0.1.0"
dependencies = [
"arch",
"libc",
"vm-memory",
"vm-memory 0.6.0",
]
[[package]]
@ -1241,7 +1257,7 @@ dependencies = [
"serde_json",
"thiserror",
"vfio-ioctls",
"vm-memory",
"vm-memory 0.6.0",
"vmm-sys-util",
]
@ -1250,6 +1266,17 @@ name = "vm-fdt"
version = "0.1.0"
source = "git+https://github.com/rust-vmm/vm-fdt?branch=master#af59838e5df826cd3153d92ed1546f0b2cc454f7"
[[package]]
name = "vm-memory"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "625f401b1b8b3ac3d43f53903cd138cfe840bd985f8581e553027b31d2bb8ae8"
dependencies = [
"arc-swap",
"libc",
"winapi",
]
[[package]]
name = "vm-memory"
version = "0.6.0"
@ -1272,7 +1299,7 @@ dependencies = [
"thiserror",
"versionize",
"versionize_derive",
"vm-memory",
"vm-memory 0.6.0",
]
[[package]]
@ -1281,7 +1308,7 @@ version = "0.1.0"
dependencies = [
"log",
"virtio-bindings",
"vm-memory",
"vm-memory 0.6.0",
]
[[package]]
@ -1322,7 +1349,7 @@ dependencies = [
"virtio-devices",
"vm-allocator",
"vm-device",
"vm-memory",
"vm-memory 0.6.0",
"vm-migration",
"vm-virtio",
"vmm-sys-util",

View File

@ -77,6 +77,7 @@ members = [
"pci",
"qcow",
"rate_limiter",
"vfio_user",
"vhost_user_backend",
"vhost_user_block",
"vhost_user_net",

20
vfio_user/Cargo.toml Normal file
View File

@ -0,0 +1,20 @@
[package]
name = "vfio_user"
version = "0.1.0"
authors = ["The Cloud Hypervisor Authors"]
edition = "2018"
[dependencies]
anyhow = "1.0.42"
libc = "0.2.95"
log = "0.4.14"
serde = {version = ">=1.0.27", features = ["rc"] }
serde_derive = ">=1.0.27"
serde_json = ">=1.0.9"
thiserror = "1.0.26"
vm-memory = { version = "0.5.0", features = ["backend-mmap", "backend-atomic"] }
vmm-sys-util = ">=0.3.1"
[dependencies.vfio-bindings]
version = "0.2.0"
features = ["fam-wrappers"]

675
vfio_user/src/lib.rs Normal file
View File

@ -0,0 +1,675 @@
// Copyright © 2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use std::ffi::CString;
use std::io::{IoSlice, Read, Write};
use std::num::Wrapping;
use std::os::unix::net::UnixStream;
use std::os::unix::prelude::RawFd;
use std::path::Path;
use thiserror::Error;
use vfio_bindings::bindings::vfio::*;
use vm_memory::{ByteValued, FileOffset};
use vmm_sys_util::sock_ctrl_msg::ScmSocket;
#[macro_use]
extern crate serde_derive;
#[macro_use]
extern crate log;
/// Command identifiers stored in the `command` field of a message `Header`.
///
/// The explicit discriminants are the values that go on the wire, so they
/// must not be renumbered.
// Only some of these commands are issued by the client, hence `dead_code`.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug)]
#[repr(u16)]
enum Command {
    Unknown = 0,
    Version = 1,
    DmaMap = 2,
    DmaUnmap = 3,
    DeviceGetInfo = 4,
    DeviceGetRegionInfo = 5,
    GetRegionIoFds = 6,
    GetIrqInfo = 7,
    SetIrqs = 8,
    RegionRead = 9,
    RegionWrite = 10,
    DmaRead = 11,
    DmaWrite = 12,
    DeviceReset = 13,
    UserDirtyPages = 14,
}

impl Default for Command {
    /// The default command is `Unknown` (value 0), matching a zeroed header.
    fn default() -> Self {
        Self::Unknown
    }
}
/// Values for the `flags` field of a message `Header`.
///
/// The client casts these to `u32` when it fills in a header.
// Only `Command` is used by the visible client code, hence `dead_code`.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(u32)]
enum HeaderFlags {
    Command = 0x00,
    Reply = 0x01,
    NoReply = 0x10,
    Error = 0x20,
}

impl Default for HeaderFlags {
    /// The default flag value is `Command` (0).
    fn default() -> Self {
        Self::Command
    }
}
/// Fixed-size header that starts every message exchanged with the server.
///
/// NOTE(review): `command` is a Rust enum, yet replies are read from the
/// socket straight into this struct through `as_mut_slice()`. A received value
/// that is not a declared `Command` discriminant would be an invalid enum
/// value; confirm whether this field should be a plain `u16`.
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct Header {
/// Message identifier; the client takes it from its `next_message_id` counter.
message_id: u16,
/// Operation carried by the message.
command: Command,
/// Size in bytes of the whole message, this header included.
message_size: u32,
/// Filled by the client with a `HeaderFlags` value cast to `u32`.
flags: u32,
/// Never set by the client (stays 0); presumably an error code written by
/// the server - confirm against the protocol specification.
error: u32,
}
// `repr(C)` with fields of 2 + 2 + 4 + 4 + 4 bytes, so the layout has no padding.
unsafe impl ByteValued for Header {}
/// Fixed-size part of a `Command::Version` message.
///
/// `Client::negotiate_version` sends it followed by a NUL-terminated JSON
/// capabilities string and reads the server's reply using the same layout.
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct Version {
header: Header,
/// Major protocol version (the client sends 0).
major: u16,
/// Minor protocol version (the client sends 1).
minor: u16,
}
// `repr(C)`: a 16-byte header followed by two `u16` fields, no padding.
unsafe impl ByteValued for Version {}
/// Migration-related part of the JSON capabilities (the `migration` field of
/// `Capabilities`).
#[derive(Serialize, Deserialize, Debug)]
struct MigrationCapabilities {
/// Defaults to 4096 via `default_migration_capabilities`; presumably the
/// page size used for migration - confirm against the protocol specification.
pgsize: u32,
}
/// Value used for `Capabilities::max_msg_fds` when none is supplied.
const fn default_max_msg_fds() -> u32 {
    const DEFAULT_MAX_MSG_FDS: u32 = 1;
    DEFAULT_MAX_MSG_FDS
}
/// Value used for `Capabilities::max_data_xfer_size` when none is supplied
/// (1 MiB expressed in bytes).
const fn default_max_data_xfer_size() -> u32 {
    1024 * 1024
}
/// Value used for `Capabilities::migration` when none is supplied.
const fn default_migration_capabilities() -> MigrationCapabilities {
    let pgsize = 4096;
    MigrationCapabilities { pgsize }
}
/// Access mode stored in the `flags` field of a `DmaMap` message.
///
/// The discriminants are the values sent on the wire.
// The client only ever sends `ReadWrite`, hence `dead_code`.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug)]
#[repr(u32)]
enum DmaMapFlags {
    Unknown = 0b00,
    ReadOnly = 0b01,
    WriteOnly = 0b10,
    ReadWrite = 0b11,
}

impl Default for DmaMapFlags {
    /// The default is `Unknown` (value 0).
    fn default() -> Self {
        DmaMapFlags::Unknown
    }
}
/// Message sent by `Client::dma_map` (`Command::DmaMap`), accompanied by a
/// file descriptor.
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct DmaMap {
header: Header,
/// Size of this message without the header, as computed by `Client::dma_map`.
argsz: u32,
/// Access mode; the client always sends `DmaMapFlags::ReadWrite`.
flags: DmaMapFlags,
/// `offset` argument of `Client::dma_map`; presumably the offset into the
/// passed file descriptor - confirm against the protocol specification.
offset: u64,
/// `address` argument of `Client::dma_map`.
address: u64,
/// `size` argument of `Client::dma_map`.
size: u64,
}
// `repr(C)`: 16-byte header, two 4-byte fields, three `u64` fields; no padding.
unsafe impl ByteValued for DmaMap {}
/// Message sent by `Client::dma_unmap` (`Command::DmaUnmap`).
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct DmaUnmap {
header: Header,
/// Size of this message without the header, as computed by `Client::dma_unmap`.
argsz: u32,
/// Always sent as 0 by the client.
flags: u32,
/// `address` argument of `Client::dma_unmap`.
address: u64,
/// `size` argument of `Client::dma_unmap`.
size: u64,
}
// `repr(C)`: 16-byte header, two `u32` fields, two `u64` fields; no padding.
unsafe impl ByteValued for DmaUnmap {}
/// Request and reply layout for `Command::DeviceGetInfo`.
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct DeviceGetInfo {
header: Header,
/// NOTE(review): `Client::get_regions` sets this to the size of the whole
/// struct, header included, whereas the other messages exclude the header -
/// confirm which is intended.
argsz: u32,
/// In the reply, tested against `VFIO_DEVICE_FLAGS_PCI` and
/// `VFIO_DEVICE_FLAGS_RESET`.
flags: u32,
/// In the reply, the number of regions the client then queries one by one.
num_regions: u32,
/// In the reply, stored by the client as `num_irqs`.
num_irqs: u32,
}
// `repr(C)`: 16-byte header followed by four `u32` fields; no padding.
unsafe impl ByteValued for DeviceGetInfo {}
/// Request and reply layout for `Command::DeviceGetRegionInfo`: the message
/// header followed by a `vfio_region_info` from the VFIO bindings.
///
/// A reply may be followed by extra bytes, which `Client::get_regions` reads
/// and currently discards.
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct DeviceGetRegionInfo {
header: Header,
region_info: vfio_region_info,
}
// Relies on `vfio_region_info` itself being plain data - defined in `vfio_bindings`.
unsafe impl ByteValued for DeviceGetRegionInfo {}
/// Fixed-size part of the `Command::RegionRead` and `Command::RegionWrite`
/// messages, used for both the requests and their replies.
///
/// A write request, and the reply to a read, is followed by `count` data bytes.
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct RegionAccess {
header: Header,
/// Offset within the region at which the access starts.
offset: u64,
/// Index of the region being accessed.
region: u32,
/// Number of data bytes; the client sets it to the length of its buffer.
count: u32,
}
// `repr(C)`: 16-byte header, one `u64`, two `u32` fields; no padding.
unsafe impl ByteValued for RegionAccess {}
/// Request and reply layout for `Command::GetIrqInfo`.
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct GetIrqInfo {
header: Header,
/// Size of this message without the header, as computed by `Client::get_irq_info`.
argsz: u32,
/// Sent as 0; the value in the reply is returned to the caller in `IrqInfo`.
flags: u32,
/// IRQ index being queried.
index: u32,
/// Sent as 0; the value in the reply is returned to the caller in `IrqInfo`.
count: u32,
}
// `repr(C)`: 16-byte header followed by four `u32` fields; no padding.
unsafe impl ByteValued for GetIrqInfo {}
/// Message sent by `Client::set_irqs` (`Command::SetIrqs`), accompanied by the
/// file descriptors the caller passes in.
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct SetIrqs {
header: Header,
/// Size of this message without the header, as computed by `Client::set_irqs`.
argsz: u32,
/// `flags` argument of `Client::set_irqs`, passed through unchanged.
flags: u32,
/// `index` argument of `Client::set_irqs`.
index: u32,
/// `start` argument of `Client::set_irqs`.
start: u32,
/// `count` argument of `Client::set_irqs`.
count: u32,
}
// `repr(C)`: 16-byte header followed by five `u32` fields; no padding.
unsafe impl ByteValued for SetIrqs {}
/// Message sent by `Client::reset` (`Command::DeviceReset`); it consists of
/// the header only.
#[repr(C)]
#[derive(Default, Clone, Copy, Debug)]
struct DeviceReset {
header: Header,
}
// Same layout as `Header`.
unsafe impl ByteValued for DeviceReset {}
/// Capabilities exchanged as JSON during version negotiation.
///
/// When deserialising, any field missing from the JSON takes the value
/// returned by the function named in its `serde(default = ...)` attribute.
#[derive(Serialize, Deserialize, Debug)]
struct Capabilities {
    #[serde(default = "default_max_msg_fds")]
    max_msg_fds: u32,
    #[serde(default = "default_max_data_xfer_size")]
    max_data_xfer_size: u32,
    #[serde(default = "default_migration_capabilities")]
    migration: MigrationCapabilities,
}

impl Default for Capabilities {
    /// Builds the capabilities from the same per-field defaults that serde uses.
    fn default() -> Self {
        let max_msg_fds = default_max_msg_fds();
        let max_data_xfer_size = default_max_data_xfer_size();
        let migration = default_migration_capabilities();
        Self {
            max_msg_fds,
            max_data_xfer_size,
            migration,
        }
    }
}
/// Client (VMM) end of a vfio-user connection to a device server.
pub struct Client {
/// Unix socket connected to the server.
stream: UnixStream,
/// Identifier for the next message; incremented (with wrap-around) after
/// each command is built.
next_message_id: Wrapping<u16>,
/// IRQ count reported by the server in the device information reply.
num_irqs: u32,
/// Reset capability derived from the device information reply.
resettable: bool,
/// Regions reported by the server, collected when the client is created.
regions: Vec<Region>,
}
/// Description of one device region, taken from the server's reply to a
/// region information query.
#[derive(Debug)]
pub struct Region {
/// `flags` field of the returned `vfio_region_info`.
pub flags: u32,
/// `index` field of the returned `vfio_region_info`.
pub index: u32,
/// `size` field of the returned `vfio_region_info`.
pub size: u64,
/// Present when the reply carried a file descriptor: that descriptor paired
/// with the `offset` field of the returned `vfio_region_info`.
pub file_offset: Option<FileOffset>,
}
/// IRQ information returned by `Client::get_irq_info`; each field is copied
/// from the server's reply.
#[derive(Debug)]
pub struct IrqInfo {
pub index: u32,
pub flags: u32,
pub count: u32,
}
/// Errors returned by `Client` operations.
///
/// NOTE(review): the variant names `SerializeCapabilites` and
/// `DeserializeCapabilites` are misspelt ("Capabilites"); they are public, so
/// renaming them would change the API.
#[derive(Error, Debug)]
pub enum Error {
/// Connecting to the server's Unix socket failed.
#[error("Error connecting: {0}")]
Connect(#[source] std::io::Error),
/// The client capabilities could not be serialised to JSON.
#[error("Error serializing capabilities: {0}")]
SerializeCapabilites(#[source] serde_json::Error),
/// The server capabilities could not be parsed as JSON.
#[error("Error deserializing capabilities: {0}")]
DeserializeCapabilites(#[source] serde_json::Error),
/// Writing a message to the socket failed.
#[error("Error writing to stream: {0}")]
StreamWrite(#[source] std::io::Error),
/// Reading a reply from the socket failed.
#[error("Error reading from stream: {0}")]
StreamRead(#[source] std::io::Error),
/// Sending a message together with file descriptors failed.
#[error("Error writing with file descriptors: {0}")]
SendWithFd(#[source] vmm_sys_util::errno::Error),
/// Receiving a reply together with file descriptors failed.
#[error("Error reading with file descriptors: {0}")]
ReceiveWithFd(#[source] vmm_sys_util::errno::Error),
/// The server's device information did not have the PCI flag set.
#[error("Not a PCI device")]
NotPciDevice,
}
impl Client {
pub fn new(path: &Path) -> Result<Client, Error> {
let stream = UnixStream::connect(path).map_err(Error::Connect)?;
let mut client = Client {
next_message_id: Wrapping(0),
stream,
num_irqs: 0,
resettable: false,
regions: Vec::new(),
};
client.negotiate_version()?;
client.regions = client.get_regions()?;
Ok(client)
}
fn negotiate_version(&mut self) -> Result<(), Error> {
let caps = Capabilities::default();
let version_data = serde_json::to_string(&caps).map_err(Error::SerializeCapabilites)?;
let version = Version {
header: Header {
message_id: self.next_message_id.0,
command: Command::Version,
flags: HeaderFlags::Command as u32,
message_size: (std::mem::size_of::<Version>() + version_data.len() + 1) as u32,
..Default::default()
},
major: 0,
minor: 1,
};
info!("Command: {:?}", version);
let version_data = CString::new(version_data.as_bytes()).unwrap();
let bufs = vec![
IoSlice::new(version.as_slice()),
IoSlice::new(version_data.as_bytes_with_nul()),
];
// TODO: Use write_all_vectored() when ready
let _ = self
.stream
.write_vectored(&bufs)
.map_err(Error::StreamWrite)?;
info!(
"Sent client version information: major = {} minor = {} capabilities = {:?}",
version.major, version.minor, &caps
);
self.next_message_id += Wrapping(1);
let mut server_version: Version = Version::default();
self.stream
.read_exact(server_version.as_mut_slice())
.map_err(Error::StreamRead)?;
info!("Reply: {:?}", server_version);
let mut server_version_data = Vec::new();
server_version_data.resize(
server_version.header.message_size as usize - std::mem::size_of::<Version>(),
0,
);
self.stream
.read_exact(server_version_data.as_mut_slice())
.map_err(Error::StreamRead)?;
let server_caps: Capabilities =
serde_json::from_slice(&server_version_data[0..server_version_data.len() - 1])
.map_err(Error::DeserializeCapabilites)?;
info!(
"Received server version information: major = {} minor = {} capabilities = {:?}",
server_version.major, server_version.minor, &server_caps
);
Ok(())
}
/// Sends a `DmaMap` command, passing `fd` along with the message, and waits
/// for the server's reply header.
///
/// `offset`, `address` and `size` are copied into the message unchanged; the
/// mapping is always requested as read-write.
///
/// # Errors
///
/// `Error::SendWithFd` if the message cannot be sent, `Error::StreamRead` if
/// the reply cannot be read. The content of the reply is only logged.
pub fn dma_map(
    &mut self,
    offset: u64,
    address: u64,
    size: u64,
    fd: RawFd,
) -> Result<(), Error> {
    let total_len = std::mem::size_of::<DmaMap>();
    let header = Header {
        message_id: self.next_message_id.0,
        command: Command::DmaMap,
        flags: HeaderFlags::Command as u32,
        message_size: total_len as u32,
        ..Default::default()
    };
    let request = DmaMap {
        header,
        argsz: (total_len - std::mem::size_of::<Header>()) as u32,
        flags: DmaMapFlags::ReadWrite,
        offset,
        address,
        size,
    };
    info!("Command: {:?}", request);
    self.next_message_id += Wrapping(1);

    self.stream
        .send_with_fd(request.as_slice(), fd)
        .map_err(Error::SendWithFd)?;

    let mut response = Header::default();
    self.stream
        .read_exact(response.as_mut_slice())
        .map_err(Error::StreamRead)?;
    info!("Reply: {:?}", response);

    Ok(())
}
/// Sends a `DmaUnmap` command for the range given by `address` and `size`
/// and waits for the server's reply header.
///
/// # Errors
///
/// `Error::StreamWrite` if the message cannot be written, `Error::StreamRead`
/// if the reply cannot be read. The content of the reply is only logged.
pub fn dma_unmap(&mut self, address: u64, size: u64) -> Result<(), Error> {
    let total_len = std::mem::size_of::<DmaUnmap>();
    let header = Header {
        message_id: self.next_message_id.0,
        command: Command::DmaUnmap,
        flags: HeaderFlags::Command as u32,
        message_size: total_len as u32,
        ..Default::default()
    };
    let request = DmaUnmap {
        header,
        argsz: (total_len - std::mem::size_of::<Header>()) as u32,
        flags: 0,
        address,
        size,
    };
    info!("Command: {:?}", request);
    self.next_message_id += Wrapping(1);

    self.stream
        .write_all(request.as_slice())
        .map_err(Error::StreamWrite)?;

    let mut response = Header::default();
    self.stream
        .read_exact(response.as_mut_slice())
        .map_err(Error::StreamRead)?;
    info!("Reply: {:?}", response);

    Ok(())
}
/// Sends a `DeviceReset` command and waits for the server's reply header.
///
/// # Errors
///
/// `Error::StreamWrite` if the message cannot be written, `Error::StreamRead`
/// if the reply cannot be read. The content of the reply is only logged.
pub fn reset(&mut self) -> Result<(), Error> {
    let header = Header {
        message_id: self.next_message_id.0,
        command: Command::DeviceReset,
        flags: HeaderFlags::Command as u32,
        message_size: std::mem::size_of::<DeviceReset>() as u32,
        ..Default::default()
    };
    let request = DeviceReset { header };
    info!("Command: {:?}", request);
    self.next_message_id += Wrapping(1);

    self.stream
        .write_all(request.as_slice())
        .map_err(Error::StreamWrite)?;

    let mut response = Header::default();
    self.stream
        .read_exact(response.as_mut_slice())
        .map_err(Error::StreamRead)?;
    info!("Reply: {:?}", response);

    Ok(())
}
fn get_regions(&mut self) -> Result<Vec<Region>, Error> {
let get_info = DeviceGetInfo {
header: Header {
message_id: self.next_message_id.0,
command: Command::DeviceGetInfo,
flags: HeaderFlags::Command as u32,
message_size: std::mem::size_of::<DeviceGetInfo>() as u32,
..Default::default()
},
argsz: std::mem::size_of::<DeviceGetInfo>() as u32,
..Default::default()
};
info!("Command: {:?}", get_info);
self.next_message_id += Wrapping(1);
self.stream
.write_all(get_info.as_slice())
.map_err(Error::StreamWrite)?;
let mut reply = DeviceGetInfo::default();
self.stream
.read_exact(reply.as_mut_slice())
.map_err(Error::StreamRead)?;
info!("Reply: {:?}", reply);
self.num_irqs = reply.num_irqs;
if reply.flags & VFIO_DEVICE_FLAGS_PCI != VFIO_DEVICE_FLAGS_PCI {
return Err(Error::NotPciDevice);
}
self.resettable = reply.flags & VFIO_DEVICE_FLAGS_RESET != VFIO_DEVICE_FLAGS_RESET;
let num_regions = reply.num_regions;
let mut regions = Vec::new();
for index in 0..num_regions {
let get_region_info = DeviceGetRegionInfo {
header: Header {
message_id: self.next_message_id.0,
command: Command::DeviceGetRegionInfo,
flags: HeaderFlags::Command as u32,
message_size: std::mem::size_of::<DeviceGetRegionInfo>() as u32,
..Default::default()
},
region_info: vfio_region_info {
argsz: 1024, // Arbitrary max size
index,
..Default::default()
},
};
info!("Command: {:?}", get_region_info);
self.next_message_id += Wrapping(1);
self.stream
.write_all(get_region_info.as_slice())
.map_err(Error::StreamWrite)?;
let mut reply = DeviceGetRegionInfo::default();
info!("Reply: {:?}", reply);
let (_, fd) = self
.stream
.recv_with_fd(reply.as_mut_slice())
.map_err(Error::ReceiveWithFd)?;
regions.push(Region {
flags: reply.region_info.flags,
index: reply.region_info.index,
size: reply.region_info.size,
file_offset: fd.map(|fd| FileOffset::new(fd, reply.region_info.offset)),
});
// TODO: Handle region with capabilities
let mut _cap_data = Vec::with_capacity(
reply.header.message_size as usize - std::mem::size_of::<DeviceGetRegionInfo>(),
);
_cap_data.resize(_cap_data.capacity(), 0u8);
self.stream
.read_exact(_cap_data.as_mut_slice())
.map_err(Error::StreamRead)?;
}
Ok(regions)
}
pub fn region_read(&mut self, region: u32, offset: u64, data: &mut [u8]) -> Result<(), Error> {
let region_read = RegionAccess {
header: Header {
message_id: self.next_message_id.0,
command: Command::RegionRead,
flags: HeaderFlags::Command as u32,
message_size: std::mem::size_of::<RegionAccess>() as u32,
..Default::default()
},
offset,
count: data.len() as u32,
region,
};
info!("Command: {:?}", region_read);
self.next_message_id += Wrapping(1);
self.stream
.write_all(region_read.as_slice())
.map_err(Error::StreamWrite)?;
let mut reply = RegionAccess::default();
self.stream
.read_exact(reply.as_mut_slice())
.map_err(Error::StreamRead)?;
info!("Reply: {:?}", reply);
self.stream.read_exact(data).map_err(Error::StreamRead)?;
Ok(())
}
pub fn region_write(&mut self, region: u32, offset: u64, data: &[u8]) -> Result<(), Error> {
let region_write = RegionAccess {
header: Header {
message_id: self.next_message_id.0,
command: Command::RegionWrite,
flags: HeaderFlags::Command as u32,
message_size: (std::mem::size_of::<RegionAccess>() + data.len()) as u32,
..Default::default()
},
offset,
count: data.len() as u32,
region,
};
info!("Command: {:?}", region_write);
self.next_message_id += Wrapping(1);
let bufs = vec![IoSlice::new(region_write.as_slice()), IoSlice::new(data)];
// TODO: Use write_all_vectored() when ready
let _ = self
.stream
.write_vectored(&bufs)
.map_err(Error::StreamWrite)?;
let mut reply = RegionAccess::default();
self.stream
.read_exact(reply.as_mut_slice())
.map_err(Error::StreamRead)?;
info!("Reply: {:?}", reply);
Ok(())
}
/// Queries the server for information about the IRQ with the given `index`.
///
/// Returns the `index`, `flags` and `count` fields of the server's reply.
///
/// # Errors
///
/// `Error::StreamWrite` if the command cannot be written, `Error::StreamRead`
/// if the reply cannot be read.
pub fn get_irq_info(&mut self, index: u32) -> Result<IrqInfo, Error> {
    let total_len = std::mem::size_of::<GetIrqInfo>();
    let header = Header {
        message_id: self.next_message_id.0,
        command: Command::GetIrqInfo,
        flags: HeaderFlags::Command as u32,
        message_size: total_len as u32,
        ..Default::default()
    };
    let request = GetIrqInfo {
        header,
        argsz: (total_len - std::mem::size_of::<Header>()) as u32,
        flags: 0,
        index,
        count: 0,
    };
    info!("Command: {:?}", request);
    self.next_message_id += Wrapping(1);

    self.stream
        .write_all(request.as_slice())
        .map_err(Error::StreamWrite)?;

    let mut response = GetIrqInfo::default();
    self.stream
        .read_exact(response.as_mut_slice())
        .map_err(Error::StreamRead)?;
    info!("Reply: {:?}", response);

    let GetIrqInfo {
        index,
        flags,
        count,
        ..
    } = response;
    Ok(IrqInfo {
        index,
        flags,
        count,
    })
}
/// Sends a `SetIrqs` command, passing the descriptors in `fds` along with the
/// message, and waits for the server's reply header.
///
/// `index`, `flags`, `start` and `count` are copied into the message
/// unchanged.
///
/// # Errors
///
/// `Error::SendWithFd` if the message cannot be sent, `Error::StreamRead` if
/// the reply cannot be read. The content of the reply is only logged.
pub fn set_irqs(
    &mut self,
    index: u32,
    flags: u32,
    start: u32,
    count: u32,
    fds: &[RawFd],
) -> Result<(), Error> {
    let total_len = std::mem::size_of::<SetIrqs>();
    let header = Header {
        message_id: self.next_message_id.0,
        command: Command::SetIrqs,
        flags: HeaderFlags::Command as u32,
        message_size: total_len as u32,
        ..Default::default()
    };
    let request = SetIrqs {
        header,
        argsz: (total_len - std::mem::size_of::<Header>()) as u32,
        flags,
        start,
        index,
        count,
    };
    info!("Command: {:?}", request);
    self.next_message_id += Wrapping(1);

    self.stream
        .send_with_fds(&[request.as_slice()], fds)
        .map_err(Error::SendWithFd)?;

    let mut response = Header::default();
    self.stream
        .read_exact(response.as_mut_slice())
        .map_err(Error::StreamRead)?;
    info!("Reply: {:?}", response);

    Ok(())
}
/// Looks up the stored region whose `index` equals `region_index`.
///
/// Returns `None` when no stored region has that index.
pub fn region(&self, region_index: u32) -> Option<&Region> {
    self.regions
        .iter()
        .find(|candidate| candidate.index == region_index)
}
pub fn resettable(&self) -> bool {
self.resettable
}
}