vmm: Rely on virtio-blk io_uring when possible

If the host supports io_uring and the specific io_uring options that
are needed, the VMM chooses the asynchronous version of virtio-blk.
This enables better I/O performance compared to the default
synchronous version.

It is also important to note that the VMM won't be able to use the
asynchronous version if the backend image is in QCOW format.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Authored by Sebastien Boeuf on 2020-07-30 12:44:05 +02:00, committed by Rob Bradford
parent 64283726e7
commit 917027c55b
4 changed files with 88 additions and 52 deletions
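
The runtime decision hinges on the block_io_uring_is_supported() helper pulled in from the new block_util dependency (see the DeviceManager hunk below); its implementation is not part of this diff. As a rough idea of what such a probe can look like, here is a minimal sketch assuming the io-uring crate and assuming that vectored reads/writes plus fsync are the opcodes the asynchronous backend needs:

    // Hypothetical sketch; the real block_util helper may differ.
    use io_uring::{opcode, IoUring, Probe};

    pub fn block_io_uring_is_supported() -> bool {
        // Ring creation fails on kernels without io_uring support (pre-5.1).
        let ring = match IoUring::new(8) {
            Ok(ring) => ring,
            Err(_) => return false,
        };

        // Ask the kernel which opcodes it implements (io_uring_register
        // with a probe).
        let mut probe = Probe::new();
        if ring.submitter().register_probe(&mut probe).is_err() {
            return false;
        }

        // Assumed requirement: vectored reads/writes and fsync.
        probe.is_supported(opcode::Readv::CODE)
            && probe.is_supported(opcode::Writev::CODE)
            && probe.is_supported(opcode::Fsync::CODE)
    }

Probing for opcodes rather than only creating a ring is presumably what the commit message means by "the specific io_uring options that are needed": a kernel can expose io_uring without supporting every opcode.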

Cargo.lock (generated)

@@ -1613,6 +1613,7 @@ dependencies = [
"anyhow",
"arc-swap",
"arch",
"block_util",
"clap",
"credibility",
"devices",


@@ -19,6 +19,7 @@ clap = "2.33.1"
acpi_tables = { path = "../acpi_tables", optional = true }
anyhow = "1.0"
arch = { path = "../arch" }
+block_util = { path = "../block_util" }
devices = { path = "../devices" }
epoll = ">=4.0.1"
hypervisor = { path = "../hypervisor" }


@@ -32,6 +32,7 @@ use arch::layout;
use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
#[cfg(target_arch = "aarch64")]
use arch::DeviceType;
+use block_util::block_io_uring_is_supported;
#[cfg(target_arch = "aarch64")]
use devices::gic;
#[cfg(target_arch = "x86_64")]
@@ -1662,70 +1663,95 @@ impl DeviceManager {
)
.map_err(DeviceManagerError::Disk)?;
-let mut raw_img = qcow::RawFile::new(image, disk_cfg.direct);
+let mut raw_img = qcow::RawFile::new(image.try_clone().unwrap(), disk_cfg.direct);
let image_type = qcow::detect_image_type(&mut raw_img)
.map_err(DeviceManagerError::DetectImageType)?;
-match image_type {
+let (virtio_device, migratable_device) = match image_type {
ImageType::Raw => {
-let dev = virtio_devices::Block::new(
-id.clone(),
-raw_img,
-disk_cfg
-.path
-.as_ref()
-.ok_or(DeviceManagerError::NoDiskPath)?
-.clone(),
-disk_cfg.readonly,
-disk_cfg.iommu,
-disk_cfg.num_queues,
-disk_cfg.queue_size,
-)
-.map_err(DeviceManagerError::CreateVirtioBlock)?;
+// Use asynchronous backend relying on io_uring if the
+// syscalls are supported.
+if block_io_uring_is_supported() {
+let dev = Arc::new(Mutex::new(
+virtio_devices::BlockIoUring::new(
+id.clone(),
+image,
+disk_cfg
+.path
+.as_ref()
+.ok_or(DeviceManagerError::NoDiskPath)?
+.clone(),
+disk_cfg.readonly,
+disk_cfg.iommu,
+disk_cfg.num_queues,
+disk_cfg.queue_size,
+)
+.map_err(DeviceManagerError::CreateVirtioBlock)?,
+));
-let block = Arc::new(Mutex::new(dev));
+(
+Arc::clone(&dev) as VirtioDeviceArc,
+dev as Arc<Mutex<dyn Migratable>>,
+)
+} else {
+let dev = Arc::new(Mutex::new(
+virtio_devices::Block::new(
+id.clone(),
+raw_img,
+disk_cfg
+.path
+.as_ref()
+.ok_or(DeviceManagerError::NoDiskPath)?
+.clone(),
+disk_cfg.readonly,
+disk_cfg.iommu,
+disk_cfg.num_queues,
+disk_cfg.queue_size,
+)
+.map_err(DeviceManagerError::CreateVirtioBlock)?,
+));
-// Fill the device tree with a new node. In case of restore, we
-// know there is nothing to do, so we can simply override the
-// existing entry.
-self.device_tree
-.lock()
-.unwrap()
-.insert(id.clone(), device_node!(id, block));
-Ok((Arc::clone(&block) as VirtioDeviceArc, disk_cfg.iommu, id))
+(
+Arc::clone(&dev) as VirtioDeviceArc,
+dev as Arc<Mutex<dyn Migratable>>,
+)
+}
}
ImageType::Qcow2 => {
let qcow_img =
QcowFile::from(raw_img).map_err(DeviceManagerError::QcowDeviceCreate)?;
-let dev = virtio_devices::Block::new(
-id.clone(),
-qcow_img,
-disk_cfg
-.path
-.as_ref()
-.ok_or(DeviceManagerError::NoDiskPath)?
-.clone(),
-disk_cfg.readonly,
-disk_cfg.iommu,
-disk_cfg.num_queues,
-disk_cfg.queue_size,
+let dev = Arc::new(Mutex::new(
+virtio_devices::Block::new(
+id.clone(),
+qcow_img,
+disk_cfg
+.path
+.as_ref()
+.ok_or(DeviceManagerError::NoDiskPath)?
+.clone(),
+disk_cfg.readonly,
+disk_cfg.iommu,
+disk_cfg.num_queues,
+disk_cfg.queue_size,
+)
+.map_err(DeviceManagerError::CreateVirtioBlock)?,
+));
+(
+Arc::clone(&dev) as VirtioDeviceArc,
+dev as Arc<Mutex<dyn Migratable>>,
+)
-.map_err(DeviceManagerError::CreateVirtioBlock)?;
-let block = Arc::new(Mutex::new(dev));
-// Fill the device tree with a new node. In case of restore, we
-// know there is nothing to do, so we can simply override the
-// existing entry.
-self.device_tree
-.lock()
-.unwrap()
-.insert(id.clone(), device_node!(id, block));
-Ok((Arc::clone(&block) as VirtioDeviceArc, disk_cfg.iommu, id))
}
-}
+};
+// Fill the device tree with a new node. In case of restore, we
+// know there is nothing to do, so we can simply override the
+// existing entry.
+self.device_tree
+.lock()
+.unwrap()
+.insert(id.clone(), device_node!(id, migratable_device));
+Ok((virtio_device, disk_cfg.iommu, id))
}
}
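
Two details in the DeviceManager hunk above are worth spelling out. First, image.try_clone().unwrap() is needed because the File is now consumed twice: the cloned handle feeds the RawFile used for image-type detection (and the synchronous fallback paths), while the original handle is passed to BlockIoUring on the io_uring path. Second, each match arm now evaluates to a (VirtioDeviceArc, Arc<Mutex<dyn Migratable>>) pair, so the device-tree insertion and the final Ok(...) happen exactly once after the match instead of being duplicated per arm. The double coercion of a single Arc<Mutex<..>> into two trait objects is easiest to see in isolation; a self-contained sketch with stand-in trait definitions:

    // Stand-in traits; the VMM's real VirtioDevice and Migratable traits are
    // richer, but the ownership pattern is the same.
    use std::sync::{Arc, Mutex};

    trait VirtioDevice {}
    trait Migratable {}

    struct BlockIoUring;
    impl VirtioDevice for BlockIoUring {}
    impl Migratable for BlockIoUring {}

    type VirtioDeviceArc = Arc<Mutex<dyn VirtioDevice>>;

    fn make_block_device() -> (VirtioDeviceArc, Arc<Mutex<dyn Migratable>>) {
        let dev = Arc::new(Mutex::new(BlockIoUring));
        (
            // One handle goes to the virtio transport...
            Arc::clone(&dev) as VirtioDeviceArc,
            // ...the other to the device tree; both share the same device.
            dev as Arc<Mutex<dyn Migratable>>,
        )
    }

    fn main() {
        let (virtio_device, migratable_device) = make_block_device();
        // Both trait objects point at the same allocation.
        assert_eq!(Arc::strong_count(&virtio_device), 2);
        drop(migratable_device);
        assert_eq!(Arc::strong_count(&virtio_device), 1);
    }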


@@ -36,6 +36,11 @@ macro_rules! or {
($($x:expr),*) => (vec![$($x),*])
}
+// Define io_uring syscalls as they are not yet part of libc.
+const SYS_IO_URING_SETUP: i64 = 425;
+const SYS_IO_URING_ENTER: i64 = 426;
+const SYS_IO_URING_REGISTER: i64 = 427;
// See include/uapi/asm-generic/ioctls.h in the kernel code.
const TCGETS: u64 = 0x5401;
const TCSETS: u64 = 0x5402;
@@ -279,6 +284,9 @@ pub fn vmm_thread_filter() -> Result<SeccompFilter, Error> {
allow_syscall(libc::SYS_gettimeofday),
allow_syscall(libc::SYS_getuid),
allow_syscall_if(libc::SYS_ioctl, create_vmm_ioctl_seccomp_rule()?),
+allow_syscall(SYS_IO_URING_ENTER),
+allow_syscall(SYS_IO_URING_SETUP),
+allow_syscall(SYS_IO_URING_REGISTER),
allow_syscall(libc::SYS_listen),
allow_syscall(libc::SYS_lseek),
allow_syscall(libc::SYS_madvise),
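
The three allow rules added above cover the whole io_uring syscall surface: io_uring_setup (425) creates the rings, io_uring_register (427) attaches resources such as files, buffers, or probes to a ring, and io_uring_enter (426) submits work and waits for completions. A hedged, self-contained demo (not code from this commit, and using the io-uring crate rather than raw syscalls) showing which rule each step relies on:

    // Hypothetical demo mapping io-uring crate calls to the raw syscalls the
    // seccomp filter above must allow. The Read opcode needs Linux >= 5.6.
    use io_uring::{opcode, types, IoUring};
    use std::fs::File;
    use std::os::unix::io::AsRawFd;

    fn main() -> std::io::Result<()> {
        // io_uring_setup(2): create the submission/completion rings.
        let mut ring = IoUring::new(8)?;

        // io_uring_register(2): pre-register the file descriptor with the ring.
        let file = File::open("/dev/zero")?;
        ring.submitter().register_files(&[file.as_raw_fd()])?;

        // Queue a read against registered file index 0.
        let mut buf = [0u8; 512];
        let read_e =
            opcode::Read::new(types::Fixed(0), buf.as_mut_ptr(), buf.len() as _).build();
        unsafe {
            ring.submission()
                .push(&read_e)
                .expect("submission queue full");
        }

        // io_uring_enter(2): submit the request and wait for its completion.
        ring.submit_and_wait(1)?;
        let cqe = ring.completion().next().expect("missing completion");
        assert_eq!(cqe.result(), buf.len() as i32);

        Ok(())
    }

Once the libc crate exports these syscall numbers, the hard-coded 425/426/427 constants can be dropped in favour of the libc definitions, as the comment in the hunk above anticipates.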