From bae13c5c56cf5979a9a4065f0ae2f095456d12ba Mon Sep 17 00:00:00 2001
From: Thomas Barrett
Date: Wed, 18 Oct 2023 19:59:22 -0700
Subject: [PATCH] block: add aio disk backend

Signed-off-by: Thomas Barrett
---
 block/src/lib.rs                      |   7 ++
 block/src/raw_async_aio.rs            | 149 ++++++++++++++++++++++++++
 tests/integration.rs                  |  28 ++++--
 virtio-devices/src/seccomp_filters.rs |   3 +
 vmm/src/config.rs                     |   7 ++
 vmm/src/device_manager.rs             |  24 ++++-
 vmm/src/seccomp_filters.rs            |   5 +
 vmm/src/vm_config.rs                  |   4 +
 8 files changed, 213 insertions(+), 14 deletions(-)
 create mode 100644 block/src/raw_async_aio.rs

diff --git a/block/src/lib.rs b/block/src/lib.rs
index 56a58168e..35fcb0757 100644
--- a/block/src/lib.rs
+++ b/block/src/lib.rs
@@ -24,6 +24,7 @@ pub mod qcow_sync;
 ///
 /// Enabled with the `"io_uring"` feature
 pub mod raw_async;
+pub mod raw_async_aio;
 pub mod raw_sync;
 pub mod vhd;
 pub mod vhdx;
@@ -61,6 +62,7 @@ use vm_memory::{
     GuestMemoryError, GuestMemoryLoadGuard,
 };
 use vm_virtio::{AccessPlatform, Translatable};
+use vmm_sys_util::aio;
 use vmm_sys_util::eventfd::EventFd;
 use vmm_sys_util::{ioctl_io_nr, ioctl_ioc_nr};
 
@@ -565,6 +567,11 @@ unsafe impl ByteValued for VirtioBlockConfig {}
 // SAFETY: data structure only contain a series of integers
 unsafe impl ByteValued for VirtioBlockGeometry {}
 
+/// Check if aio can be used on the current system.
+pub fn block_aio_is_supported() -> bool {
+    aio::IoContext::new(1).is_ok()
+}
+
 /// Check if io_uring for block device can be used on the current system, as
 /// it correctly supports the expected io_uring features.
 pub fn block_io_uring_is_supported() -> bool {
diff --git a/block/src/raw_async_aio.rs b/block/src/raw_async_aio.rs
new file mode 100644
index 000000000..091505fb5
--- /dev/null
+++ b/block/src/raw_async_aio.rs
@@ -0,0 +1,149 @@
+//! Raw disk file backend issuing asynchronous requests through `vmm_sys_util::aio`.
+
+use crate::async_io::{
+    AsyncIo, AsyncIoError, AsyncIoResult, DiskFile, DiskFileError, DiskFileResult,
+};
+use crate::DiskTopology;
+use std::fs::File;
+use std::io::{Seek, SeekFrom};
+use std::os::unix::io::{AsRawFd, RawFd};
+use vmm_sys_util::aio;
+use vmm_sys_util::eventfd::EventFd;
+
+/// Raw disk file that hands out aio-based asynchronous I/O handles.
+pub struct RawFileDiskAio {
+    file: File,
+}
+
+impl RawFileDiskAio {
+    /// Wraps an already opened raw disk `file`.
+    pub fn new(file: File) -> Self {
+        RawFileDiskAio { file }
+    }
+}
+
+impl DiskFile for RawFileDiskAio {
+    fn size(&mut self) -> DiskFileResult<u64> {
+        self.file
+            .seek(SeekFrom::End(0))
+            .map_err(DiskFileError::Size)
+    }
+
+    fn new_async_io(&self, ring_depth: u32) -> DiskFileResult<Box<dyn AsyncIo>> {
+        Ok(Box::new(
+            RawFileAsyncAio::new(self.file.as_raw_fd(), ring_depth)
+                .map_err(DiskFileError::NewAsyncIo)?,
+        ) as Box<dyn AsyncIo>)
+    }
+
+    fn topology(&mut self) -> DiskTopology {
+        if let Ok(topology) = DiskTopology::probe(&self.file) {
+            topology
+        } else {
+            warn!("Unable to get device topology. Using default topology");
+            DiskTopology::default()
+        }
+    }
+}
+
+/// Asynchronous I/O handle pairing an aio context with the eventfd returned by `notifier()`.
+pub struct RawFileAsyncAio {
+    fd: RawFd,
+    ctx: aio::IoContext,
+    eventfd: EventFd,
+}
+
+impl RawFileAsyncAio {
+    /// Creates a non-blocking eventfd and an aio context sized by `queue_depth` for `fd`.
+    pub fn new(fd: RawFd, queue_depth: u32) -> std::io::Result<Self> {
+        let eventfd = EventFd::new(libc::EFD_NONBLOCK)?;
+        let ctx = aio::IoContext::new(queue_depth)?;
+
+        Ok(RawFileAsyncAio { fd, ctx, eventfd })
+    }
+}
+
+impl AsyncIo for RawFileAsyncAio {
+    fn notifier(&self) -> &EventFd {
+        &self.eventfd
+    }
+
+    fn read_vectored(
+        &mut self,
+        offset: libc::off_t,
+        iovecs: &[libc::iovec],
+        user_data: u64,
+    ) -> AsyncIoResult<()> {
+        // aio_buf carries the iovec array pointer and aio_nbytes the number of iovecs.
+        let iocbs = [&mut aio::IoControlBlock {
+            aio_fildes: self.fd.as_raw_fd() as u32,
+            aio_lio_opcode: aio::IOCB_CMD_PREADV as u16,
+            aio_buf: iovecs.as_ptr() as u64,
+            aio_nbytes: iovecs.len() as u64,
+            aio_offset: offset,
+            aio_data: user_data,
+            aio_flags: aio::IOCB_FLAG_RESFD,
+            aio_resfd: self.eventfd.as_raw_fd() as u32,
+            ..Default::default()
+        }];
+        let _ = self
+            .ctx
+            .submit(&iocbs[..])
+            .map_err(AsyncIoError::ReadVectored)?;
+
+        Ok(())
+    }
+
+    fn write_vectored(
+        &mut self,
+        offset: libc::off_t,
+        iovecs: &[libc::iovec],
+        user_data: u64,
+    ) -> AsyncIoResult<()> {
+        // aio_buf carries the iovec array pointer and aio_nbytes the number of iovecs.
+        let iocbs = [&mut aio::IoControlBlock {
+            aio_fildes: self.fd.as_raw_fd() as u32,
+            aio_lio_opcode: aio::IOCB_CMD_PWRITEV as u16,
+            aio_buf: iovecs.as_ptr() as u64,
+            aio_nbytes: iovecs.len() as u64,
+            aio_offset: offset,
+            aio_data: user_data,
+            aio_flags: aio::IOCB_FLAG_RESFD,
+            aio_resfd: self.eventfd.as_raw_fd() as u32,
+            ..Default::default()
+        }];
+        let _ = self
+            .ctx
+            .submit(&iocbs[..])
+            .map_err(AsyncIoError::WriteVectored)?;
+
+        Ok(())
+    }
+
+    fn fsync(&mut self, user_data: Option<u64>) -> AsyncIoResult<()> {
+        // A missing user_data is submitted as 0.
+        let iocbs = [&mut aio::IoControlBlock {
+            aio_fildes: self.fd.as_raw_fd() as u32,
+            aio_lio_opcode: aio::IOCB_CMD_FSYNC as u16,
+            aio_data: user_data.unwrap_or(0),
+            aio_flags: aio::IOCB_FLAG_RESFD,
+            aio_resfd: self.eventfd.as_raw_fd() as u32,
+            ..Default::default()
+        }];
+        let _ = self.ctx.submit(&iocbs[..]).map_err(AsyncIoError::Fsync)?;
+
+        Ok(())
+    }
+
+    fn next_completed_request(&mut self) -> Option<(u64, i32)> {
+        let mut events: [aio::IoEvent; 1] = [aio::IoEvent::default()];
+        // Fetch at most one completion, requesting a minimum of 0 events.
+        // NOTE(review): a failing get_events() panics here; confirm this is intended.
+        let rc = self.ctx.get_events(0, &mut events, None).unwrap();
+        if rc == 0 {
+            None
+        } else {
+            Some((events[0].data, events[0].res as i32))
+        }
+    }
+}
diff --git a/tests/integration.rs b/tests/integration.rs
index 777ead03f..56efba8b7 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -3055,7 +3055,7 @@ mod common_parallel {
         handle_child_output(r, &output);
     }
 
-    fn _test_virtio_block(image_name: &str, disable_io_uring: bool) {
+    fn _test_virtio_block(image_name: &str, disable_io_uring: bool, disable_aio: bool) {
         let focal = UbuntuDiskConfig::new(image_name.to_string());
         let guest = Guest::new(Box::new(focal));
 
@@ -3085,9 +3085,10 @@ mod common_parallel {
                 )
                 .as_str(),
                 format!(
-                    "path={},readonly=on,direct=on,num_queues=4,_disable_io_uring={}",
+                    "path={},readonly=on,direct=on,num_queues=4,_disable_io_uring={},_disable_aio={}",
                     blk_file_path.to_str().unwrap(),
-                    disable_io_uring
+                    disable_io_uring,
+                    disable_aio,
                 )
                 .as_str(),
             ])
@@ -3140,23 +3141,28 @@ mod common_parallel {
     }
 
     #[test]
-    fn test_virtio_block() {
-        _test_virtio_block(FOCAL_IMAGE_NAME, false)
+    fn test_virtio_block_io_uring() {
+        _test_virtio_block(FOCAL_IMAGE_NAME, false, true)
     }
 
     #[test]
-    fn test_virtio_block_disable_io_uring() {
-        _test_virtio_block(FOCAL_IMAGE_NAME, true)
+    fn test_virtio_block_aio() {
+        _test_virtio_block(FOCAL_IMAGE_NAME, true, false)
+    }
+
+    #[test]
+    fn test_virtio_block_sync() {
+        _test_virtio_block(FOCAL_IMAGE_NAME, true, true)
     }
 
     #[test]
     fn test_virtio_block_qcow2() {
-        _test_virtio_block(FOCAL_IMAGE_NAME_QCOW2, false)
+        _test_virtio_block(FOCAL_IMAGE_NAME_QCOW2, false, false)
     }
 
     #[test]
     fn test_virtio_block_qcow2_backing_file() {
-        _test_virtio_block(FOCAL_IMAGE_NAME_QCOW2_BACKING_FILE, false)
+        _test_virtio_block(FOCAL_IMAGE_NAME_QCOW2_BACKING_FILE, false, false)
     }
 
     #[test]
@@ -3181,7 +3187,7 @@ mod common_parallel {
             .output()
             .expect("Expect generating VHD image from RAW image");
 
-        _test_virtio_block(FOCAL_IMAGE_NAME_VHD, false)
+        _test_virtio_block(FOCAL_IMAGE_NAME_VHD, false, false)
     }
 
     #[test]
@@ -3205,7 +3211,7 @@ mod common_parallel {
             .output()
             .expect("Expect generating dynamic VHDx image from RAW image");
 
-        _test_virtio_block(FOCAL_IMAGE_NAME_VHDX, false)
+        _test_virtio_block(FOCAL_IMAGE_NAME_VHDX, false, false)
     }
 
     #[test]
diff --git a/virtio-devices/src/seccomp_filters.rs b/virtio-devices/src/seccomp_filters.rs
index 43d723b8b..31a4edc0f 100644
--- a/virtio-devices/src/seccomp_filters.rs
+++ b/virtio-devices/src/seccomp_filters.rs
@@ -86,6 +86,9 @@ fn virtio_block_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
         (libc::SYS_fsync, vec![]),
         (libc::SYS_ftruncate, vec![]),
         (libc::SYS_getrandom, vec![]),
+        (libc::SYS_io_destroy, vec![]),
+        (libc::SYS_io_getevents, vec![]),
+        (libc::SYS_io_submit, vec![]),
         (libc::SYS_io_uring_enter, vec![]),
         (libc::SYS_lseek, vec![]),
         (libc::SYS_prctl, vec![]),
diff --git a/vmm/src/config.rs b/vmm/src/config.rs
index b88ee6f33..3bac721ac 100644
--- a/vmm/src/config.rs
+++ b/vmm/src/config.rs
@@ -888,6 +888,7 @@ impl DiskConfig {
             .add("ops_refill_time")
             .add("id")
             .add("_disable_io_uring")
+            .add("_disable_aio")
             .add("pci_segment")
             .add("serial");
         parser.parse(disk).map_err(Error::ParseDisk)?;
@@ -928,6 +929,11 @@ impl DiskConfig {
             .map_err(Error::ParseDisk)?
             .unwrap_or(Toggle(false))
             .0;
+        let disable_aio = parser
+            .convert::<Toggle>("_disable_aio")
+            .map_err(Error::ParseDisk)?
+            .unwrap_or(Toggle(false))
+            .0;
         let pci_segment = parser
             .convert("pci_segment")
             .map_err(Error::ParseDisk)?
@@ -996,6 +1002,7 @@ impl DiskConfig {
             rate_limiter_config,
             id,
             disable_io_uring,
+            disable_aio,
             pci_segment,
             serial,
         })
diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs
index 17c227131..24900e787 100644
--- a/vmm/src/device_manager.rs
+++ b/vmm/src/device_manager.rs
@@ -35,9 +35,9 @@ use arch::NumaNodes;
 #[cfg(target_arch = "aarch64")]
 use arch::{DeviceType, MmioDeviceInfo};
 use block::{
-    async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
-    fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_sync::RawFileDiskSync,
-    vhdx, vhdx_sync::VhdxDiskSync, ImageType,
+    async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type,
+    fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio,
+    raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType,
 };
 #[cfg(feature = "io_uring")]
 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
@@ -949,6 +949,9 @@ pub struct DeviceManager {
     // io_uring availability if detected
     io_uring_supported: Option<bool>,
 
+    // aio availability if detected
+    aio_supported: Option<bool>,
+
     // List of unique identifiers provided at boot through the configuration.
     boot_id_list: BTreeSet<String>,
 
@@ -1138,6 +1141,7 @@ impl DeviceManager {
             pvpanic_device: None,
             force_iommu,
             io_uring_supported: None,
+            aio_supported: None,
             boot_id_list,
             timestamp,
             pending_activations: Arc::new(Mutex::new(Vec::default())),
@@ -2171,6 +2175,17 @@ impl DeviceManager {
         Ok(devices)
     }
 
+    // Cache whether aio is supported to avoid checking for every block device
+    fn aio_is_supported(&mut self) -> bool {
+        if let Some(supported) = self.aio_supported {
+            return supported;
+        }
+
+        let supported = block_aio_is_supported();
+        self.aio_supported = Some(supported);
+        supported
+    }
+
     // Cache whether io_uring is supported to avoid probing for very block device
     fn io_uring_is_supported(&mut self) -> bool {
         if let Some(supported) = self.io_uring_supported {
@@ -2292,6 +2307,9 @@ impl DeviceManager {
                         {
                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                         }
+                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
+                        info!("Using asynchronous RAW disk file (aio)");
+                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                     } else {
                         info!("Using synchronous RAW disk file");
                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs
index 29d2df60f..83f73a278 100644
--- a/vmm/src/seccomp_filters.rs
+++ b/vmm/src/seccomp_filters.rs
@@ -553,6 +553,11 @@ fn vmm_thread_rules(
             libc::SYS_ioctl,
             create_vmm_ioctl_seccomp_rule(hypervisor_type)?,
         ),
+        (libc::SYS_io_cancel, vec![]),
+        (libc::SYS_io_destroy, vec![]),
+        (libc::SYS_io_getevents, vec![]),
+        (libc::SYS_io_setup, vec![]),
+        (libc::SYS_io_submit, vec![]),
         (libc::SYS_io_uring_enter, vec![]),
         (libc::SYS_io_uring_setup, vec![]),
         (libc::SYS_io_uring_register, vec![]),
diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs
index 6d73795c3..dc028d890 100644
--- a/vmm/src/vm_config.rs
+++ b/vmm/src/vm_config.rs
@@ -218,6 +218,9 @@ pub struct DiskConfig {
     // For testing use only. Not exposed in API.
     #[serde(default)]
     pub disable_io_uring: bool,
+    // For testing use only. Not exposed in API.
+    #[serde(default)]
+    pub disable_aio: bool,
     #[serde(default)]
     pub pci_segment: u16,
     #[serde(default)]
@@ -249,6 +252,7 @@ impl Default for DiskConfig {
             vhost_socket: None,
             id: None,
             disable_io_uring: false,
+            disable_aio: false,
             rate_limiter_config: None,
             pci_segment: 0,
             serial: None,