block: add aio disk backend

Signed-off-by: Thomas Barrett <tbarrett@crusoeenergy.com>
This commit is contained in:
Thomas Barrett 2023-10-18 19:59:22 -07:00 committed by Bo Chen
parent 6105d3ca83
commit bae13c5c56
8 changed files with 202 additions and 14 deletions

View File

@ -24,6 +24,7 @@ pub mod qcow_sync;
/// ///
/// Enabled with the `"io_uring"` feature /// Enabled with the `"io_uring"` feature
pub mod raw_async; pub mod raw_async;
pub mod raw_async_aio;
pub mod raw_sync; pub mod raw_sync;
pub mod vhd; pub mod vhd;
pub mod vhdx; pub mod vhdx;
@ -61,6 +62,7 @@ use vm_memory::{
GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryError, GuestMemoryLoadGuard,
}; };
use vm_virtio::{AccessPlatform, Translatable}; use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::aio;
use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::{ioctl_io_nr, ioctl_ioc_nr}; use vmm_sys_util::{ioctl_io_nr, ioctl_ioc_nr};
@ -565,6 +567,11 @@ unsafe impl ByteValued for VirtioBlockConfig {}
// SAFETY: data structure only contain a series of integers // SAFETY: data structure only contain a series of integers
unsafe impl ByteValued for VirtioBlockGeometry {} unsafe impl ByteValued for VirtioBlockGeometry {}
/// Reports whether AIO can be used on the current system.
///
/// The probe sets up a throwaway AIO context sized for a single event and
/// only looks at whether that setup succeeded.
pub fn block_aio_is_supported() -> bool {
    let probe = aio::IoContext::new(1);
    probe.is_ok()
}
/// Check if io_uring for block device can be used on the current system, as /// Check if io_uring for block device can be used on the current system, as
/// it correctly supports the expected io_uring features. /// it correctly supports the expected io_uring features.
pub fn block_io_uring_is_supported() -> bool { pub fn block_io_uring_is_supported() -> bool {

138
block/src/raw_async_aio.rs Normal file
View File

@ -0,0 +1,138 @@
use crate::async_io::{
AsyncIo, AsyncIoError, AsyncIoResult, DiskFile, DiskFileError, DiskFileResult,
};
use crate::DiskTopology;
use std::fs::File;
use std::io::{Seek, SeekFrom};
use std::os::unix::io::{AsRawFd, RawFd};
use vmm_sys_util::aio;
use vmm_sys_util::eventfd::EventFd;
/// Raw disk image backed by a [`File`] and served through the AIO request path.
pub struct RawFileDiskAio {
    /// Open handle on the disk image.
    file: File,
}

impl RawFileDiskAio {
    /// Wraps an already opened image file.
    pub fn new(file: File) -> Self {
        Self { file }
    }
}
impl DiskFile for RawFileDiskAio {
    /// Returns the image size in bytes, obtained by seeking to the end of the
    /// file. A seek failure is reported as `DiskFileError::Size`.
    fn size(&mut self) -> DiskFileResult<u64> {
        let end_of_file = SeekFrom::End(0);
        self.file.seek(end_of_file).map_err(DiskFileError::Size)
    }

    /// Creates a new AIO request handler operating on this image's file
    /// descriptor, with `ring_depth` passed through as its queue depth.
    /// A setup failure is reported as `DiskFileError::NewAsyncIo`.
    fn new_async_io(&self, ring_depth: u32) -> DiskFileResult<Box<dyn AsyncIo>> {
        let raw_fd = self.file.as_raw_fd();
        let handler =
            RawFileAsyncAio::new(raw_fd, ring_depth).map_err(DiskFileError::NewAsyncIo)?;
        let boxed: Box<dyn AsyncIo> = Box::new(handler);
        Ok(boxed)
    }

    /// Probes the topology of the backing file, falling back to the default
    /// topology (with a warning) when probing fails.
    fn topology(&mut self) -> DiskTopology {
        match DiskTopology::probe(&self.file) {
            Ok(probed) => probed,
            Err(_) => {
                warn!("Unable to get device topology. Using default topology");
                DiskTopology::default()
            }
        }
    }
}
/// AIO request handler for a raw disk image.
pub struct RawFileAsyncAio {
    /// Raw descriptor of the disk image that requests are issued against.
    fd: RawFd,
    /// AIO context that requests are submitted to and completions read from.
    ctx: aio::IoContext,
    /// Eventfd attached to every submitted request and exposed via `notifier()`.
    eventfd: EventFd,
}

impl RawFileAsyncAio {
    /// Builds a handler for `fd` with an AIO context sized by `queue_depth`.
    ///
    /// The non-blocking eventfd is created before the context; whichever
    /// step fails first has its I/O error returned to the caller.
    pub fn new(fd: RawFd, queue_depth: u32) -> std::io::Result<Self> {
        let notifier = EventFd::new(libc::EFD_NONBLOCK)?;
        let context = aio::IoContext::new(queue_depth)?;
        Ok(Self {
            fd,
            ctx: context,
            eventfd: notifier,
        })
    }
}
impl AsyncIo for RawFileAsyncAio {
fn notifier(&self) -> &EventFd {
&self.eventfd
}
fn read_vectored(
&mut self,
offset: libc::off_t,
iovecs: &[libc::iovec],
user_data: u64,
) -> AsyncIoResult<()> {
let iocbs = [&mut aio::IoControlBlock {
aio_fildes: self.fd.as_raw_fd() as u32,
aio_lio_opcode: aio::IOCB_CMD_PREADV as u16,
aio_buf: iovecs.as_ptr() as u64,
aio_nbytes: iovecs.len() as u64,
aio_offset: offset,
aio_data: user_data,
aio_flags: aio::IOCB_FLAG_RESFD,
aio_resfd: self.eventfd.as_raw_fd() as u32,
..Default::default()
}];
let _ = self
.ctx
.submit(&iocbs[..])
.map_err(AsyncIoError::ReadVectored)?;
Ok(())
}
fn write_vectored(
&mut self,
offset: libc::off_t,
iovecs: &[libc::iovec],
user_data: u64,
) -> AsyncIoResult<()> {
let iocbs = [&mut aio::IoControlBlock {
aio_fildes: self.fd.as_raw_fd() as u32,
aio_lio_opcode: aio::IOCB_CMD_PWRITEV as u16,
aio_buf: iovecs.as_ptr() as u64,
aio_nbytes: iovecs.len() as u64,
aio_offset: offset,
aio_data: user_data,
aio_flags: aio::IOCB_FLAG_RESFD,
aio_resfd: self.eventfd.as_raw_fd() as u32,
..Default::default()
}];
let _ = self
.ctx
.submit(&iocbs[..])
.map_err(AsyncIoError::WriteVectored)?;
Ok(())
}
fn fsync(&mut self, user_data: Option<u64>) -> AsyncIoResult<()> {
let iocbs = [&mut aio::IoControlBlock {
aio_fildes: self.fd.as_raw_fd() as u32,
aio_lio_opcode: aio::IOCB_CMD_FSYNC as u16,
aio_data: user_data.unwrap_or(0),
aio_flags: aio::IOCB_FLAG_RESFD,
aio_resfd: self.eventfd.as_raw_fd() as u32,
..Default::default()
}];
let _ = self.ctx.submit(&iocbs[..]).map_err(AsyncIoError::Fsync)?;
Ok(())
}
fn next_completed_request(&mut self) -> Option<(u64, i32)> {
let mut events: [aio::IoEvent; 1] = [aio::IoEvent::default()];
let rc = self.ctx.get_events(0, &mut events, None).unwrap();
if rc == 0 {
None
} else {
Some((events[0].data, events[0].res as i32))
}
}
}

View File

@ -3055,7 +3055,7 @@ mod common_parallel {
handle_child_output(r, &output); handle_child_output(r, &output);
} }
fn _test_virtio_block(image_name: &str, disable_io_uring: bool) { fn _test_virtio_block(image_name: &str, disable_io_uring: bool, disable_aio: bool) {
let focal = UbuntuDiskConfig::new(image_name.to_string()); let focal = UbuntuDiskConfig::new(image_name.to_string());
let guest = Guest::new(Box::new(focal)); let guest = Guest::new(Box::new(focal));
@ -3085,9 +3085,10 @@ mod common_parallel {
) )
.as_str(), .as_str(),
format!( format!(
"path={},readonly=on,direct=on,num_queues=4,_disable_io_uring={}", "path={},readonly=on,direct=on,num_queues=4,_disable_io_uring={},_disable_aio={}",
blk_file_path.to_str().unwrap(), blk_file_path.to_str().unwrap(),
disable_io_uring disable_io_uring,
disable_aio,
) )
.as_str(), .as_str(),
]) ])
@ -3140,23 +3141,28 @@ mod common_parallel {
} }
#[test] #[test]
fn test_virtio_block() { fn test_virtio_block_io_uring() {
_test_virtio_block(FOCAL_IMAGE_NAME, false) _test_virtio_block(FOCAL_IMAGE_NAME, false, true)
} }
#[test] #[test]
fn test_virtio_block_disable_io_uring() { fn test_virtio_block_aio() {
_test_virtio_block(FOCAL_IMAGE_NAME, true) _test_virtio_block(FOCAL_IMAGE_NAME, true, false)
}
#[test]
fn test_virtio_block_sync() {
_test_virtio_block(FOCAL_IMAGE_NAME, true, true)
} }
#[test] #[test]
fn test_virtio_block_qcow2() { fn test_virtio_block_qcow2() {
_test_virtio_block(FOCAL_IMAGE_NAME_QCOW2, false) _test_virtio_block(FOCAL_IMAGE_NAME_QCOW2, false, false)
} }
#[test] #[test]
fn test_virtio_block_qcow2_backing_file() { fn test_virtio_block_qcow2_backing_file() {
_test_virtio_block(FOCAL_IMAGE_NAME_QCOW2_BACKING_FILE, false) _test_virtio_block(FOCAL_IMAGE_NAME_QCOW2_BACKING_FILE, false, false)
} }
#[test] #[test]
@ -3181,7 +3187,7 @@ mod common_parallel {
.output() .output()
.expect("Expect generating VHD image from RAW image"); .expect("Expect generating VHD image from RAW image");
_test_virtio_block(FOCAL_IMAGE_NAME_VHD, false) _test_virtio_block(FOCAL_IMAGE_NAME_VHD, false, false)
} }
#[test] #[test]
@ -3205,7 +3211,7 @@ mod common_parallel {
.output() .output()
.expect("Expect generating dynamic VHDx image from RAW image"); .expect("Expect generating dynamic VHDx image from RAW image");
_test_virtio_block(FOCAL_IMAGE_NAME_VHDX, false) _test_virtio_block(FOCAL_IMAGE_NAME_VHDX, false, false)
} }
#[test] #[test]

View File

@ -86,6 +86,9 @@ fn virtio_block_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
(libc::SYS_fsync, vec![]), (libc::SYS_fsync, vec![]),
(libc::SYS_ftruncate, vec![]), (libc::SYS_ftruncate, vec![]),
(libc::SYS_getrandom, vec![]), (libc::SYS_getrandom, vec![]),
(libc::SYS_io_destroy, vec![]),
(libc::SYS_io_getevents, vec![]),
(libc::SYS_io_submit, vec![]),
(libc::SYS_io_uring_enter, vec![]), (libc::SYS_io_uring_enter, vec![]),
(libc::SYS_lseek, vec![]), (libc::SYS_lseek, vec![]),
(libc::SYS_prctl, vec![]), (libc::SYS_prctl, vec![]),

View File

@ -888,6 +888,7 @@ impl DiskConfig {
.add("ops_refill_time") .add("ops_refill_time")
.add("id") .add("id")
.add("_disable_io_uring") .add("_disable_io_uring")
.add("_disable_aio")
.add("pci_segment") .add("pci_segment")
.add("serial"); .add("serial");
parser.parse(disk).map_err(Error::ParseDisk)?; parser.parse(disk).map_err(Error::ParseDisk)?;
@ -928,6 +929,11 @@ impl DiskConfig {
.map_err(Error::ParseDisk)? .map_err(Error::ParseDisk)?
.unwrap_or(Toggle(false)) .unwrap_or(Toggle(false))
.0; .0;
let disable_aio = parser
.convert::<Toggle>("_disable_aio")
.map_err(Error::ParseDisk)?
.unwrap_or(Toggle(false))
.0;
let pci_segment = parser let pci_segment = parser
.convert("pci_segment") .convert("pci_segment")
.map_err(Error::ParseDisk)? .map_err(Error::ParseDisk)?
@ -996,6 +1002,7 @@ impl DiskConfig {
rate_limiter_config, rate_limiter_config,
id, id,
disable_io_uring, disable_io_uring,
disable_aio,
pci_segment, pci_segment,
serial, serial,
}) })

View File

@ -35,9 +35,9 @@ use arch::NumaNodes;
#[cfg(target_arch = "aarch64")] #[cfg(target_arch = "aarch64")]
use arch::{DeviceType, MmioDeviceInfo}; use arch::{DeviceType, MmioDeviceInfo};
use block::{ use block::{
async_io::DiskFile, block_io_uring_is_supported, detect_image_type, async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type,
fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_sync::RawFileDiskSync, fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio,
vhdx, vhdx_sync::VhdxDiskSync, ImageType, raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType,
}; };
#[cfg(feature = "io_uring")] #[cfg(feature = "io_uring")]
use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
@ -949,6 +949,9 @@ pub struct DeviceManager {
// io_uring availability if detected // io_uring availability if detected
io_uring_supported: Option<bool>, io_uring_supported: Option<bool>,
// aio availability if detected
aio_supported: Option<bool>,
// List of unique identifiers provided at boot through the configuration. // List of unique identifiers provided at boot through the configuration.
boot_id_list: BTreeSet<String>, boot_id_list: BTreeSet<String>,
@ -1138,6 +1141,7 @@ impl DeviceManager {
pvpanic_device: None, pvpanic_device: None,
force_iommu, force_iommu,
io_uring_supported: None, io_uring_supported: None,
aio_supported: None,
boot_id_list, boot_id_list,
timestamp, timestamp,
pending_activations: Arc::new(Mutex::new(Vec::default())), pending_activations: Arc::new(Mutex::new(Vec::default())),
@ -2171,6 +2175,17 @@ impl DeviceManager {
Ok(devices) Ok(devices)
} }
// Cache whether aio is supported to avoid checking for very block device
fn aio_is_supported(&mut self) -> bool {
if let Some(supported) = self.aio_supported {
return supported;
}
let supported = block_aio_is_supported();
self.aio_supported = Some(supported);
supported
}
// Cache whether io_uring is supported to avoid probing for very block device // Cache whether io_uring is supported to avoid probing for very block device
fn io_uring_is_supported(&mut self) -> bool { fn io_uring_is_supported(&mut self) -> bool {
if let Some(supported) = self.io_uring_supported { if let Some(supported) = self.io_uring_supported {
@ -2292,6 +2307,9 @@ impl DeviceManager {
{ {
Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile> Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
} }
} else if !disk_cfg.disable_aio && self.aio_is_supported() {
info!("Using asynchronous RAW disk file (aio)");
Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
} else { } else {
info!("Using synchronous RAW disk file"); info!("Using synchronous RAW disk file");
Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile> Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>

View File

@ -553,6 +553,11 @@ fn vmm_thread_rules(
libc::SYS_ioctl, libc::SYS_ioctl,
create_vmm_ioctl_seccomp_rule(hypervisor_type)?, create_vmm_ioctl_seccomp_rule(hypervisor_type)?,
), ),
(libc::SYS_io_cancel, vec![]),
(libc::SYS_io_destroy, vec![]),
(libc::SYS_io_getevents, vec![]),
(libc::SYS_io_setup, vec![]),
(libc::SYS_io_submit, vec![]),
(libc::SYS_io_uring_enter, vec![]), (libc::SYS_io_uring_enter, vec![]),
(libc::SYS_io_uring_setup, vec![]), (libc::SYS_io_uring_setup, vec![]),
(libc::SYS_io_uring_register, vec![]), (libc::SYS_io_uring_register, vec![]),

View File

@ -218,6 +218,9 @@ pub struct DiskConfig {
// For testing use only. Not exposed in API. // For testing use only. Not exposed in API.
#[serde(default)] #[serde(default)]
pub disable_io_uring: bool, pub disable_io_uring: bool,
// For testing use only. Not exposed in API.
#[serde(default)]
pub disable_aio: bool,
#[serde(default)] #[serde(default)]
pub pci_segment: u16, pub pci_segment: u16,
#[serde(default)] #[serde(default)]
@ -249,6 +252,7 @@ impl Default for DiskConfig {
vhost_socket: None, vhost_socket: None,
id: None, id: None,
disable_io_uring: false, disable_io_uring: false,
disable_aio: false,
rate_limiter_config: None, rate_limiter_config: None,
pci_segment: 0, pci_segment: 0,
serial: None, serial: None,