block: add aio disk backend

Signed-off-by: Thomas Barrett <tbarrett@crusoeenergy.com>
This commit is contained in:
Thomas Barrett 2023-10-18 19:59:22 -07:00 committed by Bo Chen
parent 6105d3ca83
commit bae13c5c56
8 changed files with 202 additions and 14 deletions

View File

@ -24,6 +24,7 @@ pub mod qcow_sync;
///
/// Enabled with the `"io_uring"` feature
pub mod raw_async;
pub mod raw_async_aio;
pub mod raw_sync;
pub mod vhd;
pub mod vhdx;
@ -61,6 +62,7 @@ use vm_memory::{
GuestMemoryError, GuestMemoryLoadGuard,
};
use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::aio;
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::{ioctl_io_nr, ioctl_ioc_nr};
@ -565,6 +567,11 @@ unsafe impl ByteValued for VirtioBlockConfig {}
// SAFETY: the data structure only contains a series of integers
unsafe impl ByteValued for VirtioBlockGeometry {}
/// Probe whether aio can be used on the current system.
///
/// The probe attempts to create an [`aio::IoContext`] sized for a single
/// request and reports whether that creation succeeded. The context is only
/// used for the check and is dropped before returning.
pub fn block_aio_is_supported() -> bool {
    let probe = aio::IoContext::new(1);
    probe.is_ok()
}
/// Check if io_uring for block device can be used on the current system, as
/// it correctly supports the expected io_uring features.
pub fn block_io_uring_is_supported() -> bool {

138
block/src/raw_async_aio.rs Normal file
View File

@ -0,0 +1,138 @@
use crate::async_io::{
AsyncIo, AsyncIoError, AsyncIoResult, DiskFile, DiskFileError, DiskFileResult,
};
use crate::DiskTopology;
use std::fs::File;
use std::io::{Seek, SeekFrom};
use std::os::unix::io::{AsRawFd, RawFd};
use vmm_sys_util::aio;
use vmm_sys_util::eventfd::EventFd;
/// Raw disk image wrapper that owns the backing [`File`].
pub struct RawFileDiskAio {
    /// Open handle to the raw image file.
    file: File,
}

impl RawFileDiskAio {
    /// Wraps an already opened `file`, taking ownership of it.
    pub fn new(file: File) -> Self {
        Self { file }
    }
}
impl DiskFile for RawFileDiskAio {
    /// Returns the offset of the end of the backing file.
    ///
    /// The value is obtained by seeking to the end, so the file cursor is
    /// left at the end of the file afterwards. A seek failure is reported as
    /// `DiskFileError::Size`.
    fn size(&mut self) -> DiskFileResult<u64> {
        let end_of_file = SeekFrom::End(0);
        self.file.seek(end_of_file).map_err(DiskFileError::Size)
    }

    /// Creates a new [`RawFileAsyncAio`] handle operating on this disk's file
    /// descriptor.
    ///
    /// `ring_depth` is forwarded as the queue depth of the new handle. A
    /// construction failure is reported as `DiskFileError::NewAsyncIo`.
    fn new_async_io(&self, ring_depth: u32) -> DiskFileResult<Box<dyn AsyncIo>> {
        let raw_fd = self.file.as_raw_fd();
        let backend =
            RawFileAsyncAio::new(raw_fd, ring_depth).map_err(DiskFileError::NewAsyncIo)?;
        let boxed: Box<dyn AsyncIo> = Box::new(backend);
        Ok(boxed)
    }

    /// Probes the topology of the backing file.
    ///
    /// When probing fails a warning is logged and the default topology is
    /// returned instead.
    fn topology(&mut self) -> DiskTopology {
        match DiskTopology::probe(&self.file) {
            Ok(probed) => probed,
            Err(_) => {
                warn!("Unable to get device topology. Using default topology");
                DiskTopology::default()
            }
        }
    }
}
/// Asynchronous I/O handle that submits requests through an aio context.
pub struct RawFileAsyncAio {
    /// File descriptor named as the target of every submitted request.
    fd: RawFd,
    /// Context used to submit requests and to collect their completion events.
    ctx: aio::IoContext,
    /// Eventfd attached to every submitted request and exposed as the notifier.
    eventfd: EventFd,
}

impl RawFileAsyncAio {
    /// Builds a handle for `fd` with an aio context created for `queue_depth`.
    ///
    /// The eventfd is created first (with `EFD_NONBLOCK`), then the context;
    /// the first failure of either step is returned to the caller.
    pub fn new(fd: RawFd, queue_depth: u32) -> std::io::Result<Self> {
        // Field initializers run in the order written: eventfd, then context.
        Ok(Self {
            fd,
            eventfd: EventFd::new(libc::EFD_NONBLOCK)?,
            ctx: aio::IoContext::new(queue_depth)?,
        })
    }
}
impl AsyncIo for RawFileAsyncAio {
fn notifier(&self) -> &EventFd {
&self.eventfd
}
fn read_vectored(
&mut self,
offset: libc::off_t,
iovecs: &[libc::iovec],
user_data: u64,
) -> AsyncIoResult<()> {
let iocbs = [&mut aio::IoControlBlock {
aio_fildes: self.fd.as_raw_fd() as u32,
aio_lio_opcode: aio::IOCB_CMD_PREADV as u16,
aio_buf: iovecs.as_ptr() as u64,
aio_nbytes: iovecs.len() as u64,
aio_offset: offset,
aio_data: user_data,
aio_flags: aio::IOCB_FLAG_RESFD,
aio_resfd: self.eventfd.as_raw_fd() as u32,
..Default::default()
}];
let _ = self
.ctx
.submit(&iocbs[..])
.map_err(AsyncIoError::ReadVectored)?;
Ok(())
}
fn write_vectored(
&mut self,
offset: libc::off_t,
iovecs: &[libc::iovec],
user_data: u64,
) -> AsyncIoResult<()> {
let iocbs = [&mut aio::IoControlBlock {
aio_fildes: self.fd.as_raw_fd() as u32,
aio_lio_opcode: aio::IOCB_CMD_PWRITEV as u16,
aio_buf: iovecs.as_ptr() as u64,
aio_nbytes: iovecs.len() as u64,
aio_offset: offset,
aio_data: user_data,
aio_flags: aio::IOCB_FLAG_RESFD,
aio_resfd: self.eventfd.as_raw_fd() as u32,
..Default::default()
}];
let _ = self
.ctx
.submit(&iocbs[..])
.map_err(AsyncIoError::WriteVectored)?;
Ok(())
}
fn fsync(&mut self, user_data: Option<u64>) -> AsyncIoResult<()> {
let iocbs = [&mut aio::IoControlBlock {
aio_fildes: self.fd.as_raw_fd() as u32,
aio_lio_opcode: aio::IOCB_CMD_FSYNC as u16,
aio_data: user_data.unwrap_or(0),
aio_flags: aio::IOCB_FLAG_RESFD,
aio_resfd: self.eventfd.as_raw_fd() as u32,
..Default::default()
}];
let _ = self.ctx.submit(&iocbs[..]).map_err(AsyncIoError::Fsync)?;
Ok(())
}
fn next_completed_request(&mut self) -> Option<(u64, i32)> {
let mut events: [aio::IoEvent; 1] = [aio::IoEvent::default()];
let rc = self.ctx.get_events(0, &mut events, None).unwrap();
if rc == 0 {
None
} else {
Some((events[0].data, events[0].res as i32))
}
}
}

View File

@ -3055,7 +3055,7 @@ mod common_parallel {
handle_child_output(r, &output);
}
fn _test_virtio_block(image_name: &str, disable_io_uring: bool) {
fn _test_virtio_block(image_name: &str, disable_io_uring: bool, disable_aio: bool) {
let focal = UbuntuDiskConfig::new(image_name.to_string());
let guest = Guest::new(Box::new(focal));
@ -3085,9 +3085,10 @@ mod common_parallel {
)
.as_str(),
format!(
"path={},readonly=on,direct=on,num_queues=4,_disable_io_uring={}",
"path={},readonly=on,direct=on,num_queues=4,_disable_io_uring={},_disable_aio={}",
blk_file_path.to_str().unwrap(),
disable_io_uring
disable_io_uring,
disable_aio,
)
.as_str(),
])
@ -3140,23 +3141,28 @@ mod common_parallel {
}
#[test]
fn test_virtio_block() {
_test_virtio_block(FOCAL_IMAGE_NAME, false)
fn test_virtio_block_io_uring() {
_test_virtio_block(FOCAL_IMAGE_NAME, false, true)
}
#[test]
fn test_virtio_block_disable_io_uring() {
_test_virtio_block(FOCAL_IMAGE_NAME, true)
fn test_virtio_block_aio() {
_test_virtio_block(FOCAL_IMAGE_NAME, true, false)
}
#[test]
fn test_virtio_block_sync() {
_test_virtio_block(FOCAL_IMAGE_NAME, true, true)
}
#[test]
fn test_virtio_block_qcow2() {
_test_virtio_block(FOCAL_IMAGE_NAME_QCOW2, false)
_test_virtio_block(FOCAL_IMAGE_NAME_QCOW2, false, false)
}
#[test]
fn test_virtio_block_qcow2_backing_file() {
_test_virtio_block(FOCAL_IMAGE_NAME_QCOW2_BACKING_FILE, false)
_test_virtio_block(FOCAL_IMAGE_NAME_QCOW2_BACKING_FILE, false, false)
}
#[test]
@ -3181,7 +3187,7 @@ mod common_parallel {
.output()
.expect("Expect generating VHD image from RAW image");
_test_virtio_block(FOCAL_IMAGE_NAME_VHD, false)
_test_virtio_block(FOCAL_IMAGE_NAME_VHD, false, false)
}
#[test]
@ -3205,7 +3211,7 @@ mod common_parallel {
.output()
.expect("Expect generating dynamic VHDx image from RAW image");
_test_virtio_block(FOCAL_IMAGE_NAME_VHDX, false)
_test_virtio_block(FOCAL_IMAGE_NAME_VHDX, false, false)
}
#[test]

View File

@ -86,6 +86,9 @@ fn virtio_block_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
(libc::SYS_fsync, vec![]),
(libc::SYS_ftruncate, vec![]),
(libc::SYS_getrandom, vec![]),
(libc::SYS_io_destroy, vec![]),
(libc::SYS_io_getevents, vec![]),
(libc::SYS_io_submit, vec![]),
(libc::SYS_io_uring_enter, vec![]),
(libc::SYS_lseek, vec![]),
(libc::SYS_prctl, vec![]),

View File

@ -888,6 +888,7 @@ impl DiskConfig {
.add("ops_refill_time")
.add("id")
.add("_disable_io_uring")
.add("_disable_aio")
.add("pci_segment")
.add("serial");
parser.parse(disk).map_err(Error::ParseDisk)?;
@ -928,6 +929,11 @@ impl DiskConfig {
.map_err(Error::ParseDisk)?
.unwrap_or(Toggle(false))
.0;
let disable_aio = parser
.convert::<Toggle>("_disable_aio")
.map_err(Error::ParseDisk)?
.unwrap_or(Toggle(false))
.0;
let pci_segment = parser
.convert("pci_segment")
.map_err(Error::ParseDisk)?
@ -996,6 +1002,7 @@ impl DiskConfig {
rate_limiter_config,
id,
disable_io_uring,
disable_aio,
pci_segment,
serial,
})

View File

@ -35,9 +35,9 @@ use arch::NumaNodes;
#[cfg(target_arch = "aarch64")]
use arch::{DeviceType, MmioDeviceInfo};
use block::{
async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_sync::RawFileDiskSync,
vhdx, vhdx_sync::VhdxDiskSync, ImageType,
async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type,
fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio,
raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType,
};
#[cfg(feature = "io_uring")]
use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
@ -949,6 +949,9 @@ pub struct DeviceManager {
// io_uring availability if detected
io_uring_supported: Option<bool>,
// aio availability if detected
aio_supported: Option<bool>,
// List of unique identifiers provided at boot through the configuration.
boot_id_list: BTreeSet<String>,
@ -1138,6 +1141,7 @@ impl DeviceManager {
pvpanic_device: None,
force_iommu,
io_uring_supported: None,
aio_supported: None,
boot_id_list,
timestamp,
pending_activations: Arc::new(Mutex::new(Vec::default())),
@ -2171,6 +2175,17 @@ impl DeviceManager {
Ok(devices)
}
// Cache whether aio is supported to avoid checking for very block device
fn aio_is_supported(&mut self) -> bool {
if let Some(supported) = self.aio_supported {
return supported;
}
let supported = block_aio_is_supported();
self.aio_supported = Some(supported);
supported
}
// Cache whether io_uring is supported to avoid probing for every block device
fn io_uring_is_supported(&mut self) -> bool {
if let Some(supported) = self.io_uring_supported {
@ -2292,6 +2307,9 @@ impl DeviceManager {
{
Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
}
} else if !disk_cfg.disable_aio && self.aio_is_supported() {
info!("Using asynchronous RAW disk file (aio)");
Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
} else {
info!("Using synchronous RAW disk file");
Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>

View File

@ -553,6 +553,11 @@ fn vmm_thread_rules(
libc::SYS_ioctl,
create_vmm_ioctl_seccomp_rule(hypervisor_type)?,
),
(libc::SYS_io_cancel, vec![]),
(libc::SYS_io_destroy, vec![]),
(libc::SYS_io_getevents, vec![]),
(libc::SYS_io_setup, vec![]),
(libc::SYS_io_submit, vec![]),
(libc::SYS_io_uring_enter, vec![]),
(libc::SYS_io_uring_setup, vec![]),
(libc::SYS_io_uring_register, vec![]),

View File

@ -218,6 +218,9 @@ pub struct DiskConfig {
// For testing use only. Not exposed in API.
#[serde(default)]
pub disable_io_uring: bool,
// For testing use only. Not exposed in API.
#[serde(default)]
pub disable_aio: bool,
#[serde(default)]
pub pci_segment: u16,
#[serde(default)]
@ -249,6 +252,7 @@ impl Default for DiskConfig {
vhost_socket: None,
id: None,
disable_io_uring: false,
disable_aio: false,
rate_limiter_config: None,
pci_segment: 0,
serial: None,