diff --git a/block_util/src/fixed_vhd_async.rs b/block_util/src/fixed_vhd_async.rs
new file mode 100644
index 000000000..16aba4fe6
--- /dev/null
+++ b/block_util/src/fixed_vhd_async.rs
@@ -0,0 +1,148 @@
+// Copyright © 2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::async_io::{
+    AsyncIo, AsyncIoError, AsyncIoResult, DiskFile, DiskFileError, DiskFileResult,
+};
+use crate::raw_async::RawFileAsync;
+use crate::vhd::VhdFooter;
+use std::fs::File;
+use std::os::unix::io::{AsRawFd, RawFd};
+use vmm_sys_util::eventfd::EventFd;
+
+/// Fixed VHD disk image exposed through the `DiskFile` trait.
+pub struct FixedVhdDiskAsync {
+    file: File,
+    /// Disk size in bytes, taken from the `current_size` field of the footer.
+    size: u64,
+}
+
+impl FixedVhdDiskAsync {
+    /// Build the disk from an open image file by parsing its VHD footer.
+    ///
+    /// # Errors
+    ///
+    /// Returns the I/O error raised while seeking to or reading the footer.
+    pub fn new(mut file: File) -> std::io::Result<Self> {
+        let footer = VhdFooter::new(&mut file)?;
+
+        Ok(FixedVhdDiskAsync {
+            file,
+            size: footer.current_size(),
+        })
+    }
+}
+
+impl DiskFile for FixedVhdDiskAsync {
+    /// Disk size recorded in the footer, which is not the size of the file.
+    fn size(&mut self) -> DiskFileResult<u64> {
+        Ok(self.size)
+    }
+
+    /// Create an asynchronous I/O handle backed by this image's descriptor.
+    fn new_async_io(&self, ring_depth: u32) -> DiskFileResult<Box<dyn AsyncIo>> {
+        Ok(Box::new(
+            FixedVhdAsync::new(self.file.as_raw_fd(), ring_depth, self.size)
+                .map_err(DiskFileError::NewAsyncIo)?,
+        ) as Box<dyn AsyncIo>)
+    }
+}
+
+/// Asynchronous I/O handle for a fixed VHD image.
+///
+/// Requests are bounds-checked against the disk size before being forwarded
+/// to a `RawFileAsync`, so that the footer at the end of the file is left alone.
+pub struct FixedVhdAsync {
+    raw_file_async: RawFileAsync,
+    /// Disk size in bytes; every request must end at or before this offset.
+    size: u64,
+}
+
+impl FixedVhdAsync {
+    /// Wrap `fd` in a `RawFileAsync` created with `ring_depth`.
+    ///
+    /// `size` is the disk size in bytes that requests are validated against.
+    pub fn new(fd: RawFd, ring_depth: u32, size: u64) -> std::io::Result<Self> {
+        let raw_file_async = RawFileAsync::new(fd, ring_depth)?;
+
+        Ok(FixedVhdAsync {
+            raw_file_async,
+            size,
+        })
+    }
+
+    /// Check that a request covering `iovecs` from `offset` stays on the disk.
+    ///
+    /// Both ends of the request are validated: a request starting inside the
+    /// data area but running past `size` would otherwise reach the bytes that
+    /// follow the data, where the VHD footer is stored.
+    fn check_bounds(&self, offset: libc::off_t, iovecs: &[libc::iovec]) -> std::io::Result<()> {
+        // Reject negative offsets before the cast so they cannot wrap around.
+        if offset < 0 || offset as u64 >= self.size {
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!(
+                    "Invalid offset {}, can't be larger than file size {}",
+                    offset, self.size
+                ),
+            ));
+        }
+
+        // The check above guarantees that the subtraction cannot underflow.
+        let remaining = self.size - offset as u64;
+        let length = iovecs
+            .iter()
+            .fold(0u64, |acc, iov| acc.saturating_add(iov.iov_len as u64));
+        if length > remaining {
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!(
+                    "Invalid length {} at offset {}, can't go past file size {}",
+                    length, offset, self.size
+                ),
+            ));
+        }
+
+        Ok(())
+    }
+}
+
+impl AsyncIo for FixedVhdAsync {
+    fn notifier(&self) -> &EventFd {
+        self.raw_file_async.notifier()
+    }
+
+    fn read_vectored(
+        &mut self,
+        offset: libc::off_t,
+        iovecs: Vec<libc::iovec>,
+        user_data: u64,
+    ) -> AsyncIoResult<()> {
+        self.check_bounds(offset, &iovecs)
+            .map_err(AsyncIoError::ReadVectored)?;
+
+        self.raw_file_async.read_vectored(offset, iovecs, user_data)
+    }
+
+    fn write_vectored(
+        &mut self,
+        offset: libc::off_t,
+        iovecs: Vec<libc::iovec>,
+        user_data: u64,
+    ) -> AsyncIoResult<()> {
+        self.check_bounds(offset, &iovecs)
+            .map_err(AsyncIoError::WriteVectored)?;
+
+        self.raw_file_async
+            .write_vectored(offset, iovecs, user_data)
+    }
+
+    fn fsync(&mut self, user_data: Option<u64>) -> AsyncIoResult<()> {
+        self.raw_file_async.fsync(user_data)
+    }
+
+    fn complete(&mut self) -> Vec<(u64, i32)> {
+        self.raw_file_async.complete()
+    }
+}
diff --git a/block_util/src/lib.rs b/block_util/src/lib.rs
index b2954453c..087edfa7d 100644
--- a/block_util/src/lib.rs
+++ b/block_util/src/lib.rs
@@ -14,15 +14,19 @@ extern crate log;
 extern crate serde_derive;
 
 pub mod async_io;
+pub mod fixed_vhd_async;
 pub mod qcow_sync;
 pub mod raw_async;
 pub mod raw_sync;
+pub mod vhd;
 
 use crate::async_io::{AsyncIo, AsyncIoError, AsyncIoResult, DiskFileError, DiskFileResult};
 #[cfg(feature = "io_uring")]
 use io_uring::{opcode, IoUring, Probe};
 use serde::ser::{Serialize, SerializeStruct, Serializer};
 use std::cmp;
+use std::convert::TryInto;
+use std::fs::File;
 use std::io::{self, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
 use std::os::linux::fs::MetadataExt;
 #[cfg(feature = "io_uring")]
@@ -652,3 +656,56 @@ pub fn fsync_sync(
 
     Ok(())
 }
+
+/// Disk image formats recognised by [`detect_image_type`].
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ImageType {
+    /// Fixed-size VHD image, identified by the footer ending the file.
+    FixedVhd,
+    /// QCOW image, identified by the magic number starting the file.
+    Qcow2,
+    /// Fallback when no other format is recognised.
+    Raw,
+}
+
+/// Magic number found, big-endian, in the first 4 bytes of a QCOW image.
+const QCOW_MAGIC: u32 = 0x5146_49fb;
+
+/// Determine image type through file parsing.
+///
+/// The header is always read from offset 0, wherever the cursor of `f` is
+/// on entry, and the cursor is moved back to offset 0 on success.
+///
+/// # Errors
+///
+/// Returns the I/O error raised while seeking or reading, which includes the
+/// case of a file shorter than one 512-byte sector.
+pub fn detect_image_type(f: &mut File) -> std::io::Result<ImageType> {
+    // We must create a buffer aligned on 512 bytes with a size being a
+    // multiple of 512 bytes as the file might be opened with O_DIRECT flag.
+    #[repr(align(512))]
+    struct Sector {
+        data: [u8; 512],
+    }
+    let mut s = Sector { data: [0; 512] };
+
+    // Do not depend on the caller's cursor: the magic number is only
+    // meaningful when read from the very beginning of the file.
+    f.seek(SeekFrom::Start(0))?;
+    f.read_exact(&mut s.data)?;
+
+    // Check 4 first bytes to get the header value and determine the image type
+    let image_type = if u32::from_be_bytes(s.data[0..4].try_into().unwrap()) == QCOW_MAGIC {
+        ImageType::Qcow2
+    } else if vhd::is_fixed_vhd(f)? {
+        ImageType::FixedVhd
+    } else {
+        ImageType::Raw
+    };
+
+    // Probing moved the cursor; rewind it so that a consumer reading
+    // sequentially starts from the beginning of the image.
+    f.seek(SeekFrom::Start(0))?;
+
+    Ok(image_type)
+}
diff --git a/block_util/src/vhd.rs b/block_util/src/vhd.rs
new file mode 100644
index 000000000..8a047e7c9
--- /dev/null
+++ b/block_util/src/vhd.rs
@@ -0,0 +1,140 @@
+// Copyright © 2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+use std::convert::TryInto;
+use std::fs::File;
+use std::io::{Read, Seek, SeekFrom};
+
+/// Fields of a VHD footer, decoded from the last 512 bytes of an image file.
+///
+/// Multi-byte fields are stored big-endian in the file.
+#[derive(Clone, Copy, Debug)]
+pub struct VhdFooter {
+    cookie: u64,
+    features: u32,
+    file_format_version: u32,
+    data_offset: u64,
+    time_stamp: u32,
+    creator_application: u32,
+    creator_version: u32,
+    creator_host_os: u32,
+    original_size: u64,
+    current_size: u64,
+    disk_geometry: u32,
+    disk_type: u32,
+    checksum: u32,
+    unique_id: u128,
+    saved_state: u8,
+}
+
+impl VhdFooter {
+    /// Read and decode the footer stored in the last 512 bytes of `file`.
+    ///
+    /// The decoded values are not validated, and the cursor of `file` is left
+    /// at the end of the file.
+    ///
+    /// # Errors
+    ///
+    /// Returns the I/O error raised by the seek or by the read.
+    pub fn new(file: &mut File) -> std::io::Result<Self> {
+        // We must create a buffer aligned on 512 bytes with a size being a
+        // multiple of 512 bytes as the file might be opened with O_DIRECT flag.
+        #[repr(align(512))]
+        struct Sector {
+            data: [u8; 512],
+        }
+        let mut s = Sector { data: [0; 512] };
+
+        // Place the cursor 512 bytes before the end of the file, as this is
+        // where the footer starts.
+        file.seek(SeekFrom::End(-512))?;
+
+        // Fill in the VhdFooter structure
+        file.read_exact(&mut s.data)?;
+
+        // Each slice has a constant length matching the integer it is
+        // converted to, so none of the `unwrap()` calls below can panic.
+        Ok(VhdFooter {
+            cookie: u64::from_be_bytes(s.data[0..8].try_into().unwrap()),
+            features: u32::from_be_bytes(s.data[8..12].try_into().unwrap()),
+            file_format_version: u32::from_be_bytes(s.data[12..16].try_into().unwrap()),
+            data_offset: u64::from_be_bytes(s.data[16..24].try_into().unwrap()),
+            time_stamp: u32::from_be_bytes(s.data[24..28].try_into().unwrap()),
+            creator_application: u32::from_be_bytes(s.data[28..32].try_into().unwrap()),
+            creator_version: u32::from_be_bytes(s.data[32..36].try_into().unwrap()),
+            creator_host_os: u32::from_be_bytes(s.data[36..40].try_into().unwrap()),
+            original_size: u64::from_be_bytes(s.data[40..48].try_into().unwrap()),
+            current_size: u64::from_be_bytes(s.data[48..56].try_into().unwrap()),
+            disk_geometry: u32::from_be_bytes(s.data[56..60].try_into().unwrap()),
+            disk_type: u32::from_be_bytes(s.data[60..64].try_into().unwrap()),
+            checksum: u32::from_be_bytes(s.data[64..68].try_into().unwrap()),
+            unique_id: u128::from_be_bytes(s.data[68..84].try_into().unwrap()),
+            saved_state: s.data[84],
+        })
+    }
+
+    pub fn cookie(&self) -> u64 {
+        self.cookie
+    }
+    pub fn features(&self) -> u32 {
+        self.features
+    }
+    pub fn file_format_version(&self) -> u32 {
+        self.file_format_version
+    }
+    pub fn data_offset(&self) -> u64 {
+        self.data_offset
+    }
+    pub fn time_stamp(&self) -> u32 {
+        self.time_stamp
+    }
+    pub fn creator_application(&self) -> u32 {
+        self.creator_application
+    }
+    pub fn creator_version(&self) -> u32 {
+        self.creator_version
+    }
+    pub fn creator_host_os(&self) -> u32 {
+        self.creator_host_os
+    }
+    pub fn original_size(&self) -> u64 {
+        self.original_size
+    }
+    pub fn current_size(&self) -> u64 {
+        self.current_size
+    }
+    pub fn disk_geometry(&self) -> u32 {
+        self.disk_geometry
+    }
+    pub fn disk_type(&self) -> u32 {
+        self.disk_type
+    }
+    pub fn checksum(&self) -> u32 {
+        self.checksum
+    }
+    pub fn unique_id(&self) -> u128 {
+        self.unique_id
+    }
+    pub fn saved_state(&self) -> u8 {
+        self.saved_state
+    }
+}
+
+/// Check whether `f` ends with the footer of a fixed VHD image.
+///
+/// # Errors
+///
+/// Returns the I/O error raised while reading the footer.
+pub fn is_fixed_vhd(f: &mut File) -> std::io::Result<bool> {
+    let footer = VhdFooter::new(f)?;
+
+    // A fixed VHD footer carries the "conectix" cookie, file format version
+    // 1.0, an all-ones data offset (fixed disks have no further header to
+    // point to) and the disk type 2.
+    // "conectix" => 0x636f6e6563746978
+    Ok(footer.cookie() == 0x636f6e6563746978
+        && footer.file_format_version() == 0x0001_0000
+        && footer.data_offset() == 0xffff_ffff_ffff_ffff
+        && footer.disk_type() == 0x2)
+}
diff --git a/virtio-devices/src/seccomp_filters.rs b/virtio-devices/src/seccomp_filters.rs
index 8af9840ce..1c68d6cb0 100644
--- a/virtio-devices/src/seccomp_filters.rs
+++ b/virtio-devices/src/seccomp_filters.rs
@@ -105,6 +105,7 @@ fn virtio_block_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
         allow_syscall(libc::SYS_munmap),
         allow_syscall(libc::SYS_openat),
         allow_syscall(libc::SYS_prctl),
+        allow_syscall(libc::SYS_pread64),
         allow_syscall(libc::SYS_read),
         allow_syscall(libc::SYS_rt_sigprocmask),
         allow_syscall(libc::SYS_sched_getaffinity),
diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs
index efbb71606..321e00760 100644
--- a/vmm/src/device_manager.rs
+++ b/vmm/src/device_manager.rs
@@ -39,8 +39,9 @@ use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
 #[cfg(target_arch = "aarch64")]
 use arch::DeviceType;
 use block_util::{
-    async_io::DiskFile, block_io_uring_is_supported, qcow_sync::QcowDiskSync,
-    raw_async::RawFileDisk, raw_sync::RawFileDiskSync,
+    async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
+    fixed_vhd_async::FixedVhdDiskAsync, qcow_sync::QcowDiskSync, raw_async::RawFileDisk,
+    raw_sync::RawFileDiskSync, ImageType,
 };
 #[cfg(target_arch = "aarch64")]
 use devices::gic;
@@ -62,7 +63,6 @@ use pci::{
     DeviceRelocation, PciBarRegionType, PciBus, PciConfigIo, PciConfigMmio, PciDevice, PciRoot,
     VfioPciDevice,
 };
-use qcow::{self, ImageType};
 use seccomp::SeccompAction;
 use std::any::Any;
 use std::collections::HashMap;
@@ -178,7 +178,7 @@ pub enum DeviceManagerError {
     CreateVirtioWatchdog(io::Error),
 
     /// Failed parsing disk image format
-    DetectImageType(qcow::Error),
+    DetectImageType(io::Error),
 
     /// Cannot open qcow disk path
     QcowDeviceCreate(qcow::Error),
@@ -377,6 +377,12 @@ pub enum DeviceManagerError {
 
     /// Failed to do power button notification
     PowerButtonNotification(io::Error),
+
+    /// Failed to set O_DIRECT flag to file descriptor
+    SetDirectIo,
+
+    /// Failed to create FixedVhdDiskAsync
+    CreateFixedVhdDiskAsync(io::Error),
 }
 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
 
@@ -1645,7 +1651,7 @@ impl DeviceManager {
                 options.custom_flags(libc::O_DIRECT);
             }
             // Open block device path
-            let file: File = options
+            let mut file: File = options
                 .open(
                     disk_cfg
                         .path
@@ -1655,12 +1661,23 @@ impl DeviceManager {
                 )
                 .map_err(DeviceManagerError::Disk)?;
 
-            let mut raw_img = qcow::RawFile::new(file.try_clone().unwrap(), disk_cfg.direct);
-
-            let image_type = qcow::detect_image_type(&mut raw_img)
-                .map_err(DeviceManagerError::DetectImageType)?;
+            let image_type =
+                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
 
             let image = match image_type {
+                ImageType::FixedVhd => {
+                    // Use asynchronous backend relying on io_uring if the
+                    // syscalls are supported.
+                    if block_io_uring_is_supported() && !disk_cfg.disable_io_uring {
+                        info!("Using asynchronous fixed VHD disk file (io_uring)");
+                        Box::new(
+                            FixedVhdDiskAsync::new(file)
+                                .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
+                        ) as Box<dyn DiskFile>
+                    } else {
+                        unimplemented!("No synchronous implementation for fixed VHD files");
+                    }
+                }
                 ImageType::Raw => {
                     // Use asynchronous backend relying on io_uring if the
                     // syscalls are supported.