mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2024-10-05 21:15:45 +00:00
vm-virtio: block: Add support for alignment restrictions
Doing I/O on an image opened with O_DIRECT requires adhering to certain restrictions, with the following elements needing to be aligned: - Address of the source/destination memory buffer. - File offset. - Length of the data to be read/written. The actual alignment value depends on various elements, and according to open(2) "(...) there is currently no filesystem-independent interface for an application to discover these restrictions (...)". To discover this value, we iterate through a list of alignments (currently, 512 and 4096), calling pread() with each one and checking whether the operation succeeded. We also extend RawFile so it can be used as a backend for QcowFile, so the latter can be easily adapted to support O_DIRECT too. Signed-off-by: Sergio Lopez <slp@redhat.com>
This commit is contained in:
parent
e483cde1bb
commit
c5a656c9dc
@ -87,7 +87,9 @@ impl VhostUserBlkBackend {
|
|||||||
let image_id = build_disk_image_id(&PathBuf::from(&image_path));
|
let image_id = build_disk_image_id(&PathBuf::from(&image_path));
|
||||||
let image_type = qcow::detect_image_type(&raw_img).unwrap();
|
let image_type = qcow::detect_image_type(&raw_img).unwrap();
|
||||||
let mut image = match image_type {
|
let mut image = match image_type {
|
||||||
ImageType::Raw => Box::new(vm_virtio::RawFile::new(raw_img)) as Box<dyn DiskFile>,
|
ImageType::Raw => {
|
||||||
|
Box::new(vm_virtio::RawFile::new(raw_img, false)) as Box<dyn DiskFile>
|
||||||
|
}
|
||||||
ImageType::Qcow2 => Box::new(QcowFile::from(raw_img).unwrap()) as Box<dyn DiskFile>,
|
ImageType::Qcow2 => Box::new(QcowFile::from(raw_img).unwrap()) as Box<dyn DiskFile>,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -16,21 +16,24 @@ use super::{
|
|||||||
use crate::VirtioInterrupt;
|
use crate::VirtioInterrupt;
|
||||||
use arc_swap::ArcSwap;
|
use arc_swap::ArcSwap;
|
||||||
use epoll;
|
use epoll;
|
||||||
use libc::EFD_NONBLOCK;
|
use libc::{c_void, EFD_NONBLOCK};
|
||||||
|
use std::alloc::{alloc_zeroed, dealloc, Layout};
|
||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::fs::File;
|
use std::convert::TryInto;
|
||||||
|
use std::fs::{File, Metadata};
|
||||||
use std::io::{self, Read, Seek, SeekFrom, Write};
|
use std::io::{self, Read, Seek, SeekFrom, Write};
|
||||||
use std::os::linux::fs::MetadataExt;
|
use std::os::linux::fs::MetadataExt;
|
||||||
use std::os::unix::io::AsRawFd;
|
use std::os::unix::io::{AsRawFd, RawFd};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::result;
|
use std::result;
|
||||||
|
use std::slice;
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use virtio_bindings::bindings::virtio_blk::*;
|
use virtio_bindings::bindings::virtio_blk::*;
|
||||||
use vm_device::{Migratable, MigratableError, Pausable, Snapshotable};
|
use vm_device::{Migratable, MigratableError, Pausable, Snapshotable};
|
||||||
use vm_memory::{Bytes, GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryMmap};
|
use vm_memory::{Bytes, GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryMmap};
|
||||||
use vmm_sys_util::eventfd::EventFd;
|
use vmm_sys_util::{eventfd::EventFd, seek_hole::SeekHole, write_zeroes::PunchHole};
|
||||||
|
|
||||||
const CONFIG_SPACE_SIZE: usize = 8;
|
const CONFIG_SPACE_SIZE: usize = 8;
|
||||||
const SECTOR_SHIFT: u8 = 9;
|
const SECTOR_SHIFT: u8 = 9;
|
||||||
@ -94,31 +97,251 @@ impl ExecuteError {
|
|||||||
pub trait DiskFile: Read + Seek + Write + Clone {}
|
pub trait DiskFile: Read + Seek + Write + Clone {}
|
||||||
impl<D: Read + Seek + Write + Clone> DiskFile for D {}
|
impl<D: Read + Seek + Write + Clone> DiskFile for D {}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct RawFile {
|
pub struct RawFile {
|
||||||
file: File,
|
file: File,
|
||||||
|
alignment: usize,
|
||||||
|
position: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
const BLK_ALIGNMENTS: [usize; 2] = [512, 4096];
|
||||||
|
|
||||||
|
fn is_valid_alignment(fd: RawFd, alignment: usize) -> bool {
|
||||||
|
let layout = Layout::from_size_align(alignment, alignment).unwrap();
|
||||||
|
let ptr = unsafe { alloc_zeroed(layout) };
|
||||||
|
|
||||||
|
let ret = unsafe {
|
||||||
|
::libc::pread(
|
||||||
|
fd,
|
||||||
|
ptr as *mut c_void,
|
||||||
|
alignment,
|
||||||
|
alignment.try_into().unwrap(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
unsafe { dealloc(ptr, layout) };
|
||||||
|
|
||||||
|
ret >= 0
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RawFile {
|
impl RawFile {
|
||||||
pub fn new(file: File) -> Self {
|
pub fn new(file: File, direct_io: bool) -> Self {
|
||||||
RawFile { file }
|
// Assume no alignment restrictions if we aren't using O_DIRECT.
|
||||||
|
let mut alignment = 0;
|
||||||
|
if direct_io {
|
||||||
|
for align in &BLK_ALIGNMENTS {
|
||||||
|
if is_valid_alignment(file.as_raw_fd(), *align) {
|
||||||
|
alignment = *align;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RawFile {
|
||||||
|
file,
|
||||||
|
alignment: alignment.try_into().unwrap(),
|
||||||
|
position: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn round_up(&self, offset: u64) -> u64 {
|
||||||
|
let align: u64 = self.alignment.try_into().unwrap();
|
||||||
|
((offset / (align + 1)) + 1) * align
|
||||||
|
}
|
||||||
|
|
||||||
|
fn round_down(&self, offset: u64) -> u64 {
|
||||||
|
let align: u64 = self.alignment.try_into().unwrap();
|
||||||
|
(offset / align) * align
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_aligned(&self, buf: &[u8]) -> bool {
|
||||||
|
if self.alignment == 0 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let align64: u64 = self.alignment.try_into().unwrap();
|
||||||
|
|
||||||
|
(self.position % align64 == 0)
|
||||||
|
&& ((buf.as_ptr() as usize) % self.alignment == 0)
|
||||||
|
&& (buf.len() % self.alignment == 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_len(&self, size: u64) -> std::io::Result<()> {
|
||||||
|
self.file.set_len(size)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn metadata(&self) -> std::io::Result<Metadata> {
|
||||||
|
self.file.metadata()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn try_clone(&self) -> std::io::Result<RawFile> {
|
||||||
|
Ok(RawFile {
|
||||||
|
file: self.file.try_clone().expect("RawFile cloning failed"),
|
||||||
|
alignment: self.alignment,
|
||||||
|
position: self.position,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn sync_all(&self) -> std::io::Result<()> {
|
||||||
|
self.file.sync_all()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn sync_data(&self) -> std::io::Result<()> {
|
||||||
|
self.file.sync_data()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Read for RawFile {
|
impl Read for RawFile {
|
||||||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
self.file.read(buf)
|
if self.is_aligned(buf) {
|
||||||
|
match self.file.read(buf) {
|
||||||
|
Ok(r) => {
|
||||||
|
self.position = self.position.checked_add(r.try_into().unwrap()).unwrap();
|
||||||
|
Ok(r)
|
||||||
}
|
}
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let rounded_pos: u64 = self.round_down(self.position);
|
||||||
|
let file_offset: usize = self
|
||||||
|
.position
|
||||||
|
.checked_sub(rounded_pos)
|
||||||
|
.unwrap()
|
||||||
|
.try_into()
|
||||||
|
.unwrap();
|
||||||
|
let buf_len: usize = buf.len();
|
||||||
|
let rounded_len: usize = self
|
||||||
|
.round_up(
|
||||||
|
file_offset
|
||||||
|
.checked_add(buf_len)
|
||||||
|
.unwrap()
|
||||||
|
.try_into()
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.try_into()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let layout = Layout::from_size_align(rounded_len, self.alignment).unwrap();
|
||||||
|
let tmp_ptr = unsafe { alloc_zeroed(layout) };
|
||||||
|
let tmp_buf = unsafe { slice::from_raw_parts_mut(tmp_ptr, rounded_len) };
|
||||||
|
|
||||||
|
// This can eventually replaced with read_at once its interface
|
||||||
|
// has been stabilized.
|
||||||
|
let ret = unsafe {
|
||||||
|
::libc::pread64(
|
||||||
|
self.file.as_raw_fd(),
|
||||||
|
tmp_buf.as_mut_ptr() as *mut c_void,
|
||||||
|
tmp_buf.len(),
|
||||||
|
rounded_pos.try_into().unwrap(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
if ret < 0 {
|
||||||
|
unsafe { dealloc(tmp_ptr, layout) };
|
||||||
|
return Err(io::Error::last_os_error());
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Seek for RawFile {
|
let read: usize = ret.try_into().unwrap();
|
||||||
fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
|
if read < file_offset {
|
||||||
self.file.seek(pos)
|
unsafe { dealloc(tmp_ptr, layout) };
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut to_copy = read - file_offset;
|
||||||
|
if to_copy > buf_len {
|
||||||
|
to_copy = buf_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.copy_from_slice(&tmp_buf[file_offset..(file_offset + buf_len)]);
|
||||||
|
unsafe { dealloc(tmp_ptr, layout) };
|
||||||
|
|
||||||
|
self.seek(SeekFrom::Current(to_copy.try_into().unwrap()))
|
||||||
|
.unwrap();
|
||||||
|
Ok(to_copy.try_into().unwrap())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Write for RawFile {
|
impl Write for RawFile {
|
||||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||||
self.file.write(buf)
|
if self.is_aligned(buf) {
|
||||||
|
match self.file.write(buf) {
|
||||||
|
Ok(r) => {
|
||||||
|
self.position = self.position.checked_add(r.try_into().unwrap()).unwrap();
|
||||||
|
Ok(r)
|
||||||
|
}
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let rounded_pos: u64 = self.round_down(self.position);
|
||||||
|
let file_offset: usize = self
|
||||||
|
.position
|
||||||
|
.checked_sub(rounded_pos)
|
||||||
|
.unwrap()
|
||||||
|
.try_into()
|
||||||
|
.unwrap();
|
||||||
|
let buf_len: usize = buf.len();
|
||||||
|
let rounded_len: usize = self
|
||||||
|
.round_up(
|
||||||
|
file_offset
|
||||||
|
.checked_add(buf_len)
|
||||||
|
.unwrap()
|
||||||
|
.try_into()
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.try_into()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let layout = Layout::from_size_align(rounded_len, self.alignment).unwrap();
|
||||||
|
let tmp_ptr = unsafe { alloc_zeroed(layout) };
|
||||||
|
let tmp_buf = unsafe { slice::from_raw_parts_mut(tmp_ptr, rounded_len) };
|
||||||
|
|
||||||
|
// This can eventually replaced with read_at once its interface
|
||||||
|
// has been stabilized.
|
||||||
|
let ret = unsafe {
|
||||||
|
::libc::pread64(
|
||||||
|
self.file.as_raw_fd(),
|
||||||
|
tmp_buf.as_mut_ptr() as *mut c_void,
|
||||||
|
tmp_buf.len(),
|
||||||
|
rounded_pos.try_into().unwrap(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
if ret < 0 {
|
||||||
|
unsafe { dealloc(tmp_ptr, layout) };
|
||||||
|
return Err(io::Error::last_os_error());
|
||||||
|
};
|
||||||
|
|
||||||
|
tmp_buf[file_offset..(file_offset + buf_len)].copy_from_slice(buf);
|
||||||
|
|
||||||
|
// This can eventually replaced with write_at once its interface
|
||||||
|
// has been stabilized.
|
||||||
|
let ret = unsafe {
|
||||||
|
::libc::pwrite64(
|
||||||
|
self.file.as_raw_fd(),
|
||||||
|
tmp_buf.as_ptr() as *const c_void,
|
||||||
|
tmp_buf.len(),
|
||||||
|
rounded_pos.try_into().unwrap(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
unsafe { dealloc(tmp_ptr, layout) };
|
||||||
|
|
||||||
|
if ret < 0 {
|
||||||
|
return Err(io::Error::last_os_error());
|
||||||
|
}
|
||||||
|
|
||||||
|
let written: usize = ret.try_into().unwrap();
|
||||||
|
if written < file_offset {
|
||||||
|
Ok(0)
|
||||||
|
} else {
|
||||||
|
let mut to_seek = written - file_offset;
|
||||||
|
if to_seek > buf_len {
|
||||||
|
to_seek = buf_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.seek(SeekFrom::Current(to_seek.try_into().unwrap()))
|
||||||
|
.unwrap();
|
||||||
|
Ok(to_seek.try_into().unwrap())
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn flush(&mut self) -> std::io::Result<()> {
|
fn flush(&mut self) -> std::io::Result<()> {
|
||||||
@ -126,10 +349,56 @@ impl Write for RawFile {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Seek for RawFile {
|
||||||
|
fn seek(&mut self, newpos: SeekFrom) -> std::io::Result<u64> {
|
||||||
|
match self.file.seek(newpos) {
|
||||||
|
Ok(pos) => {
|
||||||
|
self.position = pos;
|
||||||
|
Ok(pos)
|
||||||
|
}
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PunchHole for RawFile {
    /// Delegates to the wrapped file's `punch_hole` with the same `offset`
    /// and `length`; the tracked position is not modified.
    fn punch_hole(&mut self, offset: u64, length: u64) -> std::io::Result<()> {
        let backing = &mut self.file;
        backing.punch_hole(offset, length)
    }
}
|
||||||
|
|
||||||
|
impl SeekHole for RawFile {
    /// Delegates to the wrapped file's `seek_hole`. When it reports a
    /// location (`Some`), that value is also stored in `self.position`.
    /// Errors and `None` results leave the tracked position unchanged.
    fn seek_hole(&mut self, offset: u64) -> std::io::Result<Option<u64>> {
        let found = self.file.seek_hole(offset)?;
        if let Some(p) = found {
            self.position = p;
        }
        Ok(found)
    }

    /// Delegates to the wrapped file's `seek_data`. When it reports a
    /// location (`Some`), that value is also stored in `self.position`.
    /// Errors and `None` results leave the tracked position unchanged.
    fn seek_data(&mut self, offset: u64) -> std::io::Result<Option<u64>> {
        let found = self.file.seek_data(offset)?;
        if let Some(p) = found {
            self.position = p;
        }
        Ok(found)
    }
}
|
||||||
|
|
||||||
impl Clone for RawFile {
|
impl Clone for RawFile {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
RawFile {
|
RawFile {
|
||||||
file: self.file.try_clone().expect("RawFile cloning failed"),
|
file: self.file.try_clone().expect("RawFile cloning failed"),
|
||||||
|
alignment: self.alignment,
|
||||||
|
position: self.position,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -937,7 +937,7 @@ impl DeviceManager {
|
|||||||
.map_err(DeviceManagerError::DetectImageType)?;
|
.map_err(DeviceManagerError::DetectImageType)?;
|
||||||
match image_type {
|
match image_type {
|
||||||
ImageType::Raw => {
|
ImageType::Raw => {
|
||||||
let raw_img = vm_virtio::RawFile::new(raw_img);
|
let raw_img = vm_virtio::RawFile::new(raw_img, false);
|
||||||
let dev = vm_virtio::Block::new(
|
let dev = vm_virtio::Block::new(
|
||||||
raw_img,
|
raw_img,
|
||||||
disk_cfg.path.clone(),
|
disk_cfg.path.clone(),
|
||||||
|
Loading…
Reference in New Issue
Block a user