mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2025-02-21 19:02:30 +00:00
vm-virtio: block: Add support for alignment restrictions
Doing I/O on an image opened with O_DIRECT requires to adhere to certain restrictions, requiring the following elements to be aligned: - Address of the source/destination memory buffer. - File offset. - Length of the data to be read/written. The actual alignment value depends on various elements, and according to open(2) "(...) there is currently no filesystem-independent interface for an application to discover these restrictions (...)". To discover such value, we iterate through a list of alignments (currently, 512 and 4096) calling pread() with each one and checking if the operation succeeded. We also extend RawFile so it can be used as a backend for QcowFile, so the later can be easily adapted to support O_DIRECT too. Signed-off-by: Sergio Lopez <slp@redhat.com>
This commit is contained in:
parent
e483cde1bb
commit
c5a656c9dc
@ -87,7 +87,9 @@ impl VhostUserBlkBackend {
|
||||
let image_id = build_disk_image_id(&PathBuf::from(&image_path));
|
||||
let image_type = qcow::detect_image_type(&raw_img).unwrap();
|
||||
let mut image = match image_type {
|
||||
ImageType::Raw => Box::new(vm_virtio::RawFile::new(raw_img)) as Box<dyn DiskFile>,
|
||||
ImageType::Raw => {
|
||||
Box::new(vm_virtio::RawFile::new(raw_img, false)) as Box<dyn DiskFile>
|
||||
}
|
||||
ImageType::Qcow2 => Box::new(QcowFile::from(raw_img).unwrap()) as Box<dyn DiskFile>,
|
||||
};
|
||||
|
||||
|
@ -16,21 +16,24 @@ use super::{
|
||||
use crate::VirtioInterrupt;
|
||||
use arc_swap::ArcSwap;
|
||||
use epoll;
|
||||
use libc::EFD_NONBLOCK;
|
||||
use libc::{c_void, EFD_NONBLOCK};
|
||||
use std::alloc::{alloc_zeroed, dealloc, Layout};
|
||||
use std::cmp;
|
||||
use std::fs::File;
|
||||
use std::convert::TryInto;
|
||||
use std::fs::{File, Metadata};
|
||||
use std::io::{self, Read, Seek, SeekFrom, Write};
|
||||
use std::os::linux::fs::MetadataExt;
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use std::os::unix::io::{AsRawFd, RawFd};
|
||||
use std::path::PathBuf;
|
||||
use std::result;
|
||||
use std::slice;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use virtio_bindings::bindings::virtio_blk::*;
|
||||
use vm_device::{Migratable, MigratableError, Pausable, Snapshotable};
|
||||
use vm_memory::{Bytes, GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryMmap};
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
use vmm_sys_util::{eventfd::EventFd, seek_hole::SeekHole, write_zeroes::PunchHole};
|
||||
|
||||
const CONFIG_SPACE_SIZE: usize = 8;
|
||||
const SECTOR_SHIFT: u8 = 9;
|
||||
@ -94,31 +97,251 @@ impl ExecuteError {
|
||||
pub trait DiskFile: Read + Seek + Write + Clone {}
|
||||
impl<D: Read + Seek + Write + Clone> DiskFile for D {}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RawFile {
|
||||
file: File,
|
||||
alignment: usize,
|
||||
position: u64,
|
||||
}
|
||||
|
||||
const BLK_ALIGNMENTS: [usize; 2] = [512, 4096];
|
||||
|
||||
fn is_valid_alignment(fd: RawFd, alignment: usize) -> bool {
|
||||
let layout = Layout::from_size_align(alignment, alignment).unwrap();
|
||||
let ptr = unsafe { alloc_zeroed(layout) };
|
||||
|
||||
let ret = unsafe {
|
||||
::libc::pread(
|
||||
fd,
|
||||
ptr as *mut c_void,
|
||||
alignment,
|
||||
alignment.try_into().unwrap(),
|
||||
)
|
||||
};
|
||||
|
||||
unsafe { dealloc(ptr, layout) };
|
||||
|
||||
ret >= 0
|
||||
}
|
||||
|
||||
impl RawFile {
|
||||
pub fn new(file: File) -> Self {
|
||||
RawFile { file }
|
||||
pub fn new(file: File, direct_io: bool) -> Self {
|
||||
// Assume no alignment restrictions if we aren't using O_DIRECT.
|
||||
let mut alignment = 0;
|
||||
if direct_io {
|
||||
for align in &BLK_ALIGNMENTS {
|
||||
if is_valid_alignment(file.as_raw_fd(), *align) {
|
||||
alignment = *align;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
RawFile {
|
||||
file,
|
||||
alignment: alignment.try_into().unwrap(),
|
||||
position: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn round_up(&self, offset: u64) -> u64 {
|
||||
let align: u64 = self.alignment.try_into().unwrap();
|
||||
((offset / (align + 1)) + 1) * align
|
||||
}
|
||||
|
||||
fn round_down(&self, offset: u64) -> u64 {
|
||||
let align: u64 = self.alignment.try_into().unwrap();
|
||||
(offset / align) * align
|
||||
}
|
||||
|
||||
fn is_aligned(&self, buf: &[u8]) -> bool {
|
||||
if self.alignment == 0 {
|
||||
return true;
|
||||
}
|
||||
|
||||
let align64: u64 = self.alignment.try_into().unwrap();
|
||||
|
||||
(self.position % align64 == 0)
|
||||
&& ((buf.as_ptr() as usize) % self.alignment == 0)
|
||||
&& (buf.len() % self.alignment == 0)
|
||||
}
|
||||
|
||||
pub fn set_len(&self, size: u64) -> std::io::Result<()> {
|
||||
self.file.set_len(size)
|
||||
}
|
||||
|
||||
pub fn metadata(&self) -> std::io::Result<Metadata> {
|
||||
self.file.metadata()
|
||||
}
|
||||
|
||||
pub fn try_clone(&self) -> std::io::Result<RawFile> {
|
||||
Ok(RawFile {
|
||||
file: self.file.try_clone().expect("RawFile cloning failed"),
|
||||
alignment: self.alignment,
|
||||
position: self.position,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn sync_all(&self) -> std::io::Result<()> {
|
||||
self.file.sync_all()
|
||||
}
|
||||
|
||||
pub fn sync_data(&self) -> std::io::Result<()> {
|
||||
self.file.sync_data()
|
||||
}
|
||||
}
|
||||
|
||||
impl Read for RawFile {
|
||||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||
self.file.read(buf)
|
||||
}
|
||||
}
|
||||
if self.is_aligned(buf) {
|
||||
match self.file.read(buf) {
|
||||
Ok(r) => {
|
||||
self.position = self.position.checked_add(r.try_into().unwrap()).unwrap();
|
||||
Ok(r)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
} else {
|
||||
let rounded_pos: u64 = self.round_down(self.position);
|
||||
let file_offset: usize = self
|
||||
.position
|
||||
.checked_sub(rounded_pos)
|
||||
.unwrap()
|
||||
.try_into()
|
||||
.unwrap();
|
||||
let buf_len: usize = buf.len();
|
||||
let rounded_len: usize = self
|
||||
.round_up(
|
||||
file_offset
|
||||
.checked_add(buf_len)
|
||||
.unwrap()
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
.try_into()
|
||||
.unwrap();
|
||||
|
||||
impl Seek for RawFile {
|
||||
fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
|
||||
self.file.seek(pos)
|
||||
let layout = Layout::from_size_align(rounded_len, self.alignment).unwrap();
|
||||
let tmp_ptr = unsafe { alloc_zeroed(layout) };
|
||||
let tmp_buf = unsafe { slice::from_raw_parts_mut(tmp_ptr, rounded_len) };
|
||||
|
||||
// This can eventually replaced with read_at once its interface
|
||||
// has been stabilized.
|
||||
let ret = unsafe {
|
||||
::libc::pread64(
|
||||
self.file.as_raw_fd(),
|
||||
tmp_buf.as_mut_ptr() as *mut c_void,
|
||||
tmp_buf.len(),
|
||||
rounded_pos.try_into().unwrap(),
|
||||
)
|
||||
};
|
||||
if ret < 0 {
|
||||
unsafe { dealloc(tmp_ptr, layout) };
|
||||
return Err(io::Error::last_os_error());
|
||||
}
|
||||
|
||||
let read: usize = ret.try_into().unwrap();
|
||||
if read < file_offset {
|
||||
unsafe { dealloc(tmp_ptr, layout) };
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let mut to_copy = read - file_offset;
|
||||
if to_copy > buf_len {
|
||||
to_copy = buf_len;
|
||||
}
|
||||
|
||||
buf.copy_from_slice(&tmp_buf[file_offset..(file_offset + buf_len)]);
|
||||
unsafe { dealloc(tmp_ptr, layout) };
|
||||
|
||||
self.seek(SeekFrom::Current(to_copy.try_into().unwrap()))
|
||||
.unwrap();
|
||||
Ok(to_copy.try_into().unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Write for RawFile {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
self.file.write(buf)
|
||||
if self.is_aligned(buf) {
|
||||
match self.file.write(buf) {
|
||||
Ok(r) => {
|
||||
self.position = self.position.checked_add(r.try_into().unwrap()).unwrap();
|
||||
Ok(r)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
} else {
|
||||
let rounded_pos: u64 = self.round_down(self.position);
|
||||
let file_offset: usize = self
|
||||
.position
|
||||
.checked_sub(rounded_pos)
|
||||
.unwrap()
|
||||
.try_into()
|
||||
.unwrap();
|
||||
let buf_len: usize = buf.len();
|
||||
let rounded_len: usize = self
|
||||
.round_up(
|
||||
file_offset
|
||||
.checked_add(buf_len)
|
||||
.unwrap()
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
.try_into()
|
||||
.unwrap();
|
||||
|
||||
let layout = Layout::from_size_align(rounded_len, self.alignment).unwrap();
|
||||
let tmp_ptr = unsafe { alloc_zeroed(layout) };
|
||||
let tmp_buf = unsafe { slice::from_raw_parts_mut(tmp_ptr, rounded_len) };
|
||||
|
||||
// This can eventually replaced with read_at once its interface
|
||||
// has been stabilized.
|
||||
let ret = unsafe {
|
||||
::libc::pread64(
|
||||
self.file.as_raw_fd(),
|
||||
tmp_buf.as_mut_ptr() as *mut c_void,
|
||||
tmp_buf.len(),
|
||||
rounded_pos.try_into().unwrap(),
|
||||
)
|
||||
};
|
||||
if ret < 0 {
|
||||
unsafe { dealloc(tmp_ptr, layout) };
|
||||
return Err(io::Error::last_os_error());
|
||||
};
|
||||
|
||||
tmp_buf[file_offset..(file_offset + buf_len)].copy_from_slice(buf);
|
||||
|
||||
// This can eventually replaced with write_at once its interface
|
||||
// has been stabilized.
|
||||
let ret = unsafe {
|
||||
::libc::pwrite64(
|
||||
self.file.as_raw_fd(),
|
||||
tmp_buf.as_ptr() as *const c_void,
|
||||
tmp_buf.len(),
|
||||
rounded_pos.try_into().unwrap(),
|
||||
)
|
||||
};
|
||||
|
||||
unsafe { dealloc(tmp_ptr, layout) };
|
||||
|
||||
if ret < 0 {
|
||||
return Err(io::Error::last_os_error());
|
||||
}
|
||||
|
||||
let written: usize = ret.try_into().unwrap();
|
||||
if written < file_offset {
|
||||
Ok(0)
|
||||
} else {
|
||||
let mut to_seek = written - file_offset;
|
||||
if to_seek > buf_len {
|
||||
to_seek = buf_len;
|
||||
}
|
||||
|
||||
self.seek(SeekFrom::Current(to_seek.try_into().unwrap()))
|
||||
.unwrap();
|
||||
Ok(to_seek.try_into().unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
@ -126,10 +349,56 @@ impl Write for RawFile {
|
||||
}
|
||||
}
|
||||
|
||||
impl Seek for RawFile {
|
||||
fn seek(&mut self, newpos: SeekFrom) -> std::io::Result<u64> {
|
||||
match self.file.seek(newpos) {
|
||||
Ok(pos) => {
|
||||
self.position = pos;
|
||||
Ok(pos)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PunchHole for RawFile {
|
||||
fn punch_hole(&mut self, offset: u64, length: u64) -> std::io::Result<()> {
|
||||
self.file.punch_hole(offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
impl SeekHole for RawFile {
|
||||
fn seek_hole(&mut self, offset: u64) -> std::io::Result<Option<u64>> {
|
||||
match self.file.seek_hole(offset) {
|
||||
Ok(pos) => {
|
||||
if let Some(p) = pos {
|
||||
self.position = p;
|
||||
}
|
||||
Ok(pos)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
fn seek_data(&mut self, offset: u64) -> std::io::Result<Option<u64>> {
|
||||
match self.file.seek_data(offset) {
|
||||
Ok(pos) => {
|
||||
if let Some(p) = pos {
|
||||
self.position = p;
|
||||
}
|
||||
Ok(pos)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for RawFile {
|
||||
fn clone(&self) -> Self {
|
||||
RawFile {
|
||||
file: self.file.try_clone().expect("RawFile cloning failed"),
|
||||
alignment: self.alignment,
|
||||
position: self.position,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -937,7 +937,7 @@ impl DeviceManager {
|
||||
.map_err(DeviceManagerError::DetectImageType)?;
|
||||
match image_type {
|
||||
ImageType::Raw => {
|
||||
let raw_img = vm_virtio::RawFile::new(raw_img);
|
||||
let raw_img = vm_virtio::RawFile::new(raw_img, false);
|
||||
let dev = vm_virtio::Block::new(
|
||||
raw_img,
|
||||
disk_cfg.path.clone(),
|
||||
|
Loading…
x
Reference in New Issue
Block a user