vm-virtio: block: Add support for alignment restrictions

Doing I/O on an image opened with O_DIRECT requires adhering to
certain restrictions; specifically, the following elements must be aligned:

 - Address of the source/destination memory buffer.
 - File offset.
 - Length of the data to be read/written.

The actual alignment value depends on various elements, and according
to open(2) "(...) there is currently no filesystem-independent
interface for an application to discover these restrictions (...)".

To discover this value, we iterate through a list of candidate alignments
(currently 512 and 4096), calling pread() with each one and checking
whether the operation succeeds.

We also extend RawFile so that it can be used as a backend for QcowFile,
so that the latter can easily be adapted to support O_DIRECT too.

Signed-off-by: Sergio Lopez <slp@redhat.com>
This commit is contained in:
Sergio Lopez 2020-01-13 14:10:51 +01:00 committed by Rob Bradford
parent e483cde1bb
commit c5a656c9dc
3 changed files with 286 additions and 15 deletions

View File

@ -87,7 +87,9 @@ impl VhostUserBlkBackend {
let image_id = build_disk_image_id(&PathBuf::from(&image_path));
let image_type = qcow::detect_image_type(&raw_img).unwrap();
let mut image = match image_type {
ImageType::Raw => Box::new(vm_virtio::RawFile::new(raw_img)) as Box<dyn DiskFile>,
ImageType::Raw => {
Box::new(vm_virtio::RawFile::new(raw_img, false)) as Box<dyn DiskFile>
}
ImageType::Qcow2 => Box::new(QcowFile::from(raw_img).unwrap()) as Box<dyn DiskFile>,
};

View File

@ -16,21 +16,24 @@ use super::{
use crate::VirtioInterrupt;
use arc_swap::ArcSwap;
use epoll;
use libc::EFD_NONBLOCK;
use libc::{c_void, EFD_NONBLOCK};
use std::alloc::{alloc_zeroed, dealloc, Layout};
use std::cmp;
use std::fs::File;
use std::convert::TryInto;
use std::fs::{File, Metadata};
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::os::linux::fs::MetadataExt;
use std::os::unix::io::AsRawFd;
use std::os::unix::io::{AsRawFd, RawFd};
use std::path::PathBuf;
use std::result;
use std::slice;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
use virtio_bindings::bindings::virtio_blk::*;
use vm_device::{Migratable, MigratableError, Pausable, Snapshotable};
use vm_memory::{Bytes, GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryMmap};
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::{eventfd::EventFd, seek_hole::SeekHole, write_zeroes::PunchHole};
// NOTE(review): by its name, the size of the device configuration space;
// the unit (presumably bytes) is not visible in this hunk — confirm at the use site.
const CONFIG_SPACE_SIZE: usize = 8;
// Shift amount of 9, i.e. 1 << 9 = 512; presumably converts between sector
// numbers and byte offsets for 512-byte sectors — confirm at the use site.
const SECTOR_SHIFT: u8 = 9;
@ -94,31 +97,251 @@ impl ExecuteError {
/// Shorthand for the set of capabilities required from a disk backend:
/// the type must be readable, seekable, writable and cloneable.
pub trait DiskFile: Read + Seek + Write + Clone {}

/// Blanket implementation: any type that satisfies all four bounds is
/// automatically a `DiskFile`.
impl<D> DiskFile for D where D: Read + Seek + Write + Clone {}
/// A disk image file wrapper that remembers the alignment its I/O must
/// respect and keeps its own copy of the current file position.
#[derive(Debug)]
pub struct RawFile {
/// Underlying file that every operation is forwarded to.
file: File,
/// Required alignment in bytes; `0` means "no alignment restriction"
/// (see `is_aligned`, which accepts everything in that case).
alignment: usize,
/// Current file offset as tracked by the read, write and seek paths.
position: u64,
}
/// Candidate alignments, in bytes, probed in this order by `RawFile::new`
/// when direct I/O is requested; the first one that works is used.
const BLK_ALIGNMENTS: [usize; 2] = [512, 4096];
/// Probes whether `fd` accepts I/O performed with the given `alignment`.
///
/// A zeroed buffer of `alignment` bytes, itself aligned to `alignment`, is
/// allocated and a single `pread()` of `alignment` bytes is issued at file
/// offset `alignment`, so that buffer address, length and offset all share
/// the alignment under test.
///
/// Returns `true` when `pread()` does not report an error (a short read,
/// including 0 bytes at end of file, still counts as success).
///
/// # Panics
///
/// Panics if `alignment` cannot form a valid `Layout` (zero or not a power
/// of two) or does not fit the offset type expected by `pread()`.
fn is_valid_alignment(fd: RawFd, alignment: usize) -> bool {
    let layout = Layout::from_size_align(alignment, alignment).unwrap();

    // SAFETY: `layout` has a non-zero size, because a zero alignment has
    // already been rejected by `Layout::from_size_align` above.
    let ptr = unsafe { alloc_zeroed(layout) };
    if ptr.is_null() {
        // Without this check a failed allocation would be handed to
        // `dealloc` below, which is undefined behaviour.
        std::alloc::handle_alloc_error(layout);
    }

    // SAFETY: `ptr` is non-null and points to `alignment` writable bytes.
    let ret = unsafe {
        ::libc::pread(
            fd,
            ptr as *mut c_void,
            alignment,
            alignment.try_into().unwrap(),
        )
    };

    // SAFETY: `ptr` was returned by `alloc_zeroed` with this same `layout`
    // and has not been freed yet.
    unsafe { dealloc(ptr, layout) };

    ret >= 0
}
impl RawFile {
pub fn new(file: File) -> Self {
RawFile { file }
pub fn new(file: File, direct_io: bool) -> Self {
// Assume no alignment restrictions if we aren't using O_DIRECT.
let mut alignment = 0;
if direct_io {
for align in &BLK_ALIGNMENTS {
if is_valid_alignment(file.as_raw_fd(), *align) {
alignment = *align;
break;
}
}
}
RawFile {
file,
alignment: alignment.try_into().unwrap(),
position: 0,
}
}
/// Rounds `offset` up to a multiple of the alignment.
///
/// The result is never smaller than one alignment unit: `round_up(0)`
/// yields `alignment`, not 0. The unaligned read/write paths use the
/// result to size their bounce buffer, which must not be empty.
///
/// With an alignment of 0 (no restriction) `offset` is returned unchanged.
///
/// The previous formula, `((offset / (align + 1)) + 1) * align`, returned
/// values smaller than `offset` for larger inputs (e.g. 1025 with an
/// alignment of 512 gave 1024).
fn round_up(&self, offset: u64) -> u64 {
    let align: u64 = self.alignment.try_into().unwrap();
    if align == 0 {
        return offset;
    }

    // Ceiling division written without `offset + align - 1`, so it cannot
    // overflow for offsets close to `u64::MAX`.
    let blocks = offset / align + u64::from(offset % align != 0);

    blocks.max(1) * align
}
/// Rounds `offset` down to a multiple of the alignment.
///
/// Must only be called with a non-zero alignment: the division below
/// panics when the alignment is 0.
fn round_down(&self, offset: u64) -> u64 {
    let align: u64 = self.alignment.try_into().unwrap();
    let whole_blocks = offset / align;
    whole_blocks * align
}
/// Tells whether an I/O operation on `buf` at the current position can be
/// issued directly.
///
/// With an alignment of 0 everything is considered aligned. Otherwise the
/// tracked file position, the address of `buf` and the length of `buf`
/// must all be multiples of the alignment.
fn is_aligned(&self, buf: &[u8]) -> bool {
    match self.alignment {
        // No restriction at all; this arm also keeps the modulo
        // operations below away from a zero divisor.
        0 => true,
        align => {
            let align64: u64 = align.try_into().unwrap();
            let position_ok = self.position % align64 == 0;
            let address_ok = (buf.as_ptr() as usize) % align == 0;
            let length_ok = buf.len() % align == 0;
            position_ok && address_ok && length_ok
        }
    }
}
/// Forwards to `File::set_len` on the underlying file, returning its result.
/// The tracked `position` is not touched.
pub fn set_len(&self, size: u64) -> std::io::Result<()> {
self.file.set_len(size)
}
/// Forwards to `File::metadata` on the underlying file, returning its result.
pub fn metadata(&self) -> std::io::Result<Metadata> {
self.file.metadata()
}
/// Creates another `RawFile` backed by a clone of the underlying file
/// handle, carrying over the alignment and the tracked position.
///
/// # Errors
///
/// Returns the error reported by `File::try_clone` if the handle cannot
/// be duplicated, instead of panicking: the signature already promises
/// an `io::Result`.
pub fn try_clone(&self) -> std::io::Result<RawFile> {
    let file = self.file.try_clone()?;

    Ok(RawFile {
        file,
        alignment: self.alignment,
        position: self.position,
    })
}
/// Forwards to `File::sync_all` on the underlying file, returning its result.
pub fn sync_all(&self) -> std::io::Result<()> {
self.file.sync_all()
}
/// Forwards to `File::sync_data` on the underlying file, returning its result.
pub fn sync_data(&self) -> std::io::Result<()> {
self.file.sync_data()
}
}
// `Read` for `RawFile`: reads go straight to the file when position, buffer
// address and length satisfy the alignment, and through an aligned
// temporary ("bounce") buffer otherwise.
//
// NOTE(review): this span comes from a rendered diff without +/- markers.
// The three lines right after the `fn read` signature and the
// `impl Seek ... self.file.seek(pos)` lines further down appear to be the
// removed pre-change code shown inline — confirm against the real file.
impl Read for RawFile {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
self.file.read(buf)
}
}
// Aligned case: read directly and advance the tracked position by the
// number of bytes actually read.
if self.is_aligned(buf) {
match self.file.read(buf) {
Ok(r) => {
self.position = self.position.checked_add(r.try_into().unwrap()).unwrap();
Ok(r)
}
Err(e) => Err(e),
}
} else {
// Unaligned case. `rounded_pos` is the aligned offset at or below the
// current position; `file_offset` is where the requested data starts
// inside the bounce buffer.
let rounded_pos: u64 = self.round_down(self.position);
let file_offset: usize = self
.position
.checked_sub(rounded_pos)
.unwrap()
.try_into()
.unwrap();
let buf_len: usize = buf.len();
// Size of the bounce buffer. The slice taken from `tmp_buf` below relies
// on `round_up` returning at least `file_offset + buf_len`; a smaller
// value makes that slice panic.
let rounded_len: usize = self
.round_up(
file_offset
.checked_add(buf_len)
.unwrap()
.try_into()
.unwrap(),
)
.try_into()
.unwrap();
impl Seek for RawFile {
fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
self.file.seek(pos)
// Zero-initialised bounce buffer aligned to `self.alignment`.
// NOTE(review): the pointer returned by `alloc_zeroed` is not checked for
// null before a slice is built from it.
let layout = Layout::from_size_align(rounded_len, self.alignment).unwrap();
let tmp_ptr = unsafe { alloc_zeroed(layout) };
let tmp_buf = unsafe { slice::from_raw_parts_mut(tmp_ptr, rounded_len) };
// This can eventually be replaced with read_at once its interface
// has been stabilized.
let ret = unsafe {
::libc::pread64(
self.file.as_raw_fd(),
tmp_buf.as_mut_ptr() as *mut c_void,
tmp_buf.len(),
rounded_pos.try_into().unwrap(),
)
};
if ret < 0 {
// NOTE(review): `last_os_error()` is read after `dealloc`; presumably the
// deallocation leaves errno untouched — confirm.
unsafe { dealloc(tmp_ptr, layout) };
return Err(io::Error::last_os_error());
}
let read: usize = ret.try_into().unwrap();
// Fewer bytes than the leading padding were read: nothing belongs to the
// caller, so report 0 bytes.
if read < file_offset {
unsafe { dealloc(tmp_ptr, layout) };
return Ok(0);
}
// Bytes that belong to the caller, capped at the size of `buf`.
let mut to_copy = read - file_offset;
if to_copy > buf_len {
to_copy = buf_len;
}
// NOTE(review): this copies `buf_len` bytes even when `to_copy` is smaller;
// the extra bytes are the zeros `alloc_zeroed` left in `tmp_buf`.
buf.copy_from_slice(&tmp_buf[file_offset..(file_offset + buf_len)]);
unsafe { dealloc(tmp_ptr, layout) };
// `pread64` does not move the file offset, so advance it explicitly through
// `seek`. NOTE(review): a seek failure panics here instead of being returned.
self.seek(SeekFrom::Current(to_copy.try_into().unwrap()))
.unwrap();
Ok(to_copy.try_into().unwrap())
}
}
}
impl Write for RawFile {
// Writes `buf` at the current position. Aligned requests go straight to the
// file; unaligned ones are turned into a read-modify-write of the enclosing
// aligned region through a temporary ("bounce") buffer.
//
// NOTE(review): this span comes from a rendered diff without +/- markers.
// The `self.file.write(buf)` line right after the signature appears to be
// the removed pre-change body shown inline — confirm against the real file.
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.file.write(buf)
// Aligned case: write directly and advance the tracked position by the
// number of bytes actually written.
if self.is_aligned(buf) {
match self.file.write(buf) {
Ok(r) => {
self.position = self.position.checked_add(r.try_into().unwrap()).unwrap();
Ok(r)
}
Err(e) => Err(e),
}
} else {
// Unaligned case. `rounded_pos` is the aligned offset at or below the
// current position; `file_offset` is where the caller's data starts inside
// the bounce buffer.
let rounded_pos: u64 = self.round_down(self.position);
let file_offset: usize = self
.position
.checked_sub(rounded_pos)
.unwrap()
.try_into()
.unwrap();
let buf_len: usize = buf.len();
// Size of the bounce buffer. The slice taken from `tmp_buf` below relies
// on `round_up` returning at least `file_offset + buf_len`; a smaller
// value makes that slice panic.
let rounded_len: usize = self
.round_up(
file_offset
.checked_add(buf_len)
.unwrap()
.try_into()
.unwrap(),
)
.try_into()
.unwrap();
// Zero-initialised bounce buffer aligned to `self.alignment`.
// NOTE(review): the pointer returned by `alloc_zeroed` is not checked for
// null before a slice is built from it.
let layout = Layout::from_size_align(rounded_len, self.alignment).unwrap();
let tmp_ptr = unsafe { alloc_zeroed(layout) };
let tmp_buf = unsafe { slice::from_raw_parts_mut(tmp_ptr, rounded_len) };
// Read the existing contents of the aligned region first, so the bytes
// around the caller's range are written back unchanged.
// This can eventually be replaced with read_at once its interface
// has been stabilized.
let ret = unsafe {
::libc::pread64(
self.file.as_raw_fd(),
tmp_buf.as_mut_ptr() as *mut c_void,
tmp_buf.len(),
rounded_pos.try_into().unwrap(),
)
};
// Only failure is checked; after a short read the rest of `tmp_buf` keeps
// the zeros from `alloc_zeroed`.
if ret < 0 {
unsafe { dealloc(tmp_ptr, layout) };
return Err(io::Error::last_os_error());
};
// Overlay the caller's data on top of what was read.
tmp_buf[file_offset..(file_offset + buf_len)].copy_from_slice(buf);
// This can eventually be replaced with write_at once its interface
// has been stabilized.
let ret = unsafe {
::libc::pwrite64(
self.file.as_raw_fd(),
tmp_buf.as_ptr() as *const c_void,
tmp_buf.len(),
rounded_pos.try_into().unwrap(),
)
};
// NOTE(review): `last_os_error()` below is read after `dealloc`; presumably
// the deallocation leaves errno untouched — confirm.
unsafe { dealloc(tmp_ptr, layout) };
if ret < 0 {
return Err(io::Error::last_os_error());
}
let written: usize = ret.try_into().unwrap();
// Fewer bytes than the leading padding were written: none of the caller's
// data reached the file, so report 0 bytes.
if written < file_offset {
Ok(0)
} else {
// Caller-visible byte count, capped at the size of `buf`.
let mut to_seek = written - file_offset;
if to_seek > buf_len {
to_seek = buf_len;
}
// `pwrite64` does not move the file offset, so advance it explicitly through
// `seek`. NOTE(review): a seek failure panics here instead of being returned.
self.seek(SeekFrom::Current(to_seek.try_into().unwrap()))
.unwrap();
Ok(to_seek.try_into().unwrap())
}
}
}
fn flush(&mut self) -> std::io::Result<()> {
@ -126,10 +349,56 @@ impl Write for RawFile {
}
}
impl Seek for RawFile {
    /// Seeks the underlying file and, on success, records the resulting
    /// offset in `position` before returning it. On failure the error is
    /// returned and `position` is left unchanged.
    fn seek(&mut self, newpos: SeekFrom) -> std::io::Result<u64> {
        let pos = self.file.seek(newpos)?;
        self.position = pos;
        Ok(pos)
    }
}
// `PunchHole` for `RawFile`: delegated unchanged to the underlying file.
impl PunchHole for RawFile {
// Forwards `offset` and `length` to `punch_hole` on the underlying file and
// returns its result; the tracked `position` is not modified.
fn punch_hole(&mut self, offset: u64, length: u64) -> std::io::Result<()> {
self.file.punch_hole(offset, length)
}
}
impl SeekHole for RawFile {
    /// Delegates to `seek_hole` on the underlying file. When an offset is
    /// reported (`Some`), it is also stored as the tracked `position`;
    /// `None` and errors leave `position` unchanged.
    fn seek_hole(&mut self, offset: u64) -> std::io::Result<Option<u64>> {
        let found = self.file.seek_hole(offset)?;
        if let Some(p) = found {
            self.position = p;
        }
        Ok(found)
    }

    /// Delegates to `seek_data` on the underlying file. When an offset is
    /// reported (`Some`), it is also stored as the tracked `position`;
    /// `None` and errors leave `position` unchanged.
    fn seek_data(&mut self, offset: u64) -> std::io::Result<Option<u64>> {
        let found = self.file.seek_data(offset)?;
        if let Some(p) = found {
            self.position = p;
        }
        Ok(found)
    }
}
impl Clone for RawFile {
    /// Builds a new `RawFile` around a clone of the underlying file handle,
    /// copying the alignment and the tracked position.
    ///
    /// # Panics
    ///
    /// Panics with "RawFile cloning failed" if the file handle cannot be
    /// cloned, since `Clone::clone` has no way to report an error.
    fn clone(&self) -> Self {
        let file = self.file.try_clone().expect("RawFile cloning failed");
        let alignment = self.alignment;
        let position = self.position;

        RawFile {
            file,
            alignment,
            position,
        }
    }
}

View File

@ -937,7 +937,7 @@ impl DeviceManager {
.map_err(DeviceManagerError::DetectImageType)?;
match image_type {
ImageType::Raw => {
let raw_img = vm_virtio::RawFile::new(raw_img);
let raw_img = vm_virtio::RawFile::new(raw_img, false);
let dev = vm_virtio::Block::new(
raw_img,
disk_cfg.path.clone(),