// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use epoll;
use libc::EFD_NONBLOCK;
use std::cmp;
use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::os::linux::fs::MetadataExt;
use std::os::unix::io::AsRawFd;
use std::path::PathBuf;
use std::result;
use std::sync::Arc;
use std::thread;

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, DescriptorChain, DeviceEventT, Queue, VirtioDevice,
    VirtioDeviceType, VirtioInterruptType,
};
use crate::VirtioInterrupt;
use virtio_bindings::virtio_blk::*;
use vm_memory::{Bytes, GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryMmap};
use vmm_sys_util::eventfd::EventFd;

const CONFIG_SPACE_SIZE: usize = 8;
const SECTOR_SHIFT: u8 = 9;
pub const SECTOR_SIZE: u64 = 0x01u64 << SECTOR_SHIFT;
const QUEUE_SIZE: u16 = 256;
const NUM_QUEUES: usize = 1;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: DeviceEventT = 0;
// The device has been dropped.
pub const KILL_EVENT: DeviceEventT = 1;
// Number of DeviceEventT events supported by this implementation.
pub const BLOCK_EVENTS_COUNT: usize = 2;

#[derive(Debug)]
enum Error {
    /// Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    /// Guest gave us offsets that would have overflowed a usize.
    CheckedOffset(GuestAddress, usize),
    /// Guest gave us a write-only descriptor where the protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    /// Guest gave us a read-only descriptor where the protocol says to write to.
    UnexpectedReadOnlyDescriptor,
    /// Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    /// Guest gave us a descriptor that was too short to use.
    DescriptorLengthTooSmall,
    /// Getting a block's metadata failed for any reason.
    GetFileMetadata,
    /// The requested operation would cause a seek beyond disk end.
    InvalidOffset,
}

#[derive(Debug)]
enum ExecuteError {
    BadRequest(Error),
    Flush(io::Error),
    Read(GuestMemoryError),
    Seek(io::Error),
    Write(GuestMemoryError),
    Unsupported(u32),
}

impl ExecuteError {
    fn status(&self) -> u32 {
        match *self {
            ExecuteError::BadRequest(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::Flush(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::Read(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::Seek(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::Write(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::Unsupported(_) => VIRTIO_BLK_S_UNSUPP,
        }
    }
}
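
/// Capabilities a backing disk image must provide. Blanket-implemented for
/// any clonable type that is also Read + Seek + Write.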
pub trait DiskFile: Read + Seek + Write + Clone {}
impl<D: Read + Seek + Write + Clone> DiskFile for D {}
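
/// A disk image backed directly by a host file.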
pub struct RawFile {
    file: File,
}

impl RawFile {
    pub fn new(file: File) -> Self {
        RawFile { file }
    }
}

impl Read for RawFile {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.file.read(buf)
    }
}

impl Seek for RawFile {
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        self.file.seek(pos)
    }
}

impl Write for RawFile {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        self.file.write(buf)
    }

    fn flush(&mut self) -> std::io::Result<()> {
        self.file.flush()
    }
}
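
// Note: File::try_clone() duplicates the file descriptor, so all clones share
// the same underlying file offset; this is why execute() seeks explicitly
// before every transfer.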
impl Clone for RawFile {
    fn clone(&self) -> Self {
        RawFile {
            file: self.file.try_clone().expect("RawFile cloning failed"),
        }
    }
}
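
/// Type of a virtio-blk request, decoded from the first field of the request
/// header; anything unrecognized is carried as Unsupported.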
#[derive(Clone, Copy, Debug, PartialEq)]
enum RequestType {
    In,
    Out,
    Flush,
    GetDeviceID,
    Unsupported(u32),
}
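
/// Reads the `type` field of the virtio-blk request header located at
/// `desc_addr` in guest memory.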
fn request_type(
    mem: &GuestMemoryMmap,
    desc_addr: GuestAddress,
) -> result::Result<RequestType, Error> {
    let type_ = mem.read_obj(desc_addr).map_err(Error::GuestMemory)?;
    match type_ {
        VIRTIO_BLK_T_IN => Ok(RequestType::In),
        VIRTIO_BLK_T_OUT => Ok(RequestType::Out),
        VIRTIO_BLK_T_FLUSH => Ok(RequestType::Flush),
        VIRTIO_BLK_T_GET_ID => Ok(RequestType::GetDeviceID),
        t => Ok(RequestType::Unsupported(t)),
    }
}
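
/// Reads the `sector` field of the request header, which lives at offset 8
/// from the start of the header.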
fn sector(mem: &GuestMemoryMmap, desc_addr: GuestAddress) -> result::Result<u64, Error> {
    const SECTOR_OFFSET: usize = 8;
    let addr = match mem.checked_offset(desc_addr, SECTOR_OFFSET) {
        Some(v) => v,
        None => return Err(Error::CheckedOffset(desc_addr, SECTOR_OFFSET)),
    };

    mem.read_obj(addr).map_err(Error::GuestMemory)
}
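
/// Derives a device id string from the metadata (device, rdev and inode
/// numbers) of the backing file.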
fn build_device_id(disk_path: &PathBuf) -> result::Result<String, Error> {
    let blk_metadata = match disk_path.metadata() {
        Err(_) => return Err(Error::GetFileMetadata),
        Ok(m) => m,
    };
    // This is how kvmtool does it.
    let device_id = format!(
        "{}{}{}",
        blk_metadata.st_dev(),
        blk_metadata.st_rdev(),
        blk_metadata.st_ino()
    );
    Ok(device_id)
}
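
/// Returns the device id padded or truncated to exactly VIRTIO_BLK_ID_BYTES,
/// falling back to an all-zero id if the metadata lookup fails.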
fn build_disk_image_id(disk_path: &PathBuf) -> Vec<u8> {
    let mut default_disk_image_id = vec![0; VIRTIO_BLK_ID_BYTES as usize];
    match build_device_id(disk_path) {
        Err(_) => {
            warn!("Could not generate device id. We'll use a default.");
        }
        Ok(m) => {
            // The kernel will only read up to VIRTIO_BLK_ID_BYTES bytes;
            // a shorter id leaves the remaining bytes zeroed.
            let disk_id = m.as_bytes();
            let bytes_to_copy = cmp::min(disk_id.len(), VIRTIO_BLK_ID_BYTES as usize);
            default_disk_image_id[..bytes_to_copy].clone_from_slice(&disk_id[..bytes_to_copy])
        }
    }
    default_disk_image_id
}
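
/// A virtio-blk request parsed from a descriptor chain: the header fields
/// (type and sector), the guest buffer to transfer, and the address of the
/// status byte through which completion is reported.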
struct Request {
    request_type: RequestType,
    sector: u64,
    data_addr: GuestAddress,
    data_len: u32,
    status_addr: GuestAddress,
}

impl Request {
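    /// Parses a descriptor chain into a Request, enforcing the virtio-blk
    /// layout: a readable header, an optional data descriptor (mandatory for
    /// everything but flush) and a writable status descriptor.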
    fn parse(
        avail_desc: &DescriptorChain,
        mem: &GuestMemoryMmap,
    ) -> result::Result<Request, Error> {
        // The head contains the request type which MUST be readable.
        if avail_desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }

        let mut req = Request {
            request_type: request_type(mem, avail_desc.addr)?,
            sector: sector(mem, avail_desc.addr)?,
            data_addr: GuestAddress(0),
            data_len: 0,
            status_addr: GuestAddress(0),
        };

        let data_desc;
        let status_desc;
        let desc = avail_desc
            .next_descriptor()
            .ok_or(Error::DescriptorChainTooShort)?;

        if !desc.has_next() {
            status_desc = desc;
            // Only flush requests are allowed to skip the data descriptor.
            if req.request_type != RequestType::Flush {
                return Err(Error::DescriptorChainTooShort);
            }
        } else {
            data_desc = desc;
            status_desc = data_desc
                .next_descriptor()
                .ok_or(Error::DescriptorChainTooShort)?;

            if data_desc.is_write_only() && req.request_type == RequestType::Out {
                return Err(Error::UnexpectedWriteOnlyDescriptor);
            }
            if !data_desc.is_write_only() && req.request_type == RequestType::In {
                return Err(Error::UnexpectedReadOnlyDescriptor);
            }
            if !data_desc.is_write_only() && req.request_type == RequestType::GetDeviceID {
                return Err(Error::UnexpectedReadOnlyDescriptor);
            }

            req.data_addr = data_desc.addr;
            req.data_len = data_desc.len;
        }

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if status_desc.len < 1 {
            return Err(Error::DescriptorLengthTooSmall);
        }

        req.status_addr = status_desc.addr;

        Ok(req)
    }
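
    /// Performs the I/O described by this request against `disk`. Returns the
    /// number of bytes written to guest memory (the used-ring length): the
    /// data length for reads, 0 for everything else.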
    #[allow(clippy::ptr_arg)]
    fn execute<T: Seek + Read + Write>(
        &self,
        disk: &mut T,
        disk_nsectors: u64,
        mem: &GuestMemoryMmap,
        disk_id: &Vec<u8>,
    ) -> result::Result<u32, ExecuteError> {
        let mut top: u64 = u64::from(self.data_len) / SECTOR_SIZE;
        if u64::from(self.data_len) % SECTOR_SIZE != 0 {
            top += 1;
        }
        top = top
            .checked_add(self.sector)
            .ok_or(ExecuteError::BadRequest(Error::InvalidOffset))?;
        if top > disk_nsectors {
            return Err(ExecuteError::BadRequest(Error::InvalidOffset));
        }

        disk.seek(SeekFrom::Start(self.sector << SECTOR_SHIFT))
            .map_err(ExecuteError::Seek)?;

        match self.request_type {
            RequestType::In => {
                mem.read_exact_from(self.data_addr, disk, self.data_len as usize)
                    .map_err(ExecuteError::Read)?;
                return Ok(self.data_len);
            }
            RequestType::Out => {
                mem.write_all_to(self.data_addr, disk, self.data_len as usize)
                    .map_err(ExecuteError::Write)?;
            }
            RequestType::Flush => match disk.flush() {
                Ok(_) => {
                    return Ok(0);
                }
                Err(e) => return Err(ExecuteError::Flush(e)),
            },
            RequestType::GetDeviceID => {
                if (self.data_len as usize) < disk_id.len() {
                    return Err(ExecuteError::BadRequest(Error::InvalidOffset));
                }
                mem.write_slice(disk_id.as_slice(), self.data_addr)
                    .map_err(ExecuteError::Write)?;
            }
            RequestType::Unsupported(t) => return Err(ExecuteError::Unsupported(t)),
        };
        Ok(0)
    }
}
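
/// Worker side of the device: owns the queues and the disk image, and runs
/// the epoll loop on its own thread.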
struct BlockEpollHandler<T: DiskFile> {
    queues: Vec<Queue>,
    mem: GuestMemoryMmap,
    disk_image: T,
    disk_nsectors: u64,
    interrupt_cb: Arc<VirtioInterrupt>,
    disk_image_id: Vec<u8>,
}

impl<T: DiskFile> BlockEpollHandler<T> {
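    /// Drains the available ring of the given queue, executes each request
    /// and fills in the used ring. Returns true if any descriptor was
    /// consumed, i.e. if the guest needs to be notified.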
    fn process_queue(&mut self, queue_index: usize) -> bool {
        let queue = &mut self.queues[queue_index];

        let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
        let mut used_count = 0;
        for avail_desc in queue.iter(&self.mem) {
            let len;
            match Request::parse(&avail_desc, &self.mem) {
                Ok(request) => {
                    let status = match request.execute(
                        &mut self.disk_image,
                        self.disk_nsectors,
                        &self.mem,
                        &self.disk_image_id,
                    ) {
                        Ok(l) => {
                            len = l;
                            VIRTIO_BLK_S_OK
                        }
                        Err(e) => {
                            error!("Failed to execute request: {:?}", e);
                            len = 1; // We need at least 1 byte for the status.
                            e.status()
                        }
                    };
                    // We use unwrap because the request parsing process already checked that the
                    // status_addr was valid.
                    self.mem.write_obj(status, request.status_addr).unwrap();
                }
                Err(e) => {
                    error!("Failed to parse available descriptor chain: {:?}", e);
                    len = 0;
                }
            }
            used_desc_heads[used_count] = (avail_desc.index, len);
            used_count += 1;
        }

        for &(desc_index, len) in &used_desc_heads[..used_count] {
            queue.add_used(&self.mem, desc_index, len);
        }
        used_count > 0
    }

    fn signal_used_queue(&self, queue_index: usize) -> result::Result<(), DeviceError> {
        (self.interrupt_cb)(&VirtioInterruptType::Queue, Some(&self.queues[queue_index])).map_err(
            |e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            },
        )
    }

    #[allow(dead_code)]
    fn update_disk_image(
        &mut self,
        disk_image: T,
        disk_path: &PathBuf,
    ) -> result::Result<(), DeviceError> {
        self.disk_image = disk_image;
        self.disk_nsectors = self
            .disk_image
            .seek(SeekFrom::End(0))
            .map_err(DeviceError::IoError)?
            / SECTOR_SIZE;
        self.disk_image_id = build_disk_image_id(disk_path);
        Ok(())
    }
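
    /// The worker thread body: waits on the queue and kill EventFds and
    /// dispatches until a kill event or an unrecoverable error occurs.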
    fn run(&mut self, queue_evt: EventFd, kill_evt: EventFd) -> result::Result<(), DeviceError> {
        // Create the epoll file descriptor
        let epoll_fd = epoll::create(true).map_err(DeviceError::EpollCreateFd)?;

        // Add events
        epoll::ctl(
            epoll_fd,
            epoll::ControlOptions::EPOLL_CTL_ADD,
            queue_evt.as_raw_fd(),
            epoll::Event::new(epoll::Events::EPOLLIN, u64::from(QUEUE_AVAIL_EVENT)),
        )
        .map_err(DeviceError::EpollCtl)?;
        epoll::ctl(
            epoll_fd,
            epoll::ControlOptions::EPOLL_CTL_ADD,
            kill_evt.as_raw_fd(),
            epoll::Event::new(epoll::Events::EPOLLIN, u64::from(KILL_EVENT)),
        )
        .map_err(DeviceError::EpollCtl)?;

        const EPOLL_EVENTS_LEN: usize = 100;
        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];

        'epoll: loop {
            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == io::ErrorKind::Interrupted {
                        // It's well defined from the epoll_wait() syscall
                        // documentation that the epoll loop can be interrupted
                        // before any of the requested events occurred or the
                        // timeout expired. In both those cases, epoll_wait()
                        // returns an error of type EINTR, but this should not
                        // be considered as a regular error. Instead it is more
                        // appropriate to retry, by calling into epoll_wait().
                        continue;
                    }
                    return Err(DeviceError::EpollWait(e));
                }
            };

            for event in events.iter().take(num_events) {
                let ev_type = event.data as u16;

                match ev_type {
                    QUEUE_AVAIL_EVENT => {
                        if let Err(e) = queue_evt.read() {
                            error!("Failed to get queue event: {:?}", e);
                            break 'epoll;
                        } else if self.process_queue(0) {
                            if let Err(e) = self.signal_used_queue(0) {
                                error!("Failed to signal used queue: {:?}", e);
                                break 'epoll;
                            }
                        }
                    }
                    KILL_EVENT => {
                        debug!("KILL_EVENT received, stopping epoll loop");
                        break 'epoll;
                    }
                    _ => {
                        error!("Unknown event for virtio-block");
                    }
                }
            }
        }

        Ok(())
    }
}

/// Virtio device for exposing block level read/write operations on a host file.
pub struct Block<T: DiskFile> {
    kill_evt: Option<EventFd>,
    disk_image: Option<T>,
    disk_path: PathBuf,
    disk_nsectors: u64,
    avail_features: u64,
    acked_features: u64,
    config_space: Vec<u8>,
    queue_evt: Option<EventFd>,
    interrupt_cb: Option<Arc<VirtioInterrupt>>,
}
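
/// Builds the virtio-blk config space: the disk capacity, in 512-byte
/// sectors, encoded as a little-endian u64. For example, a 4 MiB image
/// (8192 sectors) yields `[0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]`.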
pub fn build_config_space(disk_size: u64) -> Vec<u8> {
    // We only support disk size, which uses the first two words of the configuration space.
    // If the image is not a multiple of the sector size, the trailing bytes are not exposed.
    // The config space is little endian.
    let mut config = Vec::with_capacity(CONFIG_SPACE_SIZE);
    let num_sectors = disk_size >> SECTOR_SHIFT;
    for i in 0..CONFIG_SPACE_SIZE {
        config.push((num_sectors >> (8 * i)) as u8);
    }
    config
}

impl<T: DiskFile> Block<T> {
    /// Create a new virtio block device that operates on the given file.
    ///
    /// The given file must be seekable and sizable.
    pub fn new(
        mut disk_image: T,
        disk_path: PathBuf,
        is_disk_read_only: bool,
    ) -> io::Result<Block<T>> {
        let disk_size = disk_image.seek(SeekFrom::End(0))?;
        if disk_size % SECTOR_SIZE != 0 {
            warn!(
                "Disk size {} is not a multiple of sector size {}; \
                 the remainder will not be visible to the guest.",
                disk_size, SECTOR_SIZE
            );
        }

        let mut avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_BLK_F_FLUSH);

        if is_disk_read_only {
            avail_features |= 1u64 << VIRTIO_BLK_F_RO;
        }

        Ok(Block {
            kill_evt: None,
            disk_image: Some(disk_image),
            disk_path,
            disk_nsectors: disk_size / SECTOR_SIZE,
            avail_features,
            acked_features: 0u64,
            config_space: build_config_space(disk_size),
            queue_evt: None,
            interrupt_cb: None,
        })
    }
}

impl<T: DiskFile> Drop for Block<T> {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl<T: 'static + DiskFile + Send> VirtioDevice for Block<T> {
    fn device_type(&self) -> u32 {
        VirtioDeviceType::TYPE_BLOCK as u32
    }

    fn queue_max_sizes(&self) -> &[u16] {
        QUEUE_SIZES
    }

    fn features(&self, page: u32) -> u32 {
        match page {
            // Get the lower 32-bits of the features bitfield.
            0 => self.avail_features as u32,
            // Get the upper 32-bits of the features bitfield.
            1 => (self.avail_features >> 32) as u32,
            _ => {
                warn!("Received request for unknown features page.");
                0u32
            }
        }
    }

    fn ack_features(&mut self, page: u32, value: u32) {
        let mut v = match page {
            0 => u64::from(value),
            1 => u64::from(value) << 32,
            _ => {
                warn!("Cannot acknowledge unknown features page.");
                0u64
            }
        };

        // Check if the guest is ACK'ing a feature that we didn't claim to have.
        let unrequested_features = v & !self.avail_features;
        if unrequested_features != 0 {
            warn!("Received acknowledge request for unknown feature.");

            // Don't count these features as acked.
            v &= !unrequested_features;
        }
        self.acked_features |= v;
    }

    fn read_config(&self, offset: u64, mut data: &mut [u8]) {
        let config_len = self.config_space.len() as u64;
        if offset >= config_len {
            error!("Failed to read config space");
            return;
        }
        if let Some(end) = offset.checked_add(data.len() as u64) {
            // This write can't fail, offset and end are checked against config_len.
            data.write_all(&self.config_space[offset as usize..cmp::min(end, config_len) as usize])
                .unwrap();
        }
    }

    fn write_config(&mut self, offset: u64, data: &[u8]) {
        let data_len = data.len() as u64;
        let config_len = self.config_space.len() as u64;
        if offset + data_len > config_len {
            error!("Failed to write config space");
            return;
        }
        // Only overwrite the bytes covered by `data`; copy_from_slice panics
        // unless both slices have the same length.
        self.config_space[offset as usize..(offset + data_len) as usize].copy_from_slice(data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryMmap,
        interrupt_cb: Arc<VirtioInterrupt>,
        queues: Vec<Queue>,
        mut queue_evts: Vec<EventFd>,
    ) -> ActivateResult {
        if queues.len() != NUM_QUEUES || queue_evts.len() != NUM_QUEUES {
            error!(
                "Cannot perform activate. Expected {} queue(s), got {}",
                NUM_QUEUES,
                queues.len()
            );
            return Err(ActivateError::BadActivate);
        }

        let (self_kill_evt, kill_evt) =
            match EventFd::new(EFD_NONBLOCK).and_then(|e| Ok((e.try_clone()?, e))) {
                Ok(v) => v,
                Err(e) => {
                    error!("failed creating kill EventFd pair: {}", e);
                    return Err(ActivateError::BadActivate);
                }
            };
        self.kill_evt = Some(self_kill_evt);

        if let Some(disk_image) = self.disk_image.clone() {
            let disk_image_id = build_disk_image_id(&self.disk_path);

            // Save the interrupt callback as we need to return it on reset,
            // but clone it to pass into the thread.
            self.interrupt_cb = Some(interrupt_cb);
            let interrupt_cb = self.interrupt_cb.as_ref().unwrap().clone();

            // Save the queue EventFd as we need to return it on reset,
            // but clone it to pass into the thread.
            self.queue_evt = Some(queue_evts.remove(0));
            let queue_evt = self.queue_evt.as_ref().unwrap().try_clone().map_err(|e| {
                error!("failed to clone queue EventFd: {}", e);
                ActivateError::BadActivate
            })?;

            let mut handler = BlockEpollHandler {
                queues,
                mem,
                disk_image,
                disk_nsectors: self.disk_nsectors,
                interrupt_cb,
                disk_image_id,
            };

            let worker_result = thread::Builder::new()
                .name("virtio_blk".to_string())
                .spawn(move || handler.run(queue_evt, kill_evt));

            if let Err(e) = worker_result {
                error!("failed to spawn virtio_blk worker: {}", e);
                return Err(ActivateError::BadActivate);
            }

            return Ok(());
        }
        Err(ActivateError::BadActivate)
    }

    fn reset(&mut self) -> Option<(Arc<VirtioInterrupt>, Vec<EventFd>)> {
        if let Some(kill_evt) = self.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }

        // Return the interrupt callback and the queue EventFd.
        Some((
            self.interrupt_cb.take().unwrap(),
            vec![self.queue_evt.take().unwrap()],
        ))
    }
}