2019-05-06 16:31:15 +00:00
|
|
|
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
|
|
//
|
|
|
|
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
2019-05-08 10:22:53 +00:00
|
|
|
// found in the LICENSE-BSD-3-Clause file.
|
|
|
|
//
|
|
|
|
// Copyright © 2019 Intel Corporation
|
|
|
|
//
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
|
2019-05-06 16:31:15 +00:00
|
|
|
|
2019-12-31 10:49:11 +00:00
|
|
|
use super::Error as DeviceError;
|
|
|
|
use super::{
|
2020-07-22 13:38:28 +00:00
|
|
|
ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Queue,
|
|
|
|
VirtioDevice, VirtioDeviceType, VirtioInterruptType, EPOLL_HELPER_EVENT_LAST,
|
2019-12-31 10:49:11 +00:00
|
|
|
};
|
2020-08-04 02:45:53 +00:00
|
|
|
use crate::seccomp_filters::{get_seccomp_filter, Thread};
|
2019-12-31 10:49:11 +00:00
|
|
|
use crate::VirtioInterrupt;
|
2020-04-08 07:51:48 +00:00
|
|
|
use anyhow::anyhow;
|
2020-07-14 09:43:33 +00:00
|
|
|
use block_util::{build_disk_image_id, Request, RequestType, VirtioBlockConfig};
|
2020-07-10 14:43:12 +00:00
|
|
|
use libc::EFD_NONBLOCK;
|
2020-08-04 02:45:53 +00:00
|
|
|
use seccomp::{SeccompAction, SeccompFilter};
|
2020-06-24 13:24:25 +00:00
|
|
|
use std::collections::HashMap;
|
2019-05-06 16:31:15 +00:00
|
|
|
use std::io::{self, Read, Seek, SeekFrom, Write};
|
2020-06-24 13:24:25 +00:00
|
|
|
use std::num::Wrapping;
|
2020-01-24 17:17:25 +00:00
|
|
|
use std::ops::DerefMut;
|
2020-07-22 13:38:28 +00:00
|
|
|
use std::os::unix::io::AsRawFd;
|
2019-05-10 07:27:56 +00:00
|
|
|
use std::path::PathBuf;
|
2019-05-06 16:31:15 +00:00
|
|
|
use std::result;
|
2020-06-24 13:24:25 +00:00
|
|
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
2020-01-24 17:17:25 +00:00
|
|
|
use std::sync::{Arc, Mutex};
|
2019-05-06 16:31:15 +00:00
|
|
|
use std::thread;
|
2019-09-19 13:42:29 +00:00
|
|
|
use virtio_bindings::bindings::virtio_blk::*;
|
2020-05-15 15:58:31 +00:00
|
|
|
use virtio_bindings::bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
|
2020-02-11 16:22:40 +00:00
|
|
|
use vm_memory::{
|
2020-07-14 09:43:33 +00:00
|
|
|
ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, GuestMemoryError,
|
|
|
|
GuestMemoryMmap,
|
2020-02-11 16:22:40 +00:00
|
|
|
};
|
2020-04-08 07:51:48 +00:00
|
|
|
use vm_migration::{
|
|
|
|
Migratable, MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable,
|
|
|
|
Transportable,
|
|
|
|
};
|
2020-07-10 14:43:12 +00:00
|
|
|
use vmm_sys_util::eventfd::EventFd;
|
2019-05-06 16:31:15 +00:00
|
|
|
|
|
|
|
/// Base-2 logarithm of the sector size.
const SECTOR_SHIFT: u8 = 9;
/// Size of one disk sector in bytes (`1 << SECTOR_SHIFT`).
pub const SECTOR_SIZE: u64 = 1u64 << SECTOR_SHIFT;
|
|
|
|
|
|
|
|
// New descriptors are pending on the virtio queue.
|
2020-07-22 13:38:28 +00:00
|
|
|
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
|
2019-05-06 16:31:15 +00:00
|
|
|
|
|
|
|
#[derive(Debug)]
|
2019-10-24 05:30:44 +00:00
|
|
|
pub enum Error {
|
2019-05-06 16:31:15 +00:00
|
|
|
/// Guest gave us bad memory addresses.
|
|
|
|
GuestMemory(GuestMemoryError),
|
|
|
|
/// Guest gave us offsets that would have overflowed a usize.
|
|
|
|
CheckedOffset(GuestAddress, usize),
|
|
|
|
/// Guest gave us a write only descriptor that protocol says to read from.
|
|
|
|
UnexpectedWriteOnlyDescriptor,
|
|
|
|
/// Guest gave us a read only descriptor that protocol says to write to.
|
|
|
|
UnexpectedReadOnlyDescriptor,
|
|
|
|
/// Guest gave us too few descriptors in a descriptor chain.
|
|
|
|
DescriptorChainTooShort,
|
|
|
|
/// Guest gave us a descriptor that was too short to use.
|
|
|
|
DescriptorLengthTooSmall,
|
|
|
|
/// Getting a block's metadata fails for any reason.
|
|
|
|
GetFileMetadata,
|
|
|
|
/// The requested operation would cause a seek beyond disk end.
|
|
|
|
InvalidOffset,
|
|
|
|
}
|
|
|
|
|
2019-05-10 07:27:56 +00:00
|
|
|
/// Capabilities a disk backend must provide: it has to be readable, seekable,
/// writable and cloneable.
pub trait DiskFile: Read + Seek + Write + Clone {}

/// Every type offering the required capabilities is automatically a `DiskFile`.
impl<D> DiskFile for D where D: Read + Seek + Write + Clone {}
|
|
|
|
|
2020-06-24 13:24:25 +00:00
|
|
|
/// I/O statistics of a block device.
///
/// Each counter sits behind an `Arc`, so cloning a `BlockCounters` produces a
/// handle onto the same underlying atomics rather than an independent copy.
#[derive(Default, Clone)]
pub struct BlockCounters {
    /// Total number of bytes read by successful requests.
    read_bytes: Arc<AtomicU64>,
    /// Number of successful read requests.
    read_ops: Arc<AtomicU64>,
    /// Total number of bytes written by successful requests.
    write_bytes: Arc<AtomicU64>,
    /// Number of successful write requests.
    write_ops: Arc<AtomicU64>,
}
|
|
|
|
|
2019-05-10 07:27:56 +00:00
|
|
|
struct BlockEpollHandler<T: DiskFile> {
|
2020-01-24 17:17:25 +00:00
|
|
|
queue: Queue,
|
2020-02-11 16:22:40 +00:00
|
|
|
mem: GuestMemoryAtomic<GuestMemoryMmap>,
|
2020-01-24 17:17:25 +00:00
|
|
|
disk_image: Arc<Mutex<T>>,
|
2019-05-06 16:31:15 +00:00
|
|
|
disk_nsectors: u64,
|
2020-01-13 17:52:19 +00:00
|
|
|
interrupt_cb: Arc<dyn VirtioInterrupt>,
|
2019-05-06 16:31:15 +00:00
|
|
|
disk_image_id: Vec<u8>,
|
2019-11-19 00:42:31 +00:00
|
|
|
kill_evt: EventFd,
|
|
|
|
pause_evt: EventFd,
|
2020-05-15 15:58:31 +00:00
|
|
|
event_idx: bool,
|
2020-05-20 16:04:52 +00:00
|
|
|
writeback: Arc<AtomicBool>,
|
2020-06-24 13:24:25 +00:00
|
|
|
counters: BlockCounters,
|
2020-07-22 13:38:28 +00:00
|
|
|
queue_evt: EventFd,
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
|
2019-05-10 07:27:56 +00:00
|
|
|
impl<T: DiskFile> BlockEpollHandler<T> {
|
2020-01-24 17:17:25 +00:00
|
|
|
fn process_queue(&mut self) -> bool {
|
|
|
|
let queue = &mut self.queue;
|
2019-05-06 16:31:15 +00:00
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
let mut used_desc_heads = Vec::new();
|
2019-05-06 16:31:15 +00:00
|
|
|
let mut used_count = 0;
|
2020-02-11 16:22:40 +00:00
|
|
|
let mem = self.mem.memory();
|
2020-06-24 13:24:25 +00:00
|
|
|
let mut read_bytes = Wrapping(0);
|
|
|
|
let mut write_bytes = Wrapping(0);
|
|
|
|
let mut read_ops = Wrapping(0);
|
|
|
|
let mut write_ops = Wrapping(0);
|
|
|
|
|
2019-08-20 22:43:23 +00:00
|
|
|
for avail_desc in queue.iter(&mem) {
|
2019-05-06 16:31:15 +00:00
|
|
|
let len;
|
2019-08-20 22:43:23 +00:00
|
|
|
match Request::parse(&avail_desc, &mem) {
|
2020-05-20 16:04:52 +00:00
|
|
|
Ok(mut request) => {
|
|
|
|
request.set_writeback(self.writeback.load(Ordering::SeqCst));
|
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
let mut disk_image_locked = self.disk_image.lock().unwrap();
|
|
|
|
let mut disk_image = disk_image_locked.deref_mut();
|
2019-05-06 16:31:15 +00:00
|
|
|
let status = match request.execute(
|
2020-01-24 17:17:25 +00:00
|
|
|
&mut disk_image,
|
2019-05-06 16:31:15 +00:00
|
|
|
self.disk_nsectors,
|
2019-08-20 22:43:23 +00:00
|
|
|
&mem,
|
2019-05-06 16:31:15 +00:00
|
|
|
&self.disk_image_id,
|
|
|
|
) {
|
|
|
|
Ok(l) => {
|
|
|
|
len = l;
|
2020-06-24 13:24:25 +00:00
|
|
|
match request.request_type {
|
|
|
|
RequestType::In => {
|
|
|
|
read_bytes += Wrapping(request.data_len as u64);
|
|
|
|
read_ops += Wrapping(1);
|
|
|
|
}
|
|
|
|
RequestType::Out => {
|
|
|
|
write_bytes += Wrapping(request.data_len as u64);
|
|
|
|
write_ops += Wrapping(1);
|
|
|
|
}
|
|
|
|
_ => {}
|
|
|
|
};
|
2019-05-06 16:31:15 +00:00
|
|
|
VIRTIO_BLK_S_OK
|
|
|
|
}
|
|
|
|
Err(e) => {
|
|
|
|
error!("Failed to execute request: {:?}", e);
|
|
|
|
len = 1; // We need at least 1 byte for the status.
|
|
|
|
e.status()
|
|
|
|
}
|
|
|
|
};
|
|
|
|
// We use unwrap because the request parsing process already checked that the
|
|
|
|
// status_addr was valid.
|
2019-08-20 22:43:23 +00:00
|
|
|
mem.write_obj(status, request.status_addr).unwrap();
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
Err(e) => {
|
|
|
|
error!("Failed to parse available descriptor chain: {:?}", e);
|
|
|
|
len = 0;
|
|
|
|
}
|
|
|
|
}
|
2020-01-24 17:17:25 +00:00
|
|
|
used_desc_heads.push((avail_desc.index, len));
|
2019-05-06 16:31:15 +00:00
|
|
|
used_count += 1;
|
|
|
|
}
|
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
for &(desc_index, len) in used_desc_heads.iter() {
|
2019-08-20 22:43:23 +00:00
|
|
|
queue.add_used(&mem, desc_index, len);
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
2020-06-24 13:24:25 +00:00
|
|
|
|
|
|
|
self.counters
|
|
|
|
.write_bytes
|
|
|
|
.fetch_add(write_bytes.0, Ordering::AcqRel);
|
|
|
|
self.counters
|
|
|
|
.write_ops
|
|
|
|
.fetch_add(write_ops.0, Ordering::AcqRel);
|
|
|
|
|
|
|
|
self.counters
|
|
|
|
.read_bytes
|
|
|
|
.fetch_add(read_bytes.0, Ordering::AcqRel);
|
|
|
|
self.counters
|
|
|
|
.read_ops
|
|
|
|
.fetch_add(read_ops.0, Ordering::AcqRel);
|
|
|
|
|
2019-05-06 16:31:15 +00:00
|
|
|
used_count > 0
|
|
|
|
}
|
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
|
2020-01-13 17:52:19 +00:00
|
|
|
self.interrupt_cb
|
2020-01-24 17:17:25 +00:00
|
|
|
.trigger(&VirtioInterruptType::Queue, Some(&self.queue))
|
2020-01-13 17:52:19 +00:00
|
|
|
.map_err(|e| {
|
2019-07-26 18:48:07 +00:00
|
|
|
error!("Failed to signal used queue: {:?}", e);
|
|
|
|
DeviceError::FailedSignalingUsedQueue(e)
|
2020-01-13 17:52:19 +00:00
|
|
|
})
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[allow(dead_code)]
|
2019-05-10 07:27:56 +00:00
|
|
|
fn update_disk_image(
|
|
|
|
&mut self,
|
2020-01-24 17:17:25 +00:00
|
|
|
mut disk_image: T,
|
2019-05-10 07:27:56 +00:00
|
|
|
disk_path: &PathBuf,
|
|
|
|
) -> result::Result<(), DeviceError> {
|
2020-01-24 17:17:25 +00:00
|
|
|
self.disk_nsectors = disk_image
|
2019-05-06 16:31:15 +00:00
|
|
|
.seek(SeekFrom::End(0))
|
|
|
|
.map_err(DeviceError::IoError)?
|
|
|
|
/ SECTOR_SIZE;
|
2019-05-10 07:27:56 +00:00
|
|
|
self.disk_image_id = build_disk_image_id(disk_path);
|
2020-01-24 17:17:25 +00:00
|
|
|
self.disk_image = Arc::new(Mutex::new(disk_image));
|
2019-05-06 16:31:15 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2020-07-22 13:38:28 +00:00
|
|
|
fn run(&mut self, paused: Arc<AtomicBool>) -> result::Result<(), EpollHelperError> {
|
|
|
|
let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
|
|
|
|
helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
|
|
|
|
helper.run(paused, self)?;
|
2020-06-22 14:00:02 +00:00
|
|
|
|
2020-07-22 13:38:28 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
2019-05-06 16:31:15 +00:00
|
|
|
|
2020-07-22 13:38:28 +00:00
|
|
|
impl<T: DiskFile> EpollHelperHandler for BlockEpollHandler<T> {
|
|
|
|
fn handle_event(&mut self, _helper: &mut EpollHelper, event: u16) -> bool {
|
|
|
|
match event {
|
|
|
|
QUEUE_AVAIL_EVENT => {
|
|
|
|
if let Err(e) = self.queue_evt.read() {
|
|
|
|
error!("Failed to get queue event: {:?}", e);
|
|
|
|
return true;
|
|
|
|
} else if self.event_idx {
|
|
|
|
// vm-virtio's Queue implementation only checks avail_index
|
|
|
|
// once, so to properly support EVENT_IDX we need to keep
|
|
|
|
// calling process_queue() until it stops finding new
|
|
|
|
// requests on the queue.
|
|
|
|
loop {
|
|
|
|
if self.process_queue() {
|
|
|
|
self.queue.update_avail_event(&self.mem.memory());
|
|
|
|
|
|
|
|
if self
|
|
|
|
.queue
|
|
|
|
.needs_notification(&self.mem.memory(), self.queue.next_used)
|
|
|
|
{
|
|
|
|
if let Err(e) = self.signal_used_queue() {
|
|
|
|
error!("Failed to signal used queue: {:?}", e);
|
|
|
|
return true;
|
2020-05-15 15:58:31 +00:00
|
|
|
}
|
|
|
|
}
|
2020-07-22 13:38:28 +00:00
|
|
|
} else {
|
|
|
|
break;
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
}
|
2020-07-22 13:38:28 +00:00
|
|
|
} else if self.process_queue() {
|
|
|
|
if let Err(e) = self.signal_used_queue() {
|
|
|
|
error!("Failed to signal used queue: {:?}", e);
|
|
|
|
return true;
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-07-22 13:38:28 +00:00
|
|
|
_ => {
|
|
|
|
error!("Unexpected event: {}", event);
|
|
|
|
return true;
|
|
|
|
}
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
2020-07-22 13:38:28 +00:00
|
|
|
false
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Virtio device for exposing block level read/write operations on a host file.
|
2019-05-10 07:27:56 +00:00
|
|
|
pub struct Block<T: DiskFile> {
|
2020-04-27 09:21:15 +00:00
|
|
|
id: String,
|
2019-05-06 16:31:15 +00:00
|
|
|
kill_evt: Option<EventFd>,
|
2020-01-24 17:17:25 +00:00
|
|
|
disk_image: Arc<Mutex<T>>,
|
2019-05-10 07:27:56 +00:00
|
|
|
disk_path: PathBuf,
|
2019-05-06 16:31:15 +00:00
|
|
|
disk_nsectors: u64,
|
|
|
|
avail_features: u64,
|
|
|
|
acked_features: u64,
|
2020-01-24 17:17:25 +00:00
|
|
|
config: VirtioBlockConfig,
|
|
|
|
queue_evts: Option<Vec<EventFd>>,
|
2020-01-13 17:52:19 +00:00
|
|
|
interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
|
2020-08-05 08:06:05 +00:00
|
|
|
epoll_threads: Option<Vec<thread::JoinHandle<()>>>,
|
2019-11-19 00:42:31 +00:00
|
|
|
pause_evt: Option<EventFd>,
|
|
|
|
paused: Arc<AtomicBool>,
|
2020-01-24 17:17:25 +00:00
|
|
|
queue_size: Vec<u16>,
|
2020-05-20 16:04:52 +00:00
|
|
|
writeback: Arc<AtomicBool>,
|
2020-06-24 13:24:25 +00:00
|
|
|
counters: BlockCounters,
|
2020-08-04 02:45:53 +00:00
|
|
|
seccomp_action: SeccompAction,
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
|
2020-04-08 07:51:48 +00:00
|
|
|
#[derive(Serialize, Deserialize)]
|
|
|
|
pub struct BlockState {
|
|
|
|
pub disk_path: PathBuf,
|
|
|
|
pub disk_nsectors: u64,
|
|
|
|
pub avail_features: u64,
|
|
|
|
pub acked_features: u64,
|
|
|
|
pub config: VirtioBlockConfig,
|
|
|
|
}
|
|
|
|
|
2019-05-10 07:27:56 +00:00
|
|
|
impl<T: DiskFile> Block<T> {
|
2019-05-06 16:31:15 +00:00
|
|
|
/// Create a new virtio block device that operates on the given file.
|
|
|
|
///
|
|
|
|
/// The given file must be seekable and sizable.
|
2020-08-04 02:45:53 +00:00
|
|
|
#[allow(clippy::too_many_arguments)]
|
2019-05-10 07:27:56 +00:00
|
|
|
pub fn new(
|
2020-04-27 09:21:15 +00:00
|
|
|
id: String,
|
2019-05-10 07:27:56 +00:00
|
|
|
mut disk_image: T,
|
|
|
|
disk_path: PathBuf,
|
|
|
|
is_disk_read_only: bool,
|
2019-10-02 18:18:39 +00:00
|
|
|
iommu: bool,
|
2020-01-24 17:17:25 +00:00
|
|
|
num_queues: usize,
|
|
|
|
queue_size: u16,
|
2020-08-04 02:45:53 +00:00
|
|
|
seccomp_action: SeccompAction,
|
2019-05-10 07:27:56 +00:00
|
|
|
) -> io::Result<Block<T>> {
|
2019-05-06 16:31:15 +00:00
|
|
|
let disk_size = disk_image.seek(SeekFrom::End(0))? as u64;
|
|
|
|
if disk_size % SECTOR_SIZE != 0 {
|
|
|
|
warn!(
|
|
|
|
"Disk size {} is not a multiple of sector size {}; \
|
|
|
|
the remainder will not be visible to the guest.",
|
|
|
|
disk_size, SECTOR_SIZE
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2020-05-15 15:58:31 +00:00
|
|
|
let mut avail_features = (1u64 << VIRTIO_F_VERSION_1)
|
|
|
|
| (1u64 << VIRTIO_BLK_F_FLUSH)
|
2020-05-20 16:04:52 +00:00
|
|
|
| (1u64 << VIRTIO_RING_F_EVENT_IDX)
|
|
|
|
| (1u64 << VIRTIO_BLK_F_CONFIG_WCE);
|
2019-05-06 16:31:15 +00:00
|
|
|
|
2019-10-02 18:18:39 +00:00
|
|
|
if iommu {
|
|
|
|
avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
|
|
|
|
}
|
|
|
|
|
2019-05-06 16:31:15 +00:00
|
|
|
if is_disk_read_only {
|
|
|
|
avail_features |= 1u64 << VIRTIO_BLK_F_RO;
|
2020-01-24 17:17:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
let disk_nsectors = disk_size / SECTOR_SIZE;
|
|
|
|
let mut config = VirtioBlockConfig {
|
|
|
|
capacity: disk_nsectors,
|
2020-05-20 16:04:52 +00:00
|
|
|
writeback: 1,
|
2020-01-24 17:17:25 +00:00
|
|
|
..Default::default()
|
2019-05-06 16:31:15 +00:00
|
|
|
};
|
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
if num_queues > 1 {
|
|
|
|
avail_features |= 1u64 << VIRTIO_BLK_F_MQ;
|
|
|
|
config.num_queues = num_queues as u16;
|
|
|
|
}
|
|
|
|
|
2019-05-06 16:31:15 +00:00
|
|
|
Ok(Block {
|
2020-04-27 09:21:15 +00:00
|
|
|
id,
|
2019-05-06 16:31:15 +00:00
|
|
|
kill_evt: None,
|
2020-01-24 17:17:25 +00:00
|
|
|
disk_image: Arc::new(Mutex::new(disk_image)),
|
2019-05-10 07:27:56 +00:00
|
|
|
disk_path,
|
2020-01-24 17:17:25 +00:00
|
|
|
disk_nsectors,
|
2019-05-06 16:31:15 +00:00
|
|
|
avail_features,
|
|
|
|
acked_features: 0u64,
|
2020-01-24 17:17:25 +00:00
|
|
|
config,
|
|
|
|
queue_evts: None,
|
2019-06-03 20:57:26 +00:00
|
|
|
interrupt_cb: None,
|
2020-01-27 13:14:56 +00:00
|
|
|
epoll_threads: None,
|
2019-11-19 00:42:31 +00:00
|
|
|
pause_evt: None,
|
|
|
|
paused: Arc::new(AtomicBool::new(false)),
|
2020-01-24 17:17:25 +00:00
|
|
|
queue_size: vec![queue_size; num_queues],
|
2020-05-20 16:04:52 +00:00
|
|
|
writeback: Arc::new(AtomicBool::new(true)),
|
2020-06-24 13:24:25 +00:00
|
|
|
counters: BlockCounters::default(),
|
2020-08-04 02:45:53 +00:00
|
|
|
seccomp_action,
|
2019-05-06 16:31:15 +00:00
|
|
|
})
|
|
|
|
}
|
2020-04-08 07:51:48 +00:00
|
|
|
|
|
|
|
fn state(&self) -> BlockState {
|
|
|
|
BlockState {
|
|
|
|
disk_path: self.disk_path.clone(),
|
|
|
|
disk_nsectors: self.disk_nsectors,
|
|
|
|
avail_features: self.avail_features,
|
|
|
|
acked_features: self.acked_features,
|
|
|
|
config: self.config,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn set_state(&mut self, state: &BlockState) -> io::Result<()> {
|
|
|
|
self.disk_path = state.disk_path.clone();
|
|
|
|
self.disk_nsectors = state.disk_nsectors;
|
|
|
|
self.avail_features = state.avail_features;
|
|
|
|
self.acked_features = state.acked_features;
|
|
|
|
self.config = state.config;
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
2020-05-20 16:04:52 +00:00
|
|
|
|
|
|
|
fn update_writeback(&mut self) {
|
|
|
|
// Use writeback from config if VIRTIO_BLK_F_CONFIG_WCE
|
|
|
|
let writeback =
|
|
|
|
if self.acked_features & 1 << VIRTIO_BLK_F_CONFIG_WCE == 1 << VIRTIO_BLK_F_CONFIG_WCE {
|
|
|
|
self.config.writeback == 1
|
|
|
|
} else {
|
|
|
|
// Else check if VIRTIO_BLK_F_FLUSH negotiated
|
|
|
|
self.acked_features & 1 << VIRTIO_BLK_F_FLUSH == 1 << VIRTIO_BLK_F_FLUSH
|
|
|
|
};
|
|
|
|
|
|
|
|
info!(
|
|
|
|
"Changing cache mode to {}",
|
|
|
|
if writeback {
|
|
|
|
"writeback"
|
|
|
|
} else {
|
|
|
|
"writethrough"
|
|
|
|
}
|
|
|
|
);
|
|
|
|
self.writeback.store(writeback, Ordering::SeqCst);
|
|
|
|
}
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
|
2019-05-10 07:27:56 +00:00
|
|
|
impl<T: DiskFile> Drop for Block<T> {
    /// Writes to the kill event, if one exists, when the device is dropped.
    fn drop(&mut self) {
        let kill_evt = match self.kill_evt.take() {
            Some(evt) => evt,
            None => return,
        };

        // Ignore the result because there is nothing we can do about it.
        let _ = kill_evt.write(1);
    }
}
|
|
|
|
|
2019-05-10 07:27:56 +00:00
|
|
|
impl<T: 'static + DiskFile + Send> VirtioDevice for Block<T> {
|
2019-05-06 16:31:15 +00:00
|
|
|
fn device_type(&self) -> u32 {
|
|
|
|
VirtioDeviceType::TYPE_BLOCK as u32
|
|
|
|
}
|
|
|
|
|
|
|
|
fn queue_max_sizes(&self) -> &[u16] {
|
2020-01-24 17:17:25 +00:00
|
|
|
self.queue_size.as_slice()
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
|
2020-01-23 10:14:38 +00:00
|
|
|
fn features(&self) -> u64 {
|
|
|
|
self.avail_features
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
|
2020-01-23 10:14:38 +00:00
|
|
|
fn ack_features(&mut self, value: u64) {
|
|
|
|
let mut v = value;
|
2019-05-06 16:31:15 +00:00
|
|
|
// Check if the guest is ACK'ing a feature that we didn't claim to have.
|
|
|
|
let unrequested_features = v & !self.avail_features;
|
|
|
|
if unrequested_features != 0 {
|
|
|
|
warn!("Received acknowledge request for unknown feature.");
|
|
|
|
|
|
|
|
// Don't count these features as acked.
|
|
|
|
v &= !unrequested_features;
|
|
|
|
}
|
|
|
|
self.acked_features |= v;
|
|
|
|
}
|
|
|
|
|
2020-07-16 09:34:51 +00:00
|
|
|
fn read_config(&self, offset: u64, data: &mut [u8]) {
|
|
|
|
self.read_config_from_slice(self.config.as_slice(), offset, data);
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn write_config(&mut self, offset: u64, data: &[u8]) {
|
2020-07-16 10:00:48 +00:00
|
|
|
// The "writeback" field is the only mutable field
|
|
|
|
let writeback_offset =
|
|
|
|
(&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64);
|
|
|
|
if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback)
|
|
|
|
{
|
|
|
|
error!(
|
|
|
|
"Attempt to write to read-only field: offset {:x} length {}",
|
|
|
|
offset,
|
|
|
|
data.len()
|
|
|
|
);
|
2019-05-06 16:31:15 +00:00
|
|
|
return;
|
|
|
|
}
|
2020-07-16 10:00:48 +00:00
|
|
|
|
|
|
|
self.config.writeback = data[0];
|
2020-05-20 16:04:52 +00:00
|
|
|
self.update_writeback();
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn activate(
|
|
|
|
&mut self,
|
2020-02-11 16:22:40 +00:00
|
|
|
mem: GuestMemoryAtomic<GuestMemoryMmap>,
|
2020-01-13 17:52:19 +00:00
|
|
|
interrupt_cb: Arc<dyn VirtioInterrupt>,
|
2020-01-24 17:17:25 +00:00
|
|
|
mut queues: Vec<Queue>,
|
2019-05-06 16:31:15 +00:00
|
|
|
mut queue_evts: Vec<EventFd>,
|
|
|
|
) -> ActivateResult {
|
2020-01-24 17:17:25 +00:00
|
|
|
if queues.len() != self.queue_size.len() || queue_evts.len() != self.queue_size.len() {
|
2019-05-06 16:31:15 +00:00
|
|
|
error!(
|
|
|
|
"Cannot perform activate. Expected {} queue(s), got {}",
|
2020-01-24 17:17:25 +00:00
|
|
|
self.queue_size.len(),
|
2019-05-06 16:31:15 +00:00
|
|
|
queues.len()
|
|
|
|
);
|
|
|
|
return Err(ActivateError::BadActivate);
|
|
|
|
}
|
|
|
|
|
2019-11-19 00:42:31 +00:00
|
|
|
let (self_kill_evt, kill_evt) = EventFd::new(EFD_NONBLOCK)
|
|
|
|
.and_then(|e| Ok((e.try_clone()?, e)))
|
|
|
|
.map_err(|e| {
|
|
|
|
error!("failed creating kill EventFd pair: {}", e);
|
|
|
|
ActivateError::BadActivate
|
|
|
|
})?;
|
|
|
|
|
2019-05-06 16:31:15 +00:00
|
|
|
self.kill_evt = Some(self_kill_evt);
|
|
|
|
|
2019-11-19 00:42:31 +00:00
|
|
|
let (self_pause_evt, pause_evt) = EventFd::new(EFD_NONBLOCK)
|
|
|
|
.and_then(|e| Ok((e.try_clone()?, e)))
|
|
|
|
.map_err(|e| {
|
|
|
|
error!("failed creating pause EventFd pair: {}", e);
|
|
|
|
ActivateError::BadActivate
|
|
|
|
})?;
|
|
|
|
self.pause_evt = Some(self_pause_evt);
|
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
let disk_image_id = build_disk_image_id(&self.disk_path);
|
2019-05-08 15:04:12 +00:00
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
let mut tmp_queue_evts: Vec<EventFd> = Vec::new();
|
|
|
|
for queue_evt in queue_evts.iter() {
|
|
|
|
// Save the queue EventFD as we need to return it on reset
|
2019-05-08 15:04:12 +00:00
|
|
|
// but clone it to pass into the thread.
|
2020-01-24 17:17:25 +00:00
|
|
|
tmp_queue_evts.push(queue_evt.try_clone().map_err(|e| {
|
|
|
|
error!("failed to clone queue EventFd: {}", e);
|
|
|
|
ActivateError::BadActivate
|
|
|
|
})?);
|
|
|
|
}
|
|
|
|
self.queue_evts = Some(tmp_queue_evts);
|
2019-05-08 15:04:12 +00:00
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
let mut tmp_queue_evts: Vec<EventFd> = Vec::new();
|
|
|
|
for queue_evt in queue_evts.iter() {
|
2019-05-08 15:04:12 +00:00
|
|
|
// Save the queue EventFD as we need to return it on reset
|
|
|
|
// but clone it to pass into the thread.
|
2020-01-24 17:17:25 +00:00
|
|
|
tmp_queue_evts.push(queue_evt.try_clone().map_err(|e| {
|
2019-05-08 15:04:12 +00:00
|
|
|
error!("failed to clone queue EventFd: {}", e);
|
|
|
|
ActivateError::BadActivate
|
2020-01-24 17:17:25 +00:00
|
|
|
})?);
|
|
|
|
}
|
|
|
|
self.queue_evts = Some(tmp_queue_evts);
|
2019-05-08 15:04:12 +00:00
|
|
|
|
2020-05-15 15:58:31 +00:00
|
|
|
let event_idx = self.acked_features & 1u64 << VIRTIO_RING_F_EVENT_IDX
|
|
|
|
== 1u64 << VIRTIO_RING_F_EVENT_IDX;
|
2020-05-20 16:04:52 +00:00
|
|
|
self.update_writeback();
|
2020-05-15 15:58:31 +00:00
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
let mut epoll_threads = Vec::new();
|
|
|
|
for _ in 0..self.queue_size.len() {
|
2020-07-22 13:38:28 +00:00
|
|
|
let queue_evt = queue_evts.remove(0);
|
2019-05-06 16:31:15 +00:00
|
|
|
let mut handler = BlockEpollHandler {
|
2020-01-24 17:17:25 +00:00
|
|
|
queue: queues.remove(0),
|
|
|
|
mem: mem.clone(),
|
|
|
|
disk_image: self.disk_image.clone(),
|
2019-05-06 16:31:15 +00:00
|
|
|
disk_nsectors: self.disk_nsectors,
|
2020-01-24 17:17:25 +00:00
|
|
|
interrupt_cb: interrupt_cb.clone(),
|
|
|
|
disk_image_id: disk_image_id.clone(),
|
|
|
|
kill_evt: kill_evt.try_clone().unwrap(),
|
|
|
|
pause_evt: pause_evt.try_clone().unwrap(),
|
2020-05-15 15:58:31 +00:00
|
|
|
event_idx,
|
2020-05-20 16:04:52 +00:00
|
|
|
writeback: self.writeback.clone(),
|
2020-06-24 13:24:25 +00:00
|
|
|
counters: self.counters.clone(),
|
2020-07-22 13:38:28 +00:00
|
|
|
queue_evt,
|
2019-05-06 16:31:15 +00:00
|
|
|
};
|
|
|
|
|
2020-05-20 09:40:54 +00:00
|
|
|
handler.queue.set_event_idx(event_idx);
|
|
|
|
|
2019-11-19 00:42:31 +00:00
|
|
|
let paused = self.paused.clone();
|
2020-08-04 02:45:53 +00:00
|
|
|
|
|
|
|
// Retrieve seccomp filter for virtio_blk thread
|
2020-08-04 18:34:09 +00:00
|
|
|
let virtio_blk_seccomp_filter =
|
|
|
|
get_seccomp_filter(&self.seccomp_action, Thread::VirtioBlk)
|
|
|
|
.map_err(ActivateError::CreateSeccompFilter)?;
|
2020-08-04 02:45:53 +00:00
|
|
|
|
2019-11-19 00:42:31 +00:00
|
|
|
thread::Builder::new()
|
2019-05-06 16:31:15 +00:00
|
|
|
.name("virtio_blk".to_string())
|
2020-08-04 02:45:53 +00:00
|
|
|
.spawn(move || {
|
2020-08-05 08:06:05 +00:00
|
|
|
if let Err(e) = SeccompFilter::apply(virtio_blk_seccomp_filter) {
|
|
|
|
error!("Error applying seccomp filter: {:?}", e);
|
|
|
|
} else if let Err(e) = handler.run(paused) {
|
|
|
|
error!("Error running worker: {:?}", e);
|
|
|
|
}
|
2020-08-04 02:45:53 +00:00
|
|
|
})
|
2020-01-27 12:56:05 +00:00
|
|
|
.map(|thread| epoll_threads.push(thread))
|
2019-11-19 00:42:31 +00:00
|
|
|
.map_err(|e| {
|
|
|
|
error!("failed to clone the virtio-blk epoll thread: {}", e);
|
|
|
|
ActivateError::BadActivate
|
|
|
|
})?;
|
2020-01-24 17:17:25 +00:00
|
|
|
}
|
2019-05-06 16:31:15 +00:00
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
// Save the interrupt EventFD as we need to return it on reset
|
|
|
|
// but clone it to pass into the thread.
|
|
|
|
self.interrupt_cb = Some(interrupt_cb);
|
2020-01-27 12:56:05 +00:00
|
|
|
|
2020-01-24 17:17:25 +00:00
|
|
|
self.epoll_threads = Some(epoll_threads);
|
|
|
|
|
|
|
|
Ok(())
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
2019-05-08 15:04:12 +00:00
|
|
|
|
2020-01-13 17:52:19 +00:00
|
|
|
/// Resets the device and hands back the resources received on activation.
///
/// If a pause event exists the device is resumed first, then the kill event
/// (if any) is written to.
///
/// Returns the interrupt callback together with the saved queue events.
/// Returns `None` when resuming fails, or when the device holds no interrupt
/// callback or queue events (for instance because it was never successfully
/// activated). The latter case previously panicked.
fn reset(&mut self) -> Option<(Arc<dyn VirtioInterrupt>, Vec<EventFd>)> {
    // We first must resume the virtio thread if it was paused.
    if self.pause_evt.take().is_some() {
        self.resume().ok()?;
    }

    if let Some(kill_evt) = self.kill_evt.take() {
        // Ignore the result because there is nothing we can do about it.
        let _ = kill_evt.write(1);
    }

    // Return the interrupt and queue EventFDs. Both are taken before either is
    // checked so the device ends up fully cleared even if one is missing.
    let interrupt_cb = self.interrupt_cb.take();
    let queue_evts = self.queue_evts.take();
    Some((interrupt_cb?, queue_evts?))
}
|
2020-06-24 13:24:25 +00:00
|
|
|
|
|
|
|
fn counters(&self) -> Option<HashMap<&'static str, Wrapping<u64>>> {
|
|
|
|
let mut counters = HashMap::new();
|
|
|
|
|
|
|
|
counters.insert(
|
|
|
|
"read_bytes",
|
|
|
|
Wrapping(self.counters.read_bytes.load(Ordering::Acquire)),
|
|
|
|
);
|
|
|
|
counters.insert(
|
|
|
|
"write_bytes",
|
|
|
|
Wrapping(self.counters.write_bytes.load(Ordering::Acquire)),
|
|
|
|
);
|
|
|
|
counters.insert(
|
|
|
|
"read_ops",
|
|
|
|
Wrapping(self.counters.read_ops.load(Ordering::Acquire)),
|
|
|
|
);
|
|
|
|
counters.insert(
|
|
|
|
"write_ops",
|
|
|
|
Wrapping(self.counters.write_ops.load(Ordering::Acquire)),
|
|
|
|
);
|
|
|
|
|
|
|
|
Some(counters)
|
|
|
|
}
|
2019-05-06 16:31:15 +00:00
|
|
|
}
|
2019-11-19 00:42:31 +00:00
|
|
|
|
2020-01-27 17:59:39 +00:00
|
|
|
// Expands the crate's `virtio_pausable!` macro for `Block<T>`.
// NOTE(review): presumably this supplies the `Pausable` impl, including the
// `resume()` that `reset()` calls — confirm against the macro definition.
virtio_pausable!(Block, T: 'static + DiskFile + Send);
|
2020-04-08 07:51:48 +00:00
|
|
|
impl<T: 'static + DiskFile + Send> Snapshottable for Block<T> {
|
|
|
|
fn id(&self) -> String {
|
2020-04-27 09:21:15 +00:00
|
|
|
self.id.clone()
|
2020-04-08 07:51:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn snapshot(&self) -> std::result::Result<Snapshot, MigratableError> {
|
|
|
|
let snapshot =
|
|
|
|
serde_json::to_vec(&self.state()).map_err(|e| MigratableError::Snapshot(e.into()))?;
|
|
|
|
|
2020-04-27 09:21:15 +00:00
|
|
|
let mut block_snapshot = Snapshot::new(self.id.as_str());
|
2020-04-08 07:51:48 +00:00
|
|
|
block_snapshot.add_data_section(SnapshotDataSection {
|
2020-04-27 09:21:15 +00:00
|
|
|
id: format!("{}-section", self.id),
|
2020-04-08 07:51:48 +00:00
|
|
|
snapshot,
|
|
|
|
});
|
|
|
|
|
|
|
|
Ok(block_snapshot)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
|
2020-04-27 09:21:15 +00:00
|
|
|
if let Some(block_section) = snapshot.snapshot_data.get(&format!("{}-section", self.id)) {
|
2020-04-08 07:51:48 +00:00
|
|
|
let block_state = match serde_json::from_slice(&block_section.snapshot) {
|
|
|
|
Ok(state) => state,
|
|
|
|
Err(error) => {
|
|
|
|
return Err(MigratableError::Restore(anyhow!(
|
|
|
|
"Could not deserialize BLOCK {}",
|
|
|
|
error
|
|
|
|
)))
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
return self.set_state(&block_state).map_err(|e| {
|
|
|
|
MigratableError::Restore(anyhow!("Could not restore BLOCK state {:?}", e))
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
Err(MigratableError::Restore(anyhow!(
|
|
|
|
"Could not find BLOCK snapshot section"
|
|
|
|
)))
|
|
|
|
}
|
|
|
|
}
|
2019-05-01 16:59:51 +00:00
|
|
|
// Empty impl: `Block` overrides none of the `Transportable` methods.
impl<T: 'static + DiskFile + Send> Transportable for Block<T> {}
|
2019-11-19 00:42:31 +00:00
|
|
|
// Empty impl: `Block` overrides none of the `Migratable` methods.
impl<T: 'static + DiskFile + Send> Migratable for Block<T> {}
|