2020-07-30 10:40:09 +00:00
|
|
|
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
|
|
//
|
|
|
|
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE-BSD-3-Clause file.
|
|
|
|
//
|
|
|
|
// Copyright © 2020 Intel Corporation
|
|
|
|
//
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
|
|
|
|
|
|
|
|
use super::Error as DeviceError;
|
|
|
|
use super::{
|
2021-10-21 10:41:16 +00:00
|
|
|
ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
|
2021-02-26 20:06:10 +00:00
|
|
|
RateLimiterConfig, VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterruptType,
|
|
|
|
EPOLL_HELPER_EVENT_LAST,
|
2020-07-30 10:40:09 +00:00
|
|
|
};
|
2021-09-03 10:43:30 +00:00
|
|
|
use crate::seccomp_filters::Thread;
|
|
|
|
use crate::thread_helper::spawn_virtio_thread;
|
2021-06-02 19:08:04 +00:00
|
|
|
use crate::GuestMemoryMmap;
|
2020-07-30 10:40:09 +00:00
|
|
|
use crate::VirtioInterrupt;
|
2021-01-20 09:58:30 +00:00
|
|
|
use block_util::{
|
|
|
|
async_io::AsyncIo, async_io::AsyncIoError, async_io::DiskFile, build_disk_image_id, Request,
|
|
|
|
RequestType, VirtioBlockConfig,
|
|
|
|
};
|
2021-03-22 18:23:51 +00:00
|
|
|
use rate_limiter::{RateLimiter, TokenType};
|
2021-09-03 10:43:30 +00:00
|
|
|
use seccompiler::SeccompAction;
|
2021-01-20 09:58:30 +00:00
|
|
|
use std::io;
|
2020-07-30 10:40:09 +00:00
|
|
|
use std::num::Wrapping;
|
2021-01-20 09:58:30 +00:00
|
|
|
use std::os::unix::io::AsRawFd;
|
2020-07-30 10:40:09 +00:00
|
|
|
use std::path::PathBuf;
|
|
|
|
use std::result;
|
|
|
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
2020-08-11 14:05:06 +00:00
|
|
|
use std::sync::{Arc, Barrier};
|
2021-02-26 20:06:10 +00:00
|
|
|
use std::{collections::HashMap, convert::TryInto};
|
2021-05-06 13:34:31 +00:00
|
|
|
use versionize::{VersionMap, Versionize, VersionizeResult};
|
|
|
|
use versionize_derive::Versionize;
|
2020-07-30 10:40:09 +00:00
|
|
|
use virtio_bindings::bindings::virtio_blk::*;
|
2021-10-21 10:41:16 +00:00
|
|
|
use virtio_queue::Queue;
|
2021-06-02 19:08:04 +00:00
|
|
|
use vm_memory::{ByteValued, Bytes, GuestAddressSpace, GuestMemoryAtomic};
|
2021-05-06 13:34:31 +00:00
|
|
|
use vm_migration::VersionMapped;
|
2021-04-08 09:20:10 +00:00
|
|
|
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
|
2020-07-30 10:40:09 +00:00
|
|
|
use vmm_sys_util::eventfd::EventFd;
|
|
|
|
|
|
|
|
/// Base-2 logarithm of the sector size (2^9 = 512).
const SECTOR_SHIFT: u8 = 9;
/// Size in bytes of one sector; the disk capacity in this file is counted in
/// units of this size.
pub const SECTOR_SIZE: u64 = 1 << SECTOR_SHIFT;
|
2020-07-30 10:40:09 +00:00
|
|
|
|
|
|
|
// New descriptors are pending on the virtio queue.
|
|
|
|
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
|
|
|
|
// New completed tasks are pending on the completion ring.
|
2021-01-20 09:58:30 +00:00
|
|
|
const COMPLETION_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
|
2021-02-26 20:06:10 +00:00
|
|
|
// New 'wake up' event from the rate limiter
|
|
|
|
const RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3;
|
2020-07-30 10:40:09 +00:00
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
|
pub enum Error {
|
2020-07-30 09:58:29 +00:00
|
|
|
/// Failed to parse the request.
|
|
|
|
RequestParsing(block_util::Error),
|
|
|
|
/// Failed to execute the request.
|
|
|
|
RequestExecuting(block_util::ExecuteError),
|
2022-01-19 10:16:45 +00:00
|
|
|
/// Failed to complete the request.
|
|
|
|
RequestCompleting(block_util::Error),
|
2020-07-30 09:58:29 +00:00
|
|
|
/// Missing the expected entry in the list of requests.
|
|
|
|
MissingEntryRequestList,
|
|
|
|
/// The asynchronous request returned with failure.
|
|
|
|
AsyncRequestFailure,
|
2021-01-20 09:58:30 +00:00
|
|
|
/// Failed synchronizing the file
|
|
|
|
Fsync(AsyncIoError),
|
2021-10-21 10:41:16 +00:00
|
|
|
/// Failed adding used index
|
|
|
|
QueueAddUsed(virtio_queue::Error),
|
|
|
|
/// Failed creating an iterator over the queue
|
|
|
|
QueueIterator(virtio_queue::Error),
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
2020-07-30 09:58:29 +00:00
|
|
|
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = result::Result<T, Error>;
|
|
|
|
|
2020-07-30 10:40:09 +00:00
|
|
|
/// I/O statistics of a block device.
///
/// Every counter sits behind an `Arc`, so a clone of this struct observes and
/// updates the same underlying values as the original.
#[derive(Default, Clone)]
pub struct BlockCounters {
    /// Total number of bytes read by completed read requests.
    read_bytes: Arc<AtomicU64>,
    /// Number of completed read requests.
    read_ops: Arc<AtomicU64>,
    /// Total number of bytes written by completed write requests.
    write_bytes: Arc<AtomicU64>,
    /// Number of completed write requests.
    write_ops: Arc<AtomicU64>,
}
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
struct BlockEpollHandler {
|
2021-10-21 10:41:16 +00:00
|
|
|
queue: Queue<GuestMemoryAtomic<GuestMemoryMmap>>,
|
2020-07-30 10:40:09 +00:00
|
|
|
mem: GuestMemoryAtomic<GuestMemoryMmap>,
|
2021-01-20 09:58:30 +00:00
|
|
|
disk_image: Box<dyn AsyncIo>,
|
2020-07-30 10:40:09 +00:00
|
|
|
disk_nsectors: u64,
|
|
|
|
interrupt_cb: Arc<dyn VirtioInterrupt>,
|
|
|
|
disk_image_id: Vec<u8>,
|
|
|
|
kill_evt: EventFd,
|
|
|
|
pause_evt: EventFd,
|
|
|
|
writeback: Arc<AtomicBool>,
|
|
|
|
counters: BlockCounters,
|
|
|
|
queue_evt: EventFd,
|
|
|
|
request_list: HashMap<u16, Request>,
|
2021-02-26 20:06:10 +00:00
|
|
|
rate_limiter: Option<RateLimiter>,
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
impl BlockEpollHandler {
|
2020-07-30 09:58:29 +00:00
|
|
|
fn process_queue_submit(&mut self) -> Result<bool> {
|
2020-07-30 10:40:09 +00:00
|
|
|
let queue = &mut self.queue;
|
|
|
|
|
|
|
|
let mut used_desc_heads = Vec::new();
|
|
|
|
let mut used_count = 0;
|
|
|
|
|
2021-10-21 10:41:16 +00:00
|
|
|
let mut avail_iter = queue.iter().map_err(Error::QueueIterator)?;
|
|
|
|
for mut desc_chain in &mut avail_iter {
|
|
|
|
let mut request = Request::parse(&mut desc_chain).map_err(Error::RequestParsing)?;
|
2021-02-26 20:06:10 +00:00
|
|
|
|
|
|
|
if let Some(rate_limiter) = &mut self.rate_limiter {
|
|
|
|
// If limiter.consume() fails it means there is no more TokenType::Ops
|
|
|
|
// budget and rate limiting is in effect.
|
|
|
|
if !rate_limiter.consume(1, TokenType::Ops) {
|
|
|
|
// Stop processing the queue and return this descriptor chain to the
|
|
|
|
// avail ring, for later processing.
|
2021-10-21 10:41:16 +00:00
|
|
|
avail_iter.go_to_previous_position();
|
2021-02-26 20:06:10 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
// Exercise the rate limiter only if this request is of data transfer type.
|
|
|
|
if request.request_type == RequestType::In
|
|
|
|
|| request.request_type == RequestType::Out
|
|
|
|
{
|
|
|
|
let mut bytes = Wrapping(0);
|
|
|
|
for (_, data_len) in &request.data_descriptors {
|
|
|
|
bytes += Wrapping(*data_len as u64);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If limiter.consume() fails it means there is no more TokenType::Bytes
|
|
|
|
// budget and rate limiting is in effect.
|
|
|
|
if !rate_limiter.consume(bytes.0, TokenType::Bytes) {
|
|
|
|
// Revert the OPS consume().
|
|
|
|
rate_limiter.manual_replenish(1, TokenType::Ops);
|
|
|
|
// Stop processing the queue and return this descriptor chain to the
|
|
|
|
// avail ring, for later processing.
|
2021-10-21 10:41:16 +00:00
|
|
|
avail_iter.go_to_previous_position();
|
2021-02-26 20:06:10 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2020-12-01 16:15:26 +00:00
|
|
|
request.set_writeback(self.writeback.load(Ordering::Acquire));
|
2021-01-20 09:58:30 +00:00
|
|
|
|
2020-07-30 09:58:29 +00:00
|
|
|
if request
|
2021-01-20 09:58:30 +00:00
|
|
|
.execute_async(
|
2021-10-21 10:41:16 +00:00
|
|
|
desc_chain.memory(),
|
2020-07-30 09:58:29 +00:00
|
|
|
self.disk_nsectors,
|
2021-01-20 09:58:30 +00:00
|
|
|
self.disk_image.as_mut(),
|
2020-07-30 09:58:29 +00:00
|
|
|
&self.disk_image_id,
|
2021-10-21 10:41:16 +00:00
|
|
|
desc_chain.head_index() as u64,
|
2020-07-30 09:58:29 +00:00
|
|
|
)
|
|
|
|
.map_err(Error::RequestExecuting)?
|
|
|
|
{
|
2021-10-21 10:41:16 +00:00
|
|
|
self.request_list.insert(desc_chain.head_index(), request);
|
2020-07-30 09:58:29 +00:00
|
|
|
} else {
|
|
|
|
// We use unwrap because the request parsing process already
|
|
|
|
// checked that the status_addr was valid.
|
2021-10-21 10:41:16 +00:00
|
|
|
desc_chain
|
|
|
|
.memory()
|
|
|
|
.write_obj(VIRTIO_BLK_S_OK, request.status_addr)
|
|
|
|
.unwrap();
|
2020-07-30 10:40:09 +00:00
|
|
|
|
2020-07-30 09:58:29 +00:00
|
|
|
// If no asynchronous operation has been submitted, we can
|
|
|
|
// simply return the used descriptor.
|
2021-10-21 10:41:16 +00:00
|
|
|
used_desc_heads.push((desc_chain.head_index(), 0));
|
2020-07-30 09:58:29 +00:00
|
|
|
used_count += 1;
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for &(desc_index, len) in used_desc_heads.iter() {
|
2021-10-21 10:41:16 +00:00
|
|
|
queue
|
|
|
|
.add_used(desc_index, len)
|
|
|
|
.map_err(Error::QueueAddUsed)?;
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
2020-07-30 09:58:29 +00:00
|
|
|
Ok(used_count > 0)
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
2020-07-30 09:58:29 +00:00
|
|
|
fn process_queue_complete(&mut self) -> Result<bool> {
|
2020-07-30 10:40:09 +00:00
|
|
|
let queue = &mut self.queue;
|
|
|
|
|
|
|
|
let mut used_desc_heads = Vec::new();
|
|
|
|
let mut used_count = 0;
|
|
|
|
let mem = self.mem.memory();
|
|
|
|
let mut read_bytes = Wrapping(0);
|
|
|
|
let mut write_bytes = Wrapping(0);
|
|
|
|
let mut read_ops = Wrapping(0);
|
|
|
|
let mut write_ops = Wrapping(0);
|
|
|
|
|
2021-01-20 09:58:30 +00:00
|
|
|
let completion_list = self.disk_image.complete();
|
|
|
|
for (user_data, result) in completion_list {
|
|
|
|
let desc_index = user_data as u16;
|
2022-01-19 10:16:45 +00:00
|
|
|
let mut request = self
|
2020-07-30 09:58:29 +00:00
|
|
|
.request_list
|
|
|
|
.remove(&desc_index)
|
|
|
|
.ok_or(Error::MissingEntryRequestList)?;
|
2022-01-19 10:16:45 +00:00
|
|
|
request.complete_async().map_err(Error::RequestCompleting)?;
|
2020-07-30 10:40:09 +00:00
|
|
|
|
|
|
|
let (status, len) = if result >= 0 {
|
|
|
|
match request.request_type {
|
|
|
|
RequestType::In => {
|
2020-09-14 13:02:04 +00:00
|
|
|
for (_, data_len) in &request.data_descriptors {
|
|
|
|
read_bytes += Wrapping(*data_len as u64);
|
|
|
|
}
|
2020-07-30 10:40:09 +00:00
|
|
|
read_ops += Wrapping(1);
|
|
|
|
}
|
|
|
|
RequestType::Out => {
|
|
|
|
if !request.writeback {
|
2021-01-20 09:58:30 +00:00
|
|
|
self.disk_image.fsync(None).map_err(Error::Fsync)?;
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
2020-09-14 13:02:04 +00:00
|
|
|
for (_, data_len) in &request.data_descriptors {
|
|
|
|
write_bytes += Wrapping(*data_len as u64);
|
|
|
|
}
|
2020-07-30 10:40:09 +00:00
|
|
|
write_ops += Wrapping(1);
|
|
|
|
}
|
|
|
|
_ => {}
|
2020-07-30 09:58:29 +00:00
|
|
|
}
|
|
|
|
|
2020-07-30 10:40:09 +00:00
|
|
|
(VIRTIO_BLK_S_OK, result as u32)
|
|
|
|
} else {
|
|
|
|
error!(
|
|
|
|
"Request failed: {:?}",
|
|
|
|
io::Error::from_raw_os_error(-result)
|
|
|
|
);
|
2020-07-30 09:58:29 +00:00
|
|
|
return Err(Error::AsyncRequestFailure);
|
2020-07-30 10:40:09 +00:00
|
|
|
};
|
2020-07-30 09:58:29 +00:00
|
|
|
|
2020-07-30 10:40:09 +00:00
|
|
|
// We use unwrap because the request parsing process already
|
|
|
|
// checked that the status_addr was valid.
|
|
|
|
mem.write_obj(status, request.status_addr).unwrap();
|
|
|
|
|
2021-01-20 09:58:30 +00:00
|
|
|
used_desc_heads.push((desc_index as u16, len));
|
2020-07-30 10:40:09 +00:00
|
|
|
used_count += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
for &(desc_index, len) in used_desc_heads.iter() {
|
2021-10-21 10:41:16 +00:00
|
|
|
queue
|
|
|
|
.add_used(desc_index, len)
|
|
|
|
.map_err(Error::QueueAddUsed)?;
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
self.counters
|
|
|
|
.write_bytes
|
|
|
|
.fetch_add(write_bytes.0, Ordering::AcqRel);
|
|
|
|
self.counters
|
|
|
|
.write_ops
|
|
|
|
.fetch_add(write_ops.0, Ordering::AcqRel);
|
|
|
|
|
|
|
|
self.counters
|
|
|
|
.read_bytes
|
|
|
|
.fetch_add(read_bytes.0, Ordering::AcqRel);
|
|
|
|
self.counters
|
|
|
|
.read_ops
|
|
|
|
.fetch_add(read_ops.0, Ordering::AcqRel);
|
|
|
|
|
2020-07-30 09:58:29 +00:00
|
|
|
Ok(used_count > 0)
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
|
|
|
|
self.interrupt_cb
|
|
|
|
.trigger(&VirtioInterruptType::Queue, Some(&self.queue))
|
|
|
|
.map_err(|e| {
|
|
|
|
error!("Failed to signal used queue: {:?}", e);
|
|
|
|
DeviceError::FailedSignalingUsedQueue(e)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-08-11 14:05:06 +00:00
|
|
|
fn run(
|
|
|
|
&mut self,
|
|
|
|
paused: Arc<AtomicBool>,
|
|
|
|
paused_sync: Arc<Barrier>,
|
|
|
|
) -> result::Result<(), EpollHelperError> {
|
2020-07-30 10:40:09 +00:00
|
|
|
let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
|
|
|
|
helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
|
2021-01-20 09:58:30 +00:00
|
|
|
helper.add_event(self.disk_image.notifier().as_raw_fd(), COMPLETION_EVENT)?;
|
2021-02-26 20:06:10 +00:00
|
|
|
if let Some(rate_limiter) = &self.rate_limiter {
|
|
|
|
helper.add_event(rate_limiter.as_raw_fd(), RATE_LIMITER_EVENT)?;
|
|
|
|
}
|
2020-08-11 14:05:06 +00:00
|
|
|
helper.run(paused, paused_sync, self)?;
|
2020-07-30 10:40:09 +00:00
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
impl EpollHelperHandler for BlockEpollHandler {
|
2020-08-11 17:12:02 +00:00
|
|
|
fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
|
|
|
|
let ev_type = event.data as u16;
|
|
|
|
match ev_type {
|
2020-07-30 10:40:09 +00:00
|
|
|
QUEUE_AVAIL_EVENT => {
|
|
|
|
if let Err(e) = self.queue_evt.read() {
|
|
|
|
error!("Failed to get queue event: {:?}", e);
|
|
|
|
return true;
|
2020-07-30 09:58:29 +00:00
|
|
|
}
|
|
|
|
|
2021-02-26 20:06:10 +00:00
|
|
|
let rate_limit_reached =
|
|
|
|
self.rate_limiter.as_ref().map_or(false, |r| r.is_blocked());
|
|
|
|
|
|
|
|
// Process the queue only when the rate limit is not reached
|
|
|
|
if !rate_limit_reached {
|
|
|
|
match self.process_queue_submit() {
|
|
|
|
Ok(needs_notification) => {
|
|
|
|
if needs_notification {
|
|
|
|
if let Err(e) = self.signal_used_queue() {
|
|
|
|
error!("Failed to signal used queue: {:?}", e);
|
|
|
|
return true;
|
|
|
|
}
|
2020-07-30 09:58:29 +00:00
|
|
|
}
|
|
|
|
}
|
2021-02-26 20:06:10 +00:00
|
|
|
Err(e) => {
|
|
|
|
error!("Failed to process queue (submit): {:?}", e);
|
|
|
|
return true;
|
|
|
|
}
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-01-20 09:58:30 +00:00
|
|
|
COMPLETION_EVENT => {
|
|
|
|
if let Err(e) = self.disk_image.notifier().read() {
|
2020-07-30 10:40:09 +00:00
|
|
|
error!("Failed to get queue event: {:?}", e);
|
|
|
|
return true;
|
2020-07-30 09:58:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
match self.process_queue_complete() {
|
|
|
|
Ok(needs_notification) => {
|
|
|
|
if needs_notification {
|
|
|
|
if let Err(e) = self.signal_used_queue() {
|
|
|
|
error!("Failed to signal used queue: {:?}", e);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Err(e) => {
|
|
|
|
error!("Failed to process queue (complete): {:?}", e);
|
2020-07-30 10:40:09 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-02-26 20:06:10 +00:00
|
|
|
RATE_LIMITER_EVENT => {
|
|
|
|
if let Some(rate_limiter) = &mut self.rate_limiter {
|
|
|
|
// Upon rate limiter event, call the rate limiter handler
|
|
|
|
// and restart processing the queue.
|
|
|
|
if rate_limiter.event_handler().is_ok() {
|
|
|
|
match self.process_queue_submit() {
|
|
|
|
Ok(needs_notification) => {
|
|
|
|
if needs_notification {
|
|
|
|
if let Err(e) = self.signal_used_queue() {
|
|
|
|
error!("Failed to signal used queue: {:?}", e);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Err(e) => {
|
|
|
|
error!("Failed to process queue (submit): {:?}", e);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
error!("Unexpected 'RATE_LIMITER_EVENT' when rate_limiter is not enabled.");
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2020-07-30 10:40:09 +00:00
|
|
|
_ => {
|
2020-08-11 17:12:02 +00:00
|
|
|
error!("Unexpected event: {}", ev_type);
|
2020-07-30 10:40:09 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Virtio device for exposing block level read/write operations on a host file.
|
2021-01-22 10:15:13 +00:00
|
|
|
pub struct Block {
|
2020-09-03 09:37:36 +00:00
|
|
|
common: VirtioCommon,
|
2020-07-30 10:40:09 +00:00
|
|
|
id: String,
|
2021-01-20 09:58:30 +00:00
|
|
|
disk_image: Box<dyn DiskFile>,
|
2020-07-30 10:40:09 +00:00
|
|
|
disk_path: PathBuf,
|
|
|
|
disk_nsectors: u64,
|
|
|
|
config: VirtioBlockConfig,
|
|
|
|
writeback: Arc<AtomicBool>,
|
|
|
|
counters: BlockCounters,
|
2020-08-18 00:10:03 +00:00
|
|
|
seccomp_action: SeccompAction,
|
2021-02-26 20:06:10 +00:00
|
|
|
rate_limiter_config: Option<RateLimiterConfig>,
|
2021-09-07 15:10:48 +00:00
|
|
|
exit_evt: EventFd,
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
2021-05-11 14:02:43 +00:00
|
|
|
#[derive(Versionize)]
|
2020-07-30 10:40:09 +00:00
|
|
|
pub struct BlockState {
|
2021-04-23 09:55:05 +00:00
|
|
|
pub disk_path: String,
|
2020-07-30 10:40:09 +00:00
|
|
|
pub disk_nsectors: u64,
|
|
|
|
pub avail_features: u64,
|
|
|
|
pub acked_features: u64,
|
|
|
|
pub config: VirtioBlockConfig,
|
|
|
|
}
|
|
|
|
|
2021-05-06 13:34:31 +00:00
|
|
|
// Empty impl: `BlockState` relies entirely on the trait's default behavior.
impl VersionMapped for BlockState {}
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
impl Block {
|
2020-07-30 10:40:09 +00:00
|
|
|
/// Create a new virtio block device that operates on the given file.
|
2020-08-18 00:10:03 +00:00
|
|
|
#[allow(clippy::too_many_arguments)]
|
2020-07-30 10:40:09 +00:00
|
|
|
pub fn new(
|
|
|
|
id: String,
|
2021-01-20 09:58:30 +00:00
|
|
|
mut disk_image: Box<dyn DiskFile>,
|
2020-07-30 10:40:09 +00:00
|
|
|
disk_path: PathBuf,
|
|
|
|
is_disk_read_only: bool,
|
|
|
|
iommu: bool,
|
|
|
|
num_queues: usize,
|
|
|
|
queue_size: u16,
|
2020-08-18 00:10:03 +00:00
|
|
|
seccomp_action: SeccompAction,
|
2021-02-26 20:06:10 +00:00
|
|
|
rate_limiter_config: Option<RateLimiterConfig>,
|
2021-09-07 15:10:48 +00:00
|
|
|
exit_evt: EventFd,
|
2020-07-30 10:40:09 +00:00
|
|
|
) -> io::Result<Self> {
|
2021-01-20 09:58:30 +00:00
|
|
|
let disk_size = disk_image.size().map_err(|e| {
|
|
|
|
io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
format!("Failed getting disk size: {}", e),
|
|
|
|
)
|
|
|
|
})?;
|
2020-07-30 10:40:09 +00:00
|
|
|
if disk_size % SECTOR_SIZE != 0 {
|
|
|
|
warn!(
|
|
|
|
"Disk size {} is not a multiple of sector size {}; \
|
|
|
|
the remainder will not be visible to the guest.",
|
|
|
|
disk_size, SECTOR_SIZE
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut avail_features = (1u64 << VIRTIO_F_VERSION_1)
|
|
|
|
| (1u64 << VIRTIO_BLK_F_FLUSH)
|
2021-12-07 16:43:29 +00:00
|
|
|
| (1u64 << VIRTIO_BLK_F_CONFIG_WCE)
|
|
|
|
| (1u64 << VIRTIO_BLK_F_BLK_SIZE)
|
|
|
|
| (1u64 << VIRTIO_BLK_F_TOPOLOGY);
|
2020-07-30 10:40:09 +00:00
|
|
|
|
|
|
|
if iommu {
|
|
|
|
avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
|
|
|
|
}
|
|
|
|
|
|
|
|
if is_disk_read_only {
|
|
|
|
avail_features |= 1u64 << VIRTIO_BLK_F_RO;
|
|
|
|
}
|
|
|
|
|
2021-12-07 16:43:29 +00:00
|
|
|
let topology = disk_image.topology();
|
|
|
|
info!("Disk topology: {:?}", topology);
|
|
|
|
|
|
|
|
let logical_block_size = if topology.logical_block_size > 512 {
|
|
|
|
topology.logical_block_size
|
|
|
|
} else {
|
|
|
|
512
|
|
|
|
};
|
|
|
|
|
|
|
|
// Calculate the exponent that maps physical block to logical block
|
|
|
|
let mut physical_block_exp = 0;
|
|
|
|
let mut size = logical_block_size;
|
|
|
|
while size < topology.physical_block_size {
|
|
|
|
physical_block_exp += 1;
|
|
|
|
size <<= 1;
|
|
|
|
}
|
|
|
|
|
2020-07-30 10:40:09 +00:00
|
|
|
let disk_nsectors = disk_size / SECTOR_SIZE;
|
|
|
|
let mut config = VirtioBlockConfig {
|
|
|
|
capacity: disk_nsectors,
|
|
|
|
writeback: 1,
|
2021-12-07 16:43:29 +00:00
|
|
|
blk_size: topology.logical_block_size as u32,
|
|
|
|
physical_block_exp,
|
|
|
|
min_io_size: (topology.minimum_io_size / logical_block_size) as u16,
|
|
|
|
opt_io_size: (topology.optimal_io_size / logical_block_size) as u32,
|
2020-07-30 10:40:09 +00:00
|
|
|
..Default::default()
|
|
|
|
};
|
|
|
|
|
|
|
|
if num_queues > 1 {
|
|
|
|
avail_features |= 1u64 << VIRTIO_BLK_F_MQ;
|
|
|
|
config.num_queues = num_queues as u16;
|
|
|
|
}
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
Ok(Block {
|
2020-09-03 09:37:36 +00:00
|
|
|
common: VirtioCommon {
|
2021-03-25 16:54:09 +00:00
|
|
|
device_type: VirtioDeviceType::Block as u32,
|
2020-09-03 09:37:36 +00:00
|
|
|
avail_features,
|
2020-09-04 08:37:37 +00:00
|
|
|
paused_sync: Some(Arc::new(Barrier::new(num_queues + 1))),
|
|
|
|
queue_sizes: vec![queue_size; num_queues],
|
2021-01-19 06:11:07 +00:00
|
|
|
min_queues: 1,
|
2020-09-03 15:56:32 +00:00
|
|
|
..Default::default()
|
2020-09-03 09:37:36 +00:00
|
|
|
},
|
2020-07-30 10:40:09 +00:00
|
|
|
id,
|
|
|
|
disk_image,
|
|
|
|
disk_path,
|
|
|
|
disk_nsectors,
|
|
|
|
config,
|
|
|
|
writeback: Arc::new(AtomicBool::new(true)),
|
|
|
|
counters: BlockCounters::default(),
|
2020-08-18 00:10:03 +00:00
|
|
|
seccomp_action,
|
2021-02-26 20:06:10 +00:00
|
|
|
rate_limiter_config,
|
2021-09-07 15:10:48 +00:00
|
|
|
exit_evt,
|
2020-07-30 10:40:09 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
fn state(&self) -> BlockState {
|
|
|
|
BlockState {
|
2021-04-23 09:55:05 +00:00
|
|
|
disk_path: self.disk_path.to_str().unwrap().to_owned(),
|
2020-07-30 10:40:09 +00:00
|
|
|
disk_nsectors: self.disk_nsectors,
|
2020-09-03 09:37:36 +00:00
|
|
|
avail_features: self.common.avail_features,
|
|
|
|
acked_features: self.common.acked_features,
|
2020-07-30 10:40:09 +00:00
|
|
|
config: self.config,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-11 16:00:53 +00:00
|
|
|
fn set_state(&mut self, state: &BlockState) {
|
2021-04-23 09:55:05 +00:00
|
|
|
self.disk_path = state.disk_path.clone().into();
|
2020-07-30 10:40:09 +00:00
|
|
|
self.disk_nsectors = state.disk_nsectors;
|
2020-09-03 09:37:36 +00:00
|
|
|
self.common.avail_features = state.avail_features;
|
|
|
|
self.common.acked_features = state.acked_features;
|
2020-07-30 10:40:09 +00:00
|
|
|
self.config = state.config;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn update_writeback(&mut self) {
|
|
|
|
// Use writeback from config if VIRTIO_BLK_F_CONFIG_WCE
|
2020-09-03 09:37:36 +00:00
|
|
|
let writeback = if self.common.feature_acked(VIRTIO_BLK_F_CONFIG_WCE.into()) {
|
|
|
|
self.config.writeback == 1
|
|
|
|
} else {
|
|
|
|
// Else check if VIRTIO_BLK_F_FLUSH negotiated
|
|
|
|
self.common.feature_acked(VIRTIO_BLK_F_FLUSH.into())
|
|
|
|
};
|
2020-07-30 10:40:09 +00:00
|
|
|
|
|
|
|
info!(
|
|
|
|
"Changing cache mode to {}",
|
|
|
|
if writeback {
|
|
|
|
"writeback"
|
|
|
|
} else {
|
|
|
|
"writethrough"
|
|
|
|
}
|
|
|
|
);
|
2020-12-01 16:15:26 +00:00
|
|
|
self.writeback.store(writeback, Ordering::Release);
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
impl Drop for Block {
|
2020-07-30 10:40:09 +00:00
|
|
|
fn drop(&mut self) {
|
2020-09-04 08:37:37 +00:00
|
|
|
if let Some(kill_evt) = self.common.kill_evt.take() {
|
2020-07-30 10:40:09 +00:00
|
|
|
// Ignore the result because there is nothing we can do about it.
|
|
|
|
let _ = kill_evt.write(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
impl VirtioDevice for Block {
|
2020-07-30 10:40:09 +00:00
|
|
|
fn device_type(&self) -> u32 {
|
2020-09-04 08:37:37 +00:00
|
|
|
self.common.device_type
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn queue_max_sizes(&self) -> &[u16] {
|
2020-09-04 08:37:37 +00:00
|
|
|
&self.common.queue_sizes
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn features(&self) -> u64 {
|
2020-09-03 09:37:36 +00:00
|
|
|
self.common.avail_features
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn ack_features(&mut self, value: u64) {
|
2020-09-03 09:37:36 +00:00
|
|
|
self.common.ack_features(value)
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn read_config(&self, offset: u64, data: &mut [u8]) {
|
|
|
|
self.read_config_from_slice(self.config.as_slice(), offset, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn write_config(&mut self, offset: u64, data: &[u8]) {
|
|
|
|
// The "writeback" field is the only mutable field
|
|
|
|
let writeback_offset =
|
|
|
|
(&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64);
|
|
|
|
if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback)
|
|
|
|
{
|
|
|
|
error!(
|
|
|
|
"Attempt to write to read-only field: offset {:x} length {}",
|
|
|
|
offset,
|
|
|
|
data.len()
|
|
|
|
);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
self.config.writeback = data[0];
|
|
|
|
self.update_writeback();
|
|
|
|
}
|
|
|
|
|
|
|
|
fn activate(
|
|
|
|
&mut self,
|
|
|
|
mem: GuestMemoryAtomic<GuestMemoryMmap>,
|
|
|
|
interrupt_cb: Arc<dyn VirtioInterrupt>,
|
2021-10-21 10:41:16 +00:00
|
|
|
mut queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
|
2020-07-30 10:40:09 +00:00
|
|
|
mut queue_evts: Vec<EventFd>,
|
|
|
|
) -> ActivateResult {
|
2020-09-04 08:37:37 +00:00
|
|
|
self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
|
2020-07-30 10:40:09 +00:00
|
|
|
|
|
|
|
let disk_image_id = build_disk_image_id(&self.disk_path);
|
|
|
|
self.update_writeback();
|
|
|
|
|
|
|
|
let mut epoll_threads = Vec::new();
|
2021-01-18 13:55:37 +00:00
|
|
|
for i in 0..queues.len() {
|
2020-07-30 10:40:09 +00:00
|
|
|
let queue_evt = queue_evts.remove(0);
|
2021-01-18 13:55:37 +00:00
|
|
|
let queue = queues.remove(0);
|
2021-10-21 10:41:16 +00:00
|
|
|
let queue_size = queue.state.size;
|
2021-06-02 18:08:06 +00:00
|
|
|
let (kill_evt, pause_evt) = self.common.dup_eventfds();
|
2020-09-04 08:37:37 +00:00
|
|
|
|
2021-02-26 20:06:10 +00:00
|
|
|
let rate_limiter: Option<RateLimiter> = self
|
|
|
|
.rate_limiter_config
|
|
|
|
.map(RateLimiterConfig::try_into)
|
|
|
|
.transpose()
|
|
|
|
.map_err(ActivateError::CreateRateLimiter)?;
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
let mut handler = BlockEpollHandler {
|
2021-01-18 13:55:37 +00:00
|
|
|
queue,
|
2020-07-30 10:40:09 +00:00
|
|
|
mem: mem.clone(),
|
2021-01-20 09:58:30 +00:00
|
|
|
disk_image: self
|
|
|
|
.disk_image
|
|
|
|
.new_async_io(queue_size as u32)
|
|
|
|
.map_err(|e| {
|
|
|
|
error!("failed to create new AsyncIo: {}", e);
|
|
|
|
ActivateError::BadActivate
|
|
|
|
})?,
|
2020-07-30 10:40:09 +00:00
|
|
|
disk_nsectors: self.disk_nsectors,
|
|
|
|
interrupt_cb: interrupt_cb.clone(),
|
|
|
|
disk_image_id: disk_image_id.clone(),
|
2020-09-04 08:37:37 +00:00
|
|
|
kill_evt,
|
|
|
|
pause_evt,
|
2020-07-30 10:40:09 +00:00
|
|
|
writeback: self.writeback.clone(),
|
|
|
|
counters: self.counters.clone(),
|
|
|
|
queue_evt,
|
2021-01-18 13:55:37 +00:00
|
|
|
request_list: HashMap::with_capacity(queue_size.into()),
|
2021-02-26 20:06:10 +00:00
|
|
|
rate_limiter,
|
2020-07-30 10:40:09 +00:00
|
|
|
};
|
|
|
|
|
2020-09-04 08:37:37 +00:00
|
|
|
let paused = self.common.paused.clone();
|
|
|
|
let paused_sync = self.common.paused_sync.clone();
|
2020-07-30 10:40:09 +00:00
|
|
|
|
2021-09-03 10:43:30 +00:00
|
|
|
spawn_virtio_thread(
|
|
|
|
&format!("{}_q{}", self.id.clone(), i),
|
|
|
|
&self.seccomp_action,
|
|
|
|
Thread::VirtioBlock,
|
|
|
|
&mut epoll_threads,
|
2021-09-07 15:10:48 +00:00
|
|
|
&self.exit_evt,
|
2021-09-03 10:43:30 +00:00
|
|
|
move || {
|
2021-08-17 00:20:11 +00:00
|
|
|
if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
|
2020-08-17 19:45:17 +00:00
|
|
|
error!("Error running worker: {:?}", e);
|
|
|
|
}
|
2021-09-03 10:43:30 +00:00
|
|
|
},
|
|
|
|
)?;
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
2020-09-04 08:37:37 +00:00
|
|
|
self.common.epoll_threads = Some(epoll_threads);
|
2021-02-18 15:10:51 +00:00
|
|
|
event!("virtio-device", "activated", "id", &self.id);
|
2020-07-30 10:40:09 +00:00
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2021-01-18 12:38:08 +00:00
|
|
|
fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
|
2021-02-18 15:10:51 +00:00
|
|
|
let result = self.common.reset();
|
|
|
|
event!("virtio-device", "reset", "id", &self.id);
|
|
|
|
result
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn counters(&self) -> Option<HashMap<&'static str, Wrapping<u64>>> {
|
|
|
|
let mut counters = HashMap::new();
|
|
|
|
|
|
|
|
counters.insert(
|
|
|
|
"read_bytes",
|
|
|
|
Wrapping(self.counters.read_bytes.load(Ordering::Acquire)),
|
|
|
|
);
|
|
|
|
counters.insert(
|
|
|
|
"write_bytes",
|
|
|
|
Wrapping(self.counters.write_bytes.load(Ordering::Acquire)),
|
|
|
|
);
|
|
|
|
counters.insert(
|
|
|
|
"read_ops",
|
|
|
|
Wrapping(self.counters.read_ops.load(Ordering::Acquire)),
|
|
|
|
);
|
|
|
|
counters.insert(
|
|
|
|
"write_ops",
|
|
|
|
Wrapping(self.counters.write_ops.load(Ordering::Acquire)),
|
|
|
|
);
|
|
|
|
|
|
|
|
Some(counters)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
impl Pausable for Block {
|
2020-09-04 08:37:37 +00:00
|
|
|
fn pause(&mut self) -> result::Result<(), MigratableError> {
|
|
|
|
self.common.pause()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn resume(&mut self) -> result::Result<(), MigratableError> {
|
|
|
|
self.common.resume()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-22 10:15:13 +00:00
|
|
|
impl Snapshottable for Block {
|
2020-07-30 10:40:09 +00:00
|
|
|
fn id(&self) -> String {
|
|
|
|
self.id.clone()
|
|
|
|
}
|
|
|
|
|
2020-08-21 12:31:58 +00:00
|
|
|
fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
|
2021-05-06 13:34:31 +00:00
|
|
|
Snapshot::new_from_versioned_state(&self.id(), &self.state())
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
|
2021-05-06 13:34:31 +00:00
|
|
|
self.set_state(&snapshot.to_versioned_state(&self.id)?);
|
2021-04-08 09:20:10 +00:00
|
|
|
Ok(())
|
2020-07-30 10:40:09 +00:00
|
|
|
}
|
|
|
|
}
|
2021-01-22 10:15:13 +00:00
|
|
|
// Empty impl: `Block` relies entirely on the trait's default behavior.
impl Transportable for Block {}
|
|
|
|
// Empty impl: `Block` relies entirely on the trait's default behavior.
impl Migratable for Block {}
|