cloud-hypervisor/virtio-devices/src/pmem.rs
Sebastien Boeuf 0162d73ed8 virtio-queue: Update crate based on latest rust-vmm/vm-virtio
This crate contains up to date definition of the Queue, AvailIter,
DescriptorChain and Descriptor structures forked from the upstream
crate rust-vmm/vm-virtio 27b18af01ee2d9564626e084a758a2b496d2c618.

The following patches have been applied on top of this base in order to
make it work correctly with Cloud Hypervisor requirements:

- Add MSI vector field to the Queue

  In order to help with MSI/MSI-X support, it is convenient to store the
  value of the interrupt vector inside the Queue directly.

- Handle address translations

  For devices with access to data in memory being translated, we add to
  the Queue the ability to translate the address stored in the
  descriptor.
  It is very helpful as it performs the translation right after the
  untranslated address is read from memory, avoiding any errors from
  happening from the consumer's crate perspective. It also allows the
  consumer to reduce greatly the amount of duplicated code for applying
  the translation in many different places.

- Add helpers for Queue structure

  They are meant to help crate's consumers getting/setting information
  about the Queue.

These patches can be found on the 'ch' branch from the Cloud Hypervisor
fork: https://github.com/cloud-hypervisor/vm-virtio.git

This patch takes care of updating the Cloud Hypervisor code in
virtio-devices and vm-virtio to build correctly with the latest version
of virtio-queue.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
2022-01-06 10:02:40 +00:00

456 lines
14 KiB
Rust

// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
use super::Error as DeviceError;
use super::{
ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, MmapRegion};
use crate::{VirtioInterrupt, VirtioInterruptType};
use seccompiler::SeccompAction;
use std::fmt::{self, Display};
use std::fs::File;
use std::io;
use std::mem::size_of;
use std::os::unix::io::AsRawFd;
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier};
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{DescriptorChain, Queue};
use vm_memory::{
Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError,
GuestMemoryLoadGuard,
};
use vm_migration::VersionMapped;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;
const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];
const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;
// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
#[derive(Copy, Clone, Debug, Default, Versionize)]
#[repr(C)]
struct VirtioPmemConfig {
start: u64,
size: u64,
}
// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemConfig {}
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemReq {
type_: u32,
}
// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemReq {}
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemResp {
ret: u32,
}
// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemResp {}
#[derive(Debug)]
enum Error {
/// Guest gave us bad memory addresses.
GuestMemory(GuestMemoryError),
/// Guest gave us a write only descriptor that protocol says to read from.
UnexpectedWriteOnlyDescriptor,
/// Guest gave us a read only descriptor that protocol says to write to.
UnexpectedReadOnlyDescriptor,
/// Guest gave us too few descriptors in a descriptor chain.
DescriptorChainTooShort,
/// Guest gave us a buffer that was too short to use.
BufferLengthTooSmall,
/// Guest sent us invalid request.
InvalidRequest,
}
impl Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*;
match self {
BufferLengthTooSmall => write!(f, "buffer length too small"),
DescriptorChainTooShort => write!(f, "descriptor chain too short"),
GuestMemory(e) => write!(f, "bad guest memory address: {}", e),
InvalidRequest => write!(f, "invalid request"),
UnexpectedReadOnlyDescriptor => write!(f, "unexpected read-only descriptor"),
UnexpectedWriteOnlyDescriptor => write!(f, "unexpected write-only descriptor"),
}
}
}
#[derive(Debug, PartialEq)]
enum RequestType {
Flush,
}
struct Request {
type_: RequestType,
status_addr: GuestAddress,
}
impl Request {
fn parse(
desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
) -> result::Result<Request, Error> {
let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
// The descriptor contains the request type which MUST be readable.
if desc.is_write_only() {
return Err(Error::UnexpectedWriteOnlyDescriptor);
}
if desc.len() as usize != size_of::<VirtioPmemReq>() {
return Err(Error::InvalidRequest);
}
let request: VirtioPmemReq = desc_chain
.memory()
.read_obj(desc.addr())
.map_err(Error::GuestMemory)?;
let request_type = match request.type_ {
VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
_ => return Err(Error::InvalidRequest),
};
let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
// The status MUST always be writable
if !status_desc.is_write_only() {
return Err(Error::UnexpectedReadOnlyDescriptor);
}
if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() {
return Err(Error::BufferLengthTooSmall);
}
Ok(Request {
type_: request_type,
status_addr: status_desc.addr(),
})
}
}
struct PmemEpollHandler {
queue: Queue<GuestMemoryAtomic<GuestMemoryMmap>>,
disk: File,
interrupt_cb: Arc<dyn VirtioInterrupt>,
queue_evt: EventFd,
kill_evt: EventFd,
pause_evt: EventFd,
}
impl PmemEpollHandler {
fn process_queue(&mut self) -> bool {
let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
let mut used_count = 0;
for mut desc_chain in self.queue.iter().unwrap() {
let len = match Request::parse(&mut desc_chain) {
Ok(ref req) if (req.type_ == RequestType::Flush) => {
let status_code = match self.disk.sync_all() {
Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
Err(e) => {
error!("failed flushing disk image: {}", e);
VIRTIO_PMEM_RESP_TYPE_EIO
}
};
let resp = VirtioPmemResp { ret: status_code };
match desc_chain.memory().write_obj(resp, req.status_addr) {
Ok(_) => size_of::<VirtioPmemResp>() as u32,
Err(e) => {
error!("bad guest memory address: {}", e);
0
}
}
}
Ok(ref req) => {
// Currently, there is only one virtio-pmem request, FLUSH.
error!("Invalid virtio request type {:?}", req.type_);
0
}
Err(e) => {
error!("Failed to parse available descriptor chain: {:?}", e);
0
}
};
used_desc_heads[used_count] = (desc_chain.head_index(), len);
used_count += 1;
}
for &(desc_index, len) in &used_desc_heads[..used_count] {
self.queue.add_used(desc_index, len).unwrap();
}
used_count > 0
}
fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
self.interrupt_cb
.trigger(&VirtioInterruptType::Queue, Some(&self.queue))
.map_err(|e| {
error!("Failed to signal used queue: {:?}", e);
DeviceError::FailedSignalingUsedQueue(e)
})
}
fn run(
&mut self,
paused: Arc<AtomicBool>,
paused_sync: Arc<Barrier>,
) -> result::Result<(), EpollHelperError> {
let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
helper.run(paused, paused_sync, self)?;
Ok(())
}
}
impl EpollHelperHandler for PmemEpollHandler {
fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
let ev_type = event.data as u16;
match ev_type {
QUEUE_AVAIL_EVENT => {
if let Err(e) = self.queue_evt.read() {
error!("Failed to get queue event: {:?}", e);
return true;
} else if self.process_queue() {
if let Err(e) = self.signal_used_queue() {
error!("Failed to signal used queue: {:?}", e);
return true;
}
}
}
_ => {
error!("Unexpected event: {}", ev_type);
return true;
}
}
false
}
}
pub struct Pmem {
common: VirtioCommon,
id: String,
disk: Option<File>,
config: VirtioPmemConfig,
mapping: UserspaceMapping,
seccomp_action: SeccompAction,
exit_evt: EventFd,
// Hold ownership of the memory that is allocated for the device
// which will be automatically dropped when the device is dropped
_region: MmapRegion,
}
#[derive(Versionize)]
pub struct PmemState {
avail_features: u64,
acked_features: u64,
config: VirtioPmemConfig,
}
impl VersionMapped for PmemState {}
impl Pmem {
#[allow(clippy::too_many_arguments)]
pub fn new(
id: String,
disk: File,
addr: GuestAddress,
mapping: UserspaceMapping,
_region: MmapRegion,
iommu: bool,
seccomp_action: SeccompAction,
exit_evt: EventFd,
) -> io::Result<Pmem> {
let config = VirtioPmemConfig {
start: addr.raw_value().to_le(),
size: (_region.size() as u64).to_le(),
};
let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;
if iommu {
avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
}
Ok(Pmem {
common: VirtioCommon {
device_type: VirtioDeviceType::Pmem as u32,
queue_sizes: QUEUE_SIZES.to_vec(),
paused_sync: Some(Arc::new(Barrier::new(2))),
avail_features,
min_queues: 1,
..Default::default()
},
id,
disk: Some(disk),
config,
mapping,
seccomp_action,
_region,
exit_evt,
})
}
fn state(&self) -> PmemState {
PmemState {
avail_features: self.common.avail_features,
acked_features: self.common.acked_features,
config: self.config,
}
}
fn set_state(&mut self, state: &PmemState) {
self.common.avail_features = state.avail_features;
self.common.acked_features = state.acked_features;
self.config = state.config;
}
}
impl Drop for Pmem {
fn drop(&mut self) {
if let Some(kill_evt) = self.common.kill_evt.take() {
// Ignore the result because there is nothing we can do about it.
let _ = kill_evt.write(1);
}
}
}
impl VirtioDevice for Pmem {
fn device_type(&self) -> u32 {
self.common.device_type
}
fn queue_max_sizes(&self) -> &[u16] {
&self.common.queue_sizes
}
fn features(&self) -> u64 {
self.common.avail_features
}
fn ack_features(&mut self, value: u64) {
self.common.ack_features(value)
}
fn read_config(&self, offset: u64, data: &mut [u8]) {
self.read_config_from_slice(self.config.as_slice(), offset, data);
}
fn activate(
&mut self,
_mem: GuestMemoryAtomic<GuestMemoryMmap>,
interrupt_cb: Arc<dyn VirtioInterrupt>,
mut queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
mut queue_evts: Vec<EventFd>,
) -> ActivateResult {
self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
let (kill_evt, pause_evt) = self.common.dup_eventfds();
if let Some(disk) = self.disk.as_ref() {
let disk = disk.try_clone().map_err(|e| {
error!("failed cloning pmem disk: {}", e);
ActivateError::BadActivate
})?;
let mut handler = PmemEpollHandler {
queue: queues.remove(0),
disk,
interrupt_cb,
queue_evt: queue_evts.remove(0),
kill_evt,
pause_evt,
};
let paused = self.common.paused.clone();
let paused_sync = self.common.paused_sync.clone();
let mut epoll_threads = Vec::new();
spawn_virtio_thread(
&self.id,
&self.seccomp_action,
Thread::VirtioPmem,
&mut epoll_threads,
&self.exit_evt,
move || {
if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
error!("Error running worker: {:?}", e);
}
},
)?;
self.common.epoll_threads = Some(epoll_threads);
event!("virtio-device", "activated", "id", &self.id);
return Ok(());
}
Err(ActivateError::BadActivate)
}
fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
let result = self.common.reset();
event!("virtio-device", "reset", "id", &self.id);
result
}
fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
vec![self.mapping.clone()]
}
}
impl Pausable for Pmem {
fn pause(&mut self) -> result::Result<(), MigratableError> {
self.common.pause()
}
fn resume(&mut self) -> result::Result<(), MigratableError> {
self.common.resume()
}
}
impl Snapshottable for Pmem {
fn id(&self) -> String {
self.id.clone()
}
fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
Snapshot::new_from_versioned_state(&self.id, &self.state())
}
fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
self.set_state(&snapshot.to_versioned_state(&self.id)?);
Ok(())
}
}
impl Transportable for Pmem {}
impl Migratable for Pmem {}