vm-virtio: Add virtio-pmem implementation

This commit introduces an implementation of the virtio-pmem device,
based on the pending proposal to the virtio specification:
https://lists.oasis-open.org/archives/virtio-dev/201903/msg00083.html

It is also based on the kernel patches that accompany the virtio
proposal: https://lkml.org/lkml/2019/6/12/624

Finally, it is based on the current crosvm implementation found in
devices/src/virtio/pmem.rs, as of commit
bb340d9a94d48514cbe310d05e1ce539aae31264
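
As a reviewer aid, here is a minimal sketch of how a VMM could
construct the new device. The backing file path, guest address and
region size are illustrative assumptions (and GuestUsize is assumed
to be u64); mapping the file into guest memory at that address is
expected to happen elsewhere in the VMM and is not part of this
commit. The crate paths follow the re-exports shown in this diff:

    use std::fs::OpenOptions;
    use vm_memory::GuestAddress;
    use vm_virtio::Pmem;

    fn build_pmem_device() -> std::io::Result<Pmem> {
        // Hypothetical backing file for the guest-visible persistent
        // memory region; the device only keeps it open so it can fsync
        // it when the guest sends a FLUSH request.
        let disk = OpenOptions::new()
            .read(true)
            .write(true)
            .open("/path/to/pmem.img")?;
        // start and size end up in the virtio config space
        // (VirtioPmemConfig) read by the guest driver; the values here
        // are placeholders.
        Pmem::new(disk, GuestAddress(0x1_0000_0000), 1u64 << 30)
    }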

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Author: Sebastien Boeuf, 2019-06-18 14:40:57 -07:00
Committed by: Rob Bradford
Parent: c0336e83ea
Commit: 8862d61042
2 changed files with 428 additions and 0 deletions


@@ -23,6 +23,7 @@ mod block;
mod device;
pub mod fs;
pub mod net;
mod pmem;
mod queue;
mod rng;
@@ -32,6 +33,7 @@ pub use self::block::*;
pub use self::device::*;
pub use self::fs::*;
pub use self::net::*;
pub use self::pmem::*;
pub use self::queue::*;
pub use self::rng::*;
@@ -60,6 +62,7 @@ enum VirtioDeviceType {
TYPE_INPUT = 18,
TYPE_VSOCK = 19,
TYPE_FS = 26,
TYPE_PMEM = 27,
}
// In order to use the `{}` marker, the trait `fmt::Display` must be implemented
@@ -76,6 +79,7 @@ impl fmt::Display for VirtioDeviceType {
VirtioDeviceType::TYPE_9P => "9p",
VirtioDeviceType::TYPE_VSOCK => "vsock",
VirtioDeviceType::TYPE_FS => "fs",
VirtioDeviceType::TYPE_PMEM => "pmem",
_ => return Err(std::fmt::Error),
};
write!(f, "{}", output)

vm-virtio/src/pmem.rs (new file, 424 lines)

@@ -0,0 +1,424 @@
// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
use epoll;
use libc::EFD_NONBLOCK;
use std::cmp;
use std::fmt::{self, Display};
use std::fs::File;
use std::io::{self, Write};
use std::mem::size_of;
use std::os::unix::io::AsRawFd;
use std::result;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use super::Error as DeviceError;
use super::{
ActivateError, ActivateResult, DescriptorChain, DeviceEventT, Queue, VirtioDevice,
VirtioDeviceType, INTERRUPT_STATUS_USED_RING, VIRTIO_F_VERSION_1,
};
use crate::VirtioInterrupt;
use vm_memory::{
Address, ByteValued, Bytes, GuestAddress, GuestMemoryError, GuestMemoryMmap, GuestUsize,
};
use vmm_sys_util::EventFd;
const QUEUE_SIZE: u16 = 256;
const NUM_QUEUES: usize = 1;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];
const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;
// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: DeviceEventT = 0;
// The device has been dropped.
const KILL_EVENT: DeviceEventT = 1;
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemConfig {
start: u64,
size: u64,
}
// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemConfig {}
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemReq {
type_: u32,
}
// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemReq {}
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemResp {
ret: u32,
}
// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemResp {}
#[derive(Debug)]
enum Error {
/// Guest gave us bad memory addresses.
GuestMemory(GuestMemoryError),
/// Guest gave us a write-only descriptor that the protocol says to read from.
UnexpectedWriteOnlyDescriptor,
/// Guest gave us a read-only descriptor that the protocol says to write to.
UnexpectedReadOnlyDescriptor,
/// Guest gave us too few descriptors in a descriptor chain.
DescriptorChainTooShort,
/// Guest gave us a buffer that was too short to use.
BufferLengthTooSmall,
/// Guest sent us an invalid request.
InvalidRequest,
}
impl Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*;
match self {
BufferLengthTooSmall => write!(f, "buffer length too small"),
DescriptorChainTooShort => write!(f, "descriptor chain too short"),
GuestMemory(e) => write!(f, "bad guest memory address: {}", e),
InvalidRequest => write!(f, "invalid request"),
UnexpectedReadOnlyDescriptor => write!(f, "unexpected read-only descriptor"),
UnexpectedWriteOnlyDescriptor => write!(f, "unexpected write-only descriptor"),
}
}
}
#[derive(Debug, PartialEq)]
enum RequestType {
Flush,
}
struct Request {
type_: RequestType,
status_addr: GuestAddress,
}
impl Request {
fn parse(
avail_desc: &DescriptorChain,
mem: &GuestMemoryMmap,
) -> result::Result<Request, Error> {
// The head contains the request type which MUST be readable.
if avail_desc.is_write_only() {
return Err(Error::UnexpectedWriteOnlyDescriptor);
}
if avail_desc.len as usize != size_of::<VirtioPmemReq>() {
return Err(Error::InvalidRequest);
}
let request: VirtioPmemReq = mem.read_obj(avail_desc.addr).map_err(Error::GuestMemory)?;
let request_type = match request.type_ {
VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
_ => return Err(Error::InvalidRequest),
};
let status_desc = avail_desc
.next_descriptor()
.ok_or(Error::DescriptorChainTooShort)?;
// The status MUST always be writable
if !status_desc.is_write_only() {
return Err(Error::UnexpectedReadOnlyDescriptor);
}
if (status_desc.len as usize) < size_of::<VirtioPmemResp>() {
return Err(Error::BufferLengthTooSmall);
}
Ok(Request {
type_: request_type,
status_addr: status_desc.addr,
})
}
}
struct PmemEpollHandler {
queue: Queue,
mem: GuestMemoryMmap,
disk: File,
interrupt_status: Arc<AtomicUsize>,
interrupt_cb: Arc<VirtioInterrupt>,
queue_evt: EventFd,
kill_evt: EventFd,
}
impl PmemEpollHandler {
fn process_queue(&mut self) -> bool {
let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
let mut used_count = 0;
for avail_desc in self.queue.iter(&self.mem) {
let len = match Request::parse(&avail_desc, &self.mem) {
Ok(ref req) if (req.type_ == RequestType::Flush) => {
let status_code = match self.disk.sync_all() {
Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
Err(e) => {
error!("failed flushing disk image: {}", e);
VIRTIO_PMEM_RESP_TYPE_EIO
}
};
let resp = VirtioPmemResp { ret: status_code };
match self.mem.write_obj(resp, req.status_addr) {
Ok(_) => size_of::<VirtioPmemResp>() as u32,
Err(e) => {
error!("bad guest memory address: {}", e);
0
}
}
}
Ok(ref req) => {
// Currently, there is only one virtio-pmem request, FLUSH.
error!("Invalid virtio request type {:?}", req.type_);
0
}
Err(e) => {
error!("Failed to parse available descriptor chain: {:?}", e);
0
}
};
used_desc_heads[used_count] = (avail_desc.index, len);
used_count += 1;
}
for &(desc_index, len) in &used_desc_heads[..used_count] {
self.queue.add_used(&self.mem, desc_index, len);
}
used_count > 0
}
fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
self.interrupt_status
.fetch_or(INTERRUPT_STATUS_USED_RING as usize, Ordering::SeqCst);
(self.interrupt_cb)(&self.queue).map_err(|e| {
error!("Failed to signal used queue: {:?}", e);
DeviceError::FailedSignalingUsedQueue(e)
})
}
fn run(&mut self) -> result::Result<(), DeviceError> {
// Create the epoll file descriptor
let epoll_fd = epoll::create(true).map_err(DeviceError::EpollCreateFd)?;
// Add events
epoll::ctl(
epoll_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
self.queue_evt.as_raw_fd(),
epoll::Event::new(epoll::Events::EPOLLIN, u64::from(QUEUE_AVAIL_EVENT)),
)
.map_err(DeviceError::EpollCtl)?;
epoll::ctl(
epoll_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
self.kill_evt.as_raw_fd(),
epoll::Event::new(epoll::Events::EPOLLIN, u64::from(KILL_EVENT)),
)
.map_err(DeviceError::EpollCtl)?;
const EPOLL_EVENTS_LEN: usize = 100;
let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
'epoll: loop {
let num_events =
epoll::wait(epoll_fd, -1, &mut events[..]).map_err(DeviceError::EpollWait)?;
for event in events.iter().take(num_events) {
let ev_type = event.data as u16;
match ev_type {
QUEUE_AVAIL_EVENT => {
if let Err(e) = self.queue_evt.read() {
error!("Failed to get queue event: {:?}", e);
break 'epoll;
} else if self.process_queue() {
if let Err(e) = self.signal_used_queue() {
error!("Failed to signal used queue: {:?}", e);
break 'epoll;
}
}
}
KILL_EVENT => {
debug!("kill_evt received, stopping epoll loop");
break 'epoll;
}
_ => {
error!("Unknown event for virtio-block");
}
}
}
}
Ok(())
}
}
pub struct Pmem {
kill_evt: Option<EventFd>,
disk: Option<File>,
avail_features: u64,
acked_features: u64,
config: VirtioPmemConfig,
}
impl Pmem {
pub fn new(disk: File, addr: GuestAddress, size: GuestUsize) -> io::Result<Pmem> {
let config = VirtioPmemConfig {
start: addr.raw_value().to_le(),
size: size.to_le(),
};
Ok(Pmem {
kill_evt: None,
disk: Some(disk),
avail_features: 1u64 << VIRTIO_F_VERSION_1,
acked_features: 0u64,
config,
})
}
}
impl Drop for Pmem {
fn drop(&mut self) {
if let Some(kill_evt) = self.kill_evt.take() {
// Ignore the result because there is nothing we can do about it.
let _ = kill_evt.write(1);
}
}
}
impl VirtioDevice for Pmem {
fn device_type(&self) -> u32 {
VirtioDeviceType::TYPE_PMEM as u32
}
fn queue_max_sizes(&self) -> &[u16] {
QUEUE_SIZES
}
fn features(&self, page: u32) -> u32 {
match page {
// Get the lower 32-bits of the features bitfield.
0 => self.avail_features as u32,
// Get the upper 32-bits of the features bitfield.
1 => (self.avail_features >> 32) as u32,
_ => {
warn!("Received request for unknown features page.");
0u32
}
}
}
fn ack_features(&mut self, page: u32, value: u32) {
let mut v = match page {
0 => u64::from(value),
1 => u64::from(value) << 32,
_ => {
warn!("Cannot acknowledge unknown features page.");
0u64
}
};
// Check if the guest is ACK'ing a feature that we didn't claim to have.
let unrequested_features = v & !self.avail_features;
if unrequested_features != 0 {
warn!("Received acknowledge request for unknown feature.");
// Don't count these features as acked.
v &= !unrequested_features;
}
self.acked_features |= v;
}
fn read_config(&self, offset: u64, mut data: &mut [u8]) {
let config_slice = self.config.as_slice();
let config_len = config_slice.len() as u64;
if offset >= config_len {
error!("Failed to read config space");
return;
}
if let Some(end) = offset.checked_add(data.len() as u64) {
// This write can't fail, offset and end are checked against config_len.
data.write_all(&config_slice[offset as usize..cmp::min(end, config_len) as usize])
.unwrap();
}
}
fn write_config(&mut self, _offset: u64, _data: &[u8]) {
warn!("virtio-pmem device configuration is read-only");
}
fn activate(
&mut self,
mem: GuestMemoryMmap,
interrupt_cb: Arc<VirtioInterrupt>,
status: Arc<AtomicUsize>,
mut queues: Vec<Queue>,
mut queue_evts: Vec<EventFd>,
) -> ActivateResult {
if queues.len() != NUM_QUEUES || queue_evts.len() != NUM_QUEUES {
error!(
"Cannot perform activate. Expected {} queue(s), got {}",
NUM_QUEUES,
queues.len()
);
return Err(ActivateError::BadActivate);
}
let (self_kill_evt, kill_evt) =
match EventFd::new(EFD_NONBLOCK).and_then(|e| Ok((e.try_clone()?, e))) {
Ok(v) => v,
Err(e) => {
error!("failed creating kill EventFd pair: {}", e);
return Err(ActivateError::BadActivate);
}
};
self.kill_evt = Some(self_kill_evt);
if let Some(disk) = self.disk.take() {
let mut handler = PmemEpollHandler {
queue: queues.remove(0),
mem,
disk,
interrupt_status: status,
interrupt_cb,
queue_evt: queue_evts.remove(0),
kill_evt,
};
let worker_result = thread::Builder::new()
.name("virtio_pmem".to_string())
.spawn(move || handler.run());
if let Err(e) = worker_result {
error!("failed to spawn virtio_pmem worker: {}", e);
return Err(ActivateError::BadActivate);
}
return Ok(());
}
Err(ActivateError::BadActivate)
}
}
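
Note on the flush path in PmemEpollHandler::process_queue() above: a
VIRTIO_PMEM_REQ_TYPE_FLUSH request is serviced by calling sync_all()
on the backing file (an fsync(2) on the host) and then writing
VIRTIO_PMEM_RESP_TYPE_OK or VIRTIO_PMEM_RESP_TYPE_EIO into the
guest-provided status descriptor. A minimal standalone sketch of that
durability primitive, with a hypothetical file path, assuming the
guest's stores reach the file through the mmap'ed region:

    use std::fs::OpenOptions;

    fn flush_backing_file(path: &str) -> std::io::Result<()> {
        let backing = OpenOptions::new().read(true).write(true).open(path)?;
        // sync_all() flushes both file data and metadata, which is what
        // makes the guest's writes durable before the device reports OK.
        backing.sync_all()
    }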