From 8862d6104210c37fa7292ba2c41f565482774212 Mon Sep 17 00:00:00 2001
From: Sebastien Boeuf
Date: Tue, 18 Jun 2019 14:40:57 -0700
Subject: [PATCH] vm-virtio: Add virtio-pmem implementation

This commit introduces the implementation of the virtio-pmem device
based on the pending proposal of the virtio specification here:
https://lists.oasis-open.org/archives/virtio-dev/201903/msg00083.html

It is also based on the kernel patches coming along with the virtio
proposal: https://lkml.org/lkml/2019/6/12/624

And it is based off of the current crosvm implementation found in
devices/src/virtio/pmem.rs relying on commit
bb340d9a94d48514cbe310d05e1ce539aae31264

Signed-off-by: Sebastien Boeuf
---
 vm-virtio/src/lib.rs  |   4 +
 vm-virtio/src/pmem.rs | 424 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 428 insertions(+)
 create mode 100644 vm-virtio/src/pmem.rs

diff --git a/vm-virtio/src/lib.rs b/vm-virtio/src/lib.rs
index 73522c445..d9d31afda 100755
--- a/vm-virtio/src/lib.rs
+++ b/vm-virtio/src/lib.rs
@@ -23,6 +23,7 @@ mod block;
 mod device;
 pub mod fs;
 pub mod net;
+mod pmem;
 mod queue;
 mod rng;
 
@@ -32,6 +33,7 @@ pub use self::block::*;
 pub use self::device::*;
 pub use self::fs::*;
 pub use self::net::*;
+pub use self::pmem::*;
 pub use self::queue::*;
 pub use self::rng::*;
 
@@ -60,6 +62,7 @@ enum VirtioDeviceType {
     TYPE_INPUT = 18,
     TYPE_VSOCK = 19,
     TYPE_FS = 26,
+    TYPE_PMEM = 27,
 }
 
 // In order to use the `{}` marker, the trait `fmt::Display` must be implemented
@@ -76,6 +79,7 @@ impl fmt::Display for VirtioDeviceType {
             VirtioDeviceType::TYPE_9P => "9p",
             VirtioDeviceType::TYPE_VSOCK => "vsock",
             VirtioDeviceType::TYPE_FS => "fs",
+            VirtioDeviceType::TYPE_PMEM => "pmem",
             _ => return Err(std::fmt::Error),
         };
         write!(f, "{}", output)
diff --git a/vm-virtio/src/pmem.rs b/vm-virtio/src/pmem.rs
new file mode 100644
index 000000000..152724734
--- /dev/null
+++ b/vm-virtio/src/pmem.rs
@@ -0,0 +1,424 @@
+// Copyright 2019 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// Copyright © 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+use epoll;
+use libc::EFD_NONBLOCK;
+use std::cmp;
+use std::fmt::{self, Display};
+use std::fs::File;
+use std::io::{self, Write};
+use std::mem::size_of;
+use std::os::unix::io::AsRawFd;
+use std::result;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
+use std::thread;
+
+use super::Error as DeviceError;
+use super::{
+    ActivateError, ActivateResult, DescriptorChain, DeviceEventT, Queue, VirtioDevice,
+    VirtioDeviceType, INTERRUPT_STATUS_USED_RING, VIRTIO_F_VERSION_1,
+};
+use crate::VirtioInterrupt;
+use vm_memory::{
+    Address, ByteValued, Bytes, GuestAddress, GuestMemoryError, GuestMemoryMmap, GuestUsize,
+};
+use vmm_sys_util::EventFd;
+
+// Maximum size of the single virtqueue exposed by the device. It also bounds
+// the number of descriptor chains handled by one pass over the queue.
+const QUEUE_SIZE: u16 = 256;
+// The device uses exactly one queue, which carries the requests.
+const NUM_QUEUES: usize = 1;
+const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];
+
+// Request type code accepted from the guest (FLUSH is the only one).
+const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
+// Status codes written back to the guest in `VirtioPmemResp::ret`.
+const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
+const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;
+
+// New descriptors are pending on the virtio queue.
+const QUEUE_AVAIL_EVENT: DeviceEventT = 0;
+// The device has been dropped.
+const KILL_EVENT: DeviceEventT = 1;
+
+/// Layout of the device configuration space returned by `read_config`:
+/// the `start` and `size` values the device was created with.
+#[derive(Copy, Clone, Debug, Default)]
+#[repr(C)]
+struct VirtioPmemConfig {
+    start: u64,
+    size: u64,
+}
+
+// Safe because it only has data and has no implicit padding.
+unsafe impl ByteValued for VirtioPmemConfig {}
+
+/// Request header read from the first descriptor of a chain; `type_` is
+/// compared against `VIRTIO_PMEM_REQ_TYPE_FLUSH`.
+#[derive(Copy, Clone, Debug, Default)]
+#[repr(C)]
+struct VirtioPmemReq {
+    type_: u32,
+}
+
+// Safe because it only has data and has no implicit padding.
+unsafe impl ByteValued for VirtioPmemReq {}
+
+/// Response written into the status descriptor of a chain; `ret` holds one
+/// of the `VIRTIO_PMEM_RESP_TYPE_*` codes.
+#[derive(Copy, Clone, Debug, Default)]
+#[repr(C)]
+struct VirtioPmemResp {
+    ret: u32,
+}
+
+// Safe because it only has data and has no implicit padding.
+unsafe impl ByteValued for VirtioPmemResp {}
+
+/// Reasons a request descriptor chain is rejected by `Request::parse`.
+#[derive(Debug)]
+enum Error {
+    /// Guest gave us bad memory addresses.
+    GuestMemory(GuestMemoryError),
+    /// Guest gave us a write only descriptor that protocol says to read from.
+    UnexpectedWriteOnlyDescriptor,
+    /// Guest gave us a read only descriptor that protocol says to write to.
+    UnexpectedReadOnlyDescriptor,
+    /// Guest gave us too few descriptors in a descriptor chain.
+    DescriptorChainTooShort,
+    /// Guest gave us a buffer that was too short to use.
+    BufferLengthTooSmall,
+    /// Guest sent us invalid request.
+    InvalidRequest,
+}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::Error::*;
+
+        match self {
+            BufferLengthTooSmall => write!(f, "buffer length too small"),
+            DescriptorChainTooShort => write!(f, "descriptor chain too short"),
+            GuestMemory(e) => write!(f, "bad guest memory address: {}", e),
+            InvalidRequest => write!(f, "invalid request"),
+            UnexpectedReadOnlyDescriptor => write!(f, "unexpected read-only descriptor"),
+            UnexpectedWriteOnlyDescriptor => write!(f, "unexpected write-only descriptor"),
+        }
+    }
+}
+
+/// Kind of operation requested by the guest.
+#[derive(Debug, PartialEq)]
+enum RequestType {
+    Flush,
+}
+
+/// A validated guest request: what to do and where to write the status.
+struct Request {
+    type_: RequestType,
+    status_addr: GuestAddress,
+}
+
+impl Request {
+    /// Validates a descriptor chain and extracts the request it describes.
+    ///
+    /// The chain must consist of a readable head descriptor holding exactly
+    /// one `VirtioPmemReq`, followed by a writable descriptor large enough
+    /// for a `VirtioPmemResp`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the `Error` variant matching the first check that fails:
+    /// wrong descriptor direction, wrong or too small length, unreadable
+    /// guest memory, unknown request type, or a missing status descriptor.
+    fn parse(
+        avail_desc: &DescriptorChain,
+        mem: &GuestMemoryMmap,
+    ) -> result::Result<Request, Error> {
+        // The head contains the request type which MUST be readable.
+        if avail_desc.is_write_only() {
+            return Err(Error::UnexpectedWriteOnlyDescriptor);
+        }
+
+        if avail_desc.len as usize != size_of::<VirtioPmemReq>() {
+            return Err(Error::InvalidRequest);
+        }
+
+        let request: VirtioPmemReq = mem.read_obj(avail_desc.addr).map_err(Error::GuestMemory)?;
+
+        let request_type = match request.type_ {
+            VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
+            _ => return Err(Error::InvalidRequest),
+        };
+
+        let status_desc = avail_desc
+            .next_descriptor()
+            .ok_or(Error::DescriptorChainTooShort)?;
+
+        // The status MUST always be writable
+        if !status_desc.is_write_only() {
+            return Err(Error::UnexpectedReadOnlyDescriptor);
+        }
+
+        if (status_desc.len as usize) < size_of::<VirtioPmemResp>() {
+            return Err(Error::BufferLengthTooSmall);
+        }
+
+        Ok(Request {
+            type_: request_type,
+            status_addr: status_desc.addr,
+        })
+    }
+}
+
+/// State owned by the worker thread spawned in `Pmem::activate`.
+struct PmemEpollHandler {
+    queue: Queue,
+    mem: GuestMemoryMmap,
+    // Backing file that FLUSH requests are synced to.
+    disk: File,
+    interrupt_status: Arc<AtomicUsize>,
+    interrupt_cb: Arc<VirtioInterrupt>,
+    // Signalled when new descriptors are available on the queue.
+    queue_evt: EventFd,
+    // Signalled when the owning `Pmem` is dropped.
+    kill_evt: EventFd,
+}
+
+impl PmemEpollHandler {
+    /// Handles every descriptor chain currently available on the queue.
+    ///
+    /// Each FLUSH request syncs the backing file and writes the outcome to
+    /// the chain's status descriptor. Chains that cannot be parsed, or whose
+    /// status cannot be written, are still returned to the guest with a
+    /// used length of 0.
+    ///
+    /// Returns `true` when at least one chain was consumed, meaning the
+    /// guest should be notified.
+    fn process_queue(&mut self) -> bool {
+        // Used entries are collected first and published after the loop,
+        // because iterating borrows `self.queue`.
+        let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
+        let mut used_count = 0;
+        for avail_desc in self.queue.iter(&self.mem) {
+            let len = match Request::parse(&avail_desc, &self.mem) {
+                Ok(ref req) if req.type_ == RequestType::Flush => {
+                    let status_code = match self.disk.sync_all() {
+                        Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
+                        Err(e) => {
+                            error!("failed flushing disk image: {}", e);
+                            VIRTIO_PMEM_RESP_TYPE_EIO
+                        }
+                    };
+
+                    let resp = VirtioPmemResp { ret: status_code };
+                    match self.mem.write_obj(resp, req.status_addr) {
+                        Ok(_) => size_of::<VirtioPmemResp>() as u32,
+                        Err(e) => {
+                            error!("bad guest memory address: {}", e);
+                            0
+                        }
+                    }
+                }
+                Ok(ref req) => {
+                    // Currently, there is only one virtio-pmem request, FLUSH.
+                    error!("Invalid virtio request type {:?}", req.type_);
+                    0
+                }
+                Err(e) => {
+                    error!("Failed to parse available descriptor chain: {:?}", e);
+                    0
+                }
+            };
+
+            used_desc_heads[used_count] = (avail_desc.index, len);
+            used_count += 1;
+        }
+
+        for &(desc_index, len) in &used_desc_heads[..used_count] {
+            self.queue.add_used(&self.mem, desc_index, len);
+        }
+        used_count > 0
+    }
+
+    /// Sets the used-ring bit in the interrupt status and invokes the
+    /// interrupt callback for this queue.
+    ///
+    /// # Errors
+    ///
+    /// Returns `DeviceError::FailedSignalingUsedQueue` when the callback
+    /// fails.
+    fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
+        self.interrupt_status
+            .fetch_or(INTERRUPT_STATUS_USED_RING as usize, Ordering::SeqCst);
+        (self.interrupt_cb)(&self.queue).map_err(|e| {
+            error!("Failed to signal used queue: {:?}", e);
+            DeviceError::FailedSignalingUsedQueue(e)
+        })
+    }
+
+    /// Worker loop: waits on the queue and kill eventfds until the kill
+    /// event fires or an unrecoverable error occurs.
+    ///
+    /// # Errors
+    ///
+    /// Returns a `DeviceError` when the epoll instance cannot be created,
+    /// configured or waited on. Failures while reading the queue event or
+    /// signalling the guest are logged and end the loop with `Ok(())`.
+    fn run(&mut self) -> result::Result<(), DeviceError> {
+        // Create the epoll file descriptor
+        let epoll_fd = epoll::create(true).map_err(DeviceError::EpollCreateFd)?;
+
+        // Add events
+        epoll::ctl(
+            epoll_fd,
+            epoll::ControlOptions::EPOLL_CTL_ADD,
+            self.queue_evt.as_raw_fd(),
+            epoll::Event::new(epoll::Events::EPOLLIN, u64::from(QUEUE_AVAIL_EVENT)),
+        )
+        .map_err(DeviceError::EpollCtl)?;
+        epoll::ctl(
+            epoll_fd,
+            epoll::ControlOptions::EPOLL_CTL_ADD,
+            self.kill_evt.as_raw_fd(),
+            epoll::Event::new(epoll::Events::EPOLLIN, u64::from(KILL_EVENT)),
+        )
+        .map_err(DeviceError::EpollCtl)?;
+
+        const EPOLL_EVENTS_LEN: usize = 100;
+        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
+
+        'epoll: loop {
+            let num_events =
+                epoll::wait(epoll_fd, -1, &mut events[..]).map_err(DeviceError::EpollWait)?;
+
+            for event in events.iter().take(num_events) {
+                // The event data carries the `DeviceEventT` registered above.
+                let ev_type = event.data as u16;
+
+                match ev_type {
+                    QUEUE_AVAIL_EVENT => {
+                        if let Err(e) = self.queue_evt.read() {
+                            error!("Failed to get queue event: {:?}", e);
+                            break 'epoll;
+                        } else if self.process_queue() {
+                            if let Err(e) = self.signal_used_queue() {
+                                error!("Failed to signal used queue: {:?}", e);
+                                break 'epoll;
+                            }
+                        }
+                    }
+                    KILL_EVENT => {
+                        debug!("kill_evt received, stopping epoll loop");
+                        break 'epoll;
+                    }
+                    _ => {
+                        error!("Unknown event for virtio-pmem");
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Virtio persistent-memory device backed by a file.
+pub struct Pmem {
+    // Set by `activate`; written on drop to stop the worker thread.
+    kill_evt: Option<EventFd>,
+    // Moved into the worker thread on the first successful `activate`.
+    disk: Option<File>,
+    avail_features: u64,
+    acked_features: u64,
+    config: VirtioPmemConfig,
+}
+
+impl Pmem {
+    /// Creates a pmem device backed by `disk`.
+    ///
+    /// `addr` and `size` are stored in the device configuration space as
+    /// the `start` and `size` fields. The only feature offered is
+    /// `VIRTIO_F_VERSION_1`. This constructor currently always returns `Ok`.
+    pub fn new(disk: File, addr: GuestAddress, size: GuestUsize) -> io::Result<Pmem> {
+        let config = VirtioPmemConfig {
+            start: addr.raw_value().to_le(),
+            size: size.to_le(),
+        };
+
+        Ok(Pmem {
+            kill_evt: None,
+            disk: Some(disk),
+            avail_features: 1u64 << VIRTIO_F_VERSION_1,
+            acked_features: 0u64,
+            config,
+        })
+    }
+}
+
+impl Drop for Pmem {
+    /// Signals the kill eventfd, if the device was activated, so that the
+    /// worker loop stops.
+    fn drop(&mut self) {
+        if let Some(kill_evt) = self.kill_evt.take() {
+            // Ignore the result because there is nothing we can do about it.
+            let _ = kill_evt.write(1);
+        }
+    }
+}
+
+impl VirtioDevice for Pmem {
+    fn device_type(&self) -> u32 {
+        VirtioDeviceType::TYPE_PMEM as u32
+    }
+
+    fn queue_max_sizes(&self) -> &[u16] {
+        QUEUE_SIZES
+    }
+
+    fn features(&self, page: u32) -> u32 {
+        match page {
+            // Get the lower 32-bits of the features bitfield.
+            0 => self.avail_features as u32,
+            // Get the upper 32-bits of the features bitfield.
+            1 => (self.avail_features >> 32) as u32,
+            _ => {
+                warn!("Received request for unknown features page.");
+                0u32
+            }
+        }
+    }
+
+    fn ack_features(&mut self, page: u32, value: u32) {
+        let mut v = match page {
+            0 => u64::from(value),
+            1 => u64::from(value) << 32,
+            _ => {
+                warn!("Cannot acknowledge unknown features page.");
+                0u64
+            }
+        };
+
+        // Check if the guest is ACK'ing a feature that we didn't claim to have.
+        let unrequested_features = v & !self.avail_features;
+        if unrequested_features != 0 {
+            warn!("Received acknowledge request for unknown feature.");
+
+            // Don't count these features as acked.
+            v &= !unrequested_features;
+        }
+        self.acked_features |= v;
+    }
+
+    fn read_config(&self, offset: u64, mut data: &mut [u8]) {
+        let config_slice = self.config.as_slice();
+        let config_len = config_slice.len() as u64;
+        if offset >= config_len {
+            error!("Failed to read config space");
+            return;
+        }
+
+        if let Some(end) = offset.checked_add(data.len() as u64) {
+            // This write can't fail, offset and end are checked against config_len.
+            data.write_all(&config_slice[offset as usize..cmp::min(end, config_len) as usize])
+                .unwrap();
+        }
+    }
+
+    fn write_config(&mut self, _offset: u64, _data: &[u8]) {
+        warn!("virtio-pmem device configuration is read-only");
+    }
+
+    /// Starts the worker thread that services the request queue.
+    ///
+    /// Fails with `ActivateError::BadActivate` when the number of queues or
+    /// queue events is not `NUM_QUEUES`, when the kill eventfd pair or the
+    /// thread cannot be created, or when the backing file has already been
+    /// handed to a previous worker.
+    fn activate(
+        &mut self,
+        mem: GuestMemoryMmap,
+        interrupt_cb: Arc<VirtioInterrupt>,
+        status: Arc<AtomicUsize>,
+        mut queues: Vec<Queue>,
+        mut queue_evts: Vec<EventFd>,
+    ) -> ActivateResult {
+        if queues.len() != NUM_QUEUES || queue_evts.len() != NUM_QUEUES {
+            error!(
+                "Cannot perform activate. Expected {} queue(s), got {}",
+                NUM_QUEUES,
+                queues.len()
+            );
+            return Err(ActivateError::BadActivate);
+        }
+
+        // One end stays with the device (used in `drop`), the clone goes to
+        // the worker thread.
+        let (self_kill_evt, kill_evt) =
+            match EventFd::new(EFD_NONBLOCK).and_then(|e| Ok((e.try_clone()?, e))) {
+                Ok(v) => v,
+                Err(e) => {
+                    error!("failed creating kill EventFd pair: {}", e);
+                    return Err(ActivateError::BadActivate);
+                }
+            };
+        self.kill_evt = Some(self_kill_evt);
+
+        if let Some(disk) = self.disk.take() {
+            let mut handler = PmemEpollHandler {
+                queue: queues.remove(0),
+                mem,
+                disk,
+                interrupt_status: status,
+                interrupt_cb,
+                queue_evt: queue_evts.remove(0),
+                kill_evt,
+            };
+
+            let worker_result = thread::Builder::new()
+                .name("virtio_pmem".to_string())
+                .spawn(move || handler.run());
+
+            if let Err(e) = worker_result {
+                error!("failed to spawn virtio_pmem worker: {}", e);
+                return Err(ActivateError::BadActivate);
+            }
+
+            return Ok(());
+        }
+        Err(ActivateError::BadActivate)
+    }
+}