cloud-hypervisor/virtio-devices/src/mem.rs

1086 lines
34 KiB
Rust
Raw Normal View History

// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::Error as DeviceError;
use super::{
ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use libc::EFD_NONBLOCK;
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{DescriptorChain, Queue};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError,
virtio-queue: Update crate based on latest rust-vmm/vm-virtio This crate contains up to date definition of the Queue, AvailIter, DescriptorChain and Descriptor structures forked from the upstream crate rust-vmm/vm-virtio 27b18af01ee2d9564626e084a758a2b496d2c618. The following patches have been applied on top of this base in order to make it work correctly with Cloud Hypervisor requirements: - Add MSI vector field to the Queue In order to help with MSI/MSI-X support, it is convenient to store the value of the interrupt vector inside the Queue directly. - Handle address translations For devices with access to data in memory being translated, we add to the Queue the ability to translate the address stored in the descriptor. It is very helpful as it performs the translation right after the untranslated address is read from memory, avoiding any errors from happening from the consumer's crate perspective. It also allows the consumer to reduce greatly the amount of duplicated code for applying the translation in many different places. - Add helpers for Queue structure They are meant to help crate's consumers getting/setting information about the Queue. These patches can be found on the 'ch' branch from the Cloud Hypervisor fork: https://github.com/cloud-hypervisor/vm-virtio.git This patch takes care of updating the Cloud Hypervisor code in virtio-devices and vm-virtio to build correctly with the latest version of virtio-queue. Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
2021-12-21 09:03:15 +00:00
GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{
Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vmm_sys_util::eventfd::EventFd;
const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];
// 128MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;
// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;
// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;
// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;
// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;
// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;
// request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;
// Get resize event.
const RESIZE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;
#[derive(Debug)]
pub enum Error {
// Guest gave us bad memory addresses.
GuestMemory(GuestMemoryError),
// Guest gave us a write only descriptor that protocol says to read from.
UnexpectedWriteOnlyDescriptor,
// Guest gave us a read only descriptor that protocol says to write to.
UnexpectedReadOnlyDescriptor,
// Guest gave us too few descriptors in a descriptor chain.
DescriptorChainTooShort,
// Guest gave us a buffer that was too short to use.
BufferLengthTooSmall,
// Guest sent us invalid request.
InvalidRequest,
// Failed to EventFd write.
EventFdWriteFail(std::io::Error),
// Failed to EventFd try_clone.
EventFdTryCloneFail(std::io::Error),
// Failed to MpscRecv.
MpscRecvFail(mpsc::RecvError),
// Resize invalid argument
ResizeError(anyhow::Error),
// Fail to resize trigger
ResizeTriggerFail(DeviceError),
// Invalid configuration
ValidateError(anyhow::Error),
// Failed discarding memory range
DiscardMemoryRange(std::io::Error),
// Failed DMA mapping.
DmaMap(std::io::Error),
// Failed DMA unmapping.
DmaUnmap(std::io::Error),
// Invalid DMA mapping handler
InvalidDmaMappingHandler,
}
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
req_type: u16,
padding: [u16; 3],
addr: u64,
nb_blocks: u16,
padding_1: [u16; 3],
}
// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
resp_type: u16,
padding: [u16; 3],
state: u16,
}
// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Versionize)]
pub struct VirtioMemConfig {
// Block size and alignment. Cannot change.
block_size: u64,
// Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
node_id: u16,
padding: [u8; 6],
// Start address of the memory region. Cannot change.
addr: u64,
// Region size (maximum). Cannot change.
region_size: u64,
// Currently usable region size. Can grow up to region_size. Can
// shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
// update will be sent).
usable_region_size: u64,
// Currently used size. Changes due to plug/unplug requests, but no
// config updates will be sent.
plugged_size: u64,
// Requested size. New plug requests cannot exceed it. Can change.
requested_size: u64,
}
// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}
impl VirtioMemConfig {
fn validate(&self) -> result::Result<(), Error> {
if self.addr % self.block_size != 0 {
return Err(Error::ValidateError(anyhow!(
"addr 0x{:x} is not aligned on block_size 0x{:x}",
self.addr,
self.block_size
)));
}
if self.region_size % self.block_size != 0 {
return Err(Error::ValidateError(anyhow!(
"region_size 0x{:x} is not aligned on block_size 0x{:x}",
self.region_size,
self.block_size
)));
}
if self.usable_region_size % self.block_size != 0 {
return Err(Error::ValidateError(anyhow!(
"usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
self.usable_region_size,
self.block_size
)));
}
if self.plugged_size % self.block_size != 0 {
return Err(Error::ValidateError(anyhow!(
"plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
self.plugged_size,
self.block_size
)));
}
if self.requested_size % self.block_size != 0 {
return Err(Error::ValidateError(anyhow!(
"requested_size 0x{:x} is not aligned on block_size 0x{:x}",
self.requested_size,
self.block_size
)));
}
Ok(())
}
fn resize(&mut self, size: u64) -> result::Result<(), Error> {
if self.requested_size == size {
return Err(Error::ResizeError(anyhow!(
"new size 0x{:x} and requested_size are identical",
size
)));
} else if size > self.region_size {
return Err(Error::ResizeError(anyhow!(
"new size 0x{:x} is bigger than region_size 0x{:x}",
size,
self.region_size
)));
} else if size % (self.block_size as u64) != 0 {
return Err(Error::ResizeError(anyhow!(
"new size 0x{:x} is not aligned on block_size 0x{:x}",
size,
self.block_size
)));
}
self.requested_size = size;
Ok(())
}
fn is_valid_range(&self, addr: u64, size: u64) -> bool {
// Start address must be aligned on block_size, the size must be
// greater than 0, and all blocks covered by the request must be
// in the usable region.
if addr % self.block_size != 0
|| size == 0
|| (addr < self.addr || addr + size >= self.addr + self.usable_region_size)
{
return false;
}
true
}
}
struct Request {
req: VirtioMemReq,
status_addr: GuestAddress,
}
impl Request {
fn parse(
virtio-queue: Update crate based on latest rust-vmm/vm-virtio This crate contains up to date definition of the Queue, AvailIter, DescriptorChain and Descriptor structures forked from the upstream crate rust-vmm/vm-virtio 27b18af01ee2d9564626e084a758a2b496d2c618. The following patches have been applied on top of this base in order to make it work correctly with Cloud Hypervisor requirements: - Add MSI vector field to the Queue In order to help with MSI/MSI-X support, it is convenient to store the value of the interrupt vector inside the Queue directly. - Handle address translations For devices with access to data in memory being translated, we add to the Queue the ability to translate the address stored in the descriptor. It is very helpful as it performs the translation right after the untranslated address is read from memory, avoiding any errors from happening from the consumer's crate perspective. It also allows the consumer to reduce greatly the amount of duplicated code for applying the translation in many different places. - Add helpers for Queue structure They are meant to help crate's consumers getting/setting information about the Queue. These patches can be found on the 'ch' branch from the Cloud Hypervisor fork: https://github.com/cloud-hypervisor/vm-virtio.git This patch takes care of updating the Cloud Hypervisor code in virtio-devices and vm-virtio to build correctly with the latest version of virtio-queue. Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
2021-12-21 09:03:15 +00:00
desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
) -> result::Result<Request, Error> {
let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
// The descriptor contains the request type which MUST be readable.
if desc.is_write_only() {
return Err(Error::UnexpectedWriteOnlyDescriptor);
}
if desc.len() as usize != size_of::<VirtioMemReq>() {
return Err(Error::InvalidRequest);
}
let req: VirtioMemReq = desc_chain
.memory()
.read_obj(desc.addr())
.map_err(Error::GuestMemory)?;
let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
// The status MUST always be writable
if !status_desc.is_write_only() {
return Err(Error::UnexpectedReadOnlyDescriptor);
}
if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
return Err(Error::BufferLengthTooSmall);
}
Ok(Request {
req,
status_addr: status_desc.addr(),
})
}
fn send_response(&self, mem: &GuestMemoryMmap, resp_type: u16, state: u16) -> u32 {
let resp = VirtioMemResp {
resp_type,
state,
..Default::default()
};
match mem.write_obj(resp, self.status_addr) {
Ok(_) => size_of::<VirtioMemResp>() as u32,
Err(e) => {
error!("bad guest memory address: {}", e);
0
}
}
}
}
pub struct ResizeSender {
hotplugged_size: Arc<AtomicU64>,
tx: mpsc::Sender<Result<(), Error>>,
evt: EventFd,
}
impl ResizeSender {
fn size(&self) -> u64 {
self.hotplugged_size.load(Ordering::Acquire)
}
fn send(&self, r: Result<(), Error>) -> Result<(), mpsc::SendError<Result<(), Error>>> {
self.tx.send(r)
}
}
impl Clone for ResizeSender {
fn clone(&self) -> Self {
ResizeSender {
hotplugged_size: self.hotplugged_size.clone(),
tx: self.tx.clone(),
evt: self
.evt
.try_clone()
.expect("Failed cloning EventFd from ResizeSender"),
}
}
}
pub struct Resize {
hotplugged_size: Arc<AtomicU64>,
tx: mpsc::Sender<Result<(), Error>>,
rx: mpsc::Receiver<Result<(), Error>>,
evt: EventFd,
}
impl Resize {
pub fn new(hotplugged_size: u64) -> io::Result<Self> {
let (tx, rx) = mpsc::channel();
Ok(Resize {
hotplugged_size: Arc::new(AtomicU64::new(hotplugged_size)),
tx,
rx,
evt: EventFd::new(EFD_NONBLOCK)?,
})
}
pub fn new_resize_sender(&self) -> Result<ResizeSender, Error> {
Ok(ResizeSender {
hotplugged_size: self.hotplugged_size.clone(),
tx: self.tx.clone(),
evt: self.evt.try_clone().map_err(Error::EventFdTryCloneFail)?,
})
}
pub fn work(&self, desired_size: u64) -> Result<(), Error> {
self.hotplugged_size.store(desired_size, Ordering::Release);
self.evt.write(1).map_err(Error::EventFdWriteFail)?;
self.rx.recv().map_err(Error::MpscRecvFail)?
}
}
#[derive(Clone, Versionize)]
pub struct BlocksState {
bitmap: Vec<bool>,
}
impl BlocksState {
pub fn new(region_size: u64) -> Self {
BlocksState {
bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
}
}
fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
for state in self
.bitmap
.iter()
.skip(first_block_index)
.take(nb_blocks as usize)
{
if *state != plug {
return false;
}
}
true
}
fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
for state in self
.bitmap
.iter_mut()
.skip(first_block_index)
.take(nb_blocks as usize)
{
*state = plug;
}
}
fn inner(&self) -> &Vec<bool> {
&self.bitmap
}
pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
let mut bitmap: Vec<u64> = Vec::new();
let mut i = 0;
for (j, bit) in self.bitmap.iter().enumerate() {
if j % 64 == 0 {
bitmap.push(0);
if j != 0 {
i += 1;
}
}
if *bit == plugged {
bitmap[i] |= 1 << (j % 64);
}
}
MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
}
}
struct MemEpollHandler {
host_addr: u64,
host_fd: Option<RawFd>,
blocks_state: Arc<Mutex<BlocksState>>,
config: Arc<Mutex<VirtioMemConfig>>,
resize: ResizeSender,
queue: Queue<GuestMemoryAtomic<GuestMemoryMmap>>,
interrupt_cb: Arc<dyn VirtioInterrupt>,
queue_evt: EventFd,
kill_evt: EventFd,
pause_evt: EventFd,
hugepages: bool,
dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}
impl MemEpollHandler {
fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
// Use fallocate if the memory region is backed by a file.
if let Some(fd) = self.host_fd {
let res = unsafe {
libc::fallocate64(
fd,
libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
offset as libc::off64_t,
size as libc::off64_t,
)
};
if res != 0 {
let err = io::Error::last_os_error();
error!("Deallocating file space failed: {}", err);
return Err(Error::DiscardMemoryRange(err));
}
}
// Only use madvise if the memory region is not allocated with
// hugepages.
if !self.hugepages {
let res = unsafe {
libc::madvise(
(self.host_addr + offset) as *mut libc::c_void,
size as libc::size_t,
libc::MADV_DONTNEED,
)
};
if res != 0 {
let err = io::Error::last_os_error();
error!("Advising kernel about pages range failed: {}", err);
return Err(Error::DiscardMemoryRange(err));
}
}
Ok(())
}
fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
let mut config = self.config.lock().unwrap();
let size: u64 = nb_blocks as u64 * config.block_size;
if plug && (config.plugged_size + size > config.requested_size) {
return VIRTIO_MEM_RESP_NACK;
}
if !config.is_valid_range(addr, size) {
return VIRTIO_MEM_RESP_ERROR;
}
let offset = addr - config.addr;
let first_block_index = (offset / config.block_size) as usize;
if !self
.blocks_state
.lock()
.unwrap()
.is_range_state(first_block_index, nb_blocks, !plug)
{
return VIRTIO_MEM_RESP_ERROR;
}
if !plug {
if let Err(e) = self.discard_memory_range(offset, size) {
error!("failed discarding memory range: {:?}", e);
return VIRTIO_MEM_RESP_ERROR;
}
}
self.blocks_state
.lock()
.unwrap()
.set_range(first_block_index, nb_blocks, plug);
let handlers = self.dma_mapping_handlers.lock().unwrap();
if plug {
let mut gpa = addr;
for _ in 0..nb_blocks {
for (_, handler) in handlers.iter() {
if let Err(e) = handler.map(gpa, gpa, config.block_size) {
error!(
"failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
gpa, config.block_size, e
);
return VIRTIO_MEM_RESP_ERROR;
}
}
gpa += config.block_size;
}
config.plugged_size += size;
} else {
for (_, handler) in handlers.iter() {
if let Err(e) = handler.unmap(addr, size) {
error!(
"failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
addr, size, e
);
return VIRTIO_MEM_RESP_ERROR;
}
}
config.plugged_size -= size;
}
VIRTIO_MEM_RESP_ACK
}
fn unplug_all(&mut self) -> u16 {
let mut config = self.config.lock().unwrap();
if let Err(e) = self.discard_memory_range(0, config.region_size) {
error!("failed discarding memory range: {:?}", e);
return VIRTIO_MEM_RESP_ERROR;
}
// Remaining plugged blocks are unmapped.
if config.plugged_size > 0 {
let handlers = self.dma_mapping_handlers.lock().unwrap();
for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
if *plugged {
let gpa = config.addr + (idx as u64 * config.block_size);
for (_, handler) in handlers.iter() {
if let Err(e) = handler.unmap(gpa, config.block_size) {
error!(
"failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
gpa, config.block_size, e
);
return VIRTIO_MEM_RESP_ERROR;
}
}
}
}
}
self.blocks_state.lock().unwrap().set_range(
0,
(config.region_size / config.block_size) as u16,
false,
);
config.plugged_size = 0;
VIRTIO_MEM_RESP_ACK
}
fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
let config = self.config.lock().unwrap();
let size: u64 = nb_blocks as u64 * config.block_size;
let resp_type = if config.is_valid_range(addr, size) {
VIRTIO_MEM_RESP_ACK
} else {
VIRTIO_MEM_RESP_ERROR
};
let offset = addr - config.addr;
let first_block_index = (offset / config.block_size) as usize;
let resp_state =
if self
.blocks_state
.lock()
.unwrap()
.is_range_state(first_block_index, nb_blocks, true)
{
VIRTIO_MEM_STATE_PLUGGED
} else if self.blocks_state.lock().unwrap().is_range_state(
first_block_index,
nb_blocks,
false,
) {
VIRTIO_MEM_STATE_UNPLUGGED
} else {
VIRTIO_MEM_STATE_MIXED
};
(resp_type, resp_state)
}
fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
self.interrupt_cb.trigger(int_type).map_err(|e| {
error!("Failed to signal used queue: {:?}", e);
DeviceError::FailedSignalingUsedQueue(e)
})
}
fn process_queue(&mut self) -> bool {
let mut request_list = Vec::new();
let mut used_count = 0;
for mut desc_chain in self.queue.iter().unwrap() {
request_list.push((
desc_chain.head_index(),
Request::parse(&mut desc_chain),
desc_chain.memory().clone(),
));
}
for (head_index, request, memory) in request_list {
let len = match request {
Err(e) => {
error!("failed parse VirtioMemReq: {:?}", e);
0
}
Ok(r) => match r.req.req_type {
VIRTIO_MEM_REQ_PLUG => {
let resp_type =
self.state_change_request(r.req.addr, r.req.nb_blocks, true);
r.send_response(&memory, resp_type, 0u16)
}
VIRTIO_MEM_REQ_UNPLUG => {
let resp_type =
self.state_change_request(r.req.addr, r.req.nb_blocks, false);
r.send_response(&memory, resp_type, 0u16)
}
VIRTIO_MEM_REQ_UNPLUG_ALL => {
let resp_type = self.unplug_all();
r.send_response(&memory, resp_type, 0u16)
}
VIRTIO_MEM_REQ_STATE => {
let (resp_type, resp_state) =
self.state_request(r.req.addr, r.req.nb_blocks);
r.send_response(&memory, resp_type, resp_state)
}
_ => {
error!("VirtioMemReq unknown request type {:?}", r.req.req_type);
0
}
},
};
self.queue.add_used(head_index, len).unwrap();
used_count += 1;
}
used_count > 0
}
fn run(
&mut self,
paused: Arc<AtomicBool>,
paused_sync: Arc<Barrier>,
) -> result::Result<(), EpollHelperError> {
let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
helper.add_event(self.resize.evt.as_raw_fd(), RESIZE_EVENT)?;
helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
helper.run(paused, paused_sync, self)?;
Ok(())
}
}
impl EpollHelperHandler for MemEpollHandler {
fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
let ev_type = event.data as u16;
match ev_type {
RESIZE_EVENT => {
if let Err(e) = self.resize.evt.read() {
error!("Failed to get resize event: {:?}", e);
return true;
} else {
let size = self.resize.size();
let mut config = self.config.lock().unwrap();
let mut signal_error = false;
let mut r = config.resize(size);
r = match r {
Err(e) => Err(e),
_ => match self.signal(VirtioInterruptType::Config) {
Err(e) => {
signal_error = true;
Err(Error::ResizeTriggerFail(e))
}
_ => Ok(()),
},
};
if let Err(e) = self.resize.send(r) {
error!("Sending \"resize\" response: {:?}", e);
return true;
}
if signal_error {
return true;
}
}
}
QUEUE_AVAIL_EVENT => {
if let Err(e) = self.queue_evt.read() {
error!("Failed to get queue event: {:?}", e);
return true;
} else if self.process_queue() {
if let Err(e) = self.signal(VirtioInterruptType::Queue(0)) {
error!("Failed to signal used queue: {:?}", e);
return true;
}
}
}
_ => {
error!("Unexpected event: {}", ev_type);
return true;
}
}
false
}
}
#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
Container,
Device(u32),
}
#[derive(Versionize)]
pub struct MemState {
pub avail_features: u64,
pub acked_features: u64,
pub config: VirtioMemConfig,
pub blocks_state: BlocksState,
}
impl VersionMapped for MemState {}
pub struct Mem {
common: VirtioCommon,
id: String,
resize: ResizeSender,
host_addr: u64,
host_fd: Option<RawFd>,
config: Arc<Mutex<VirtioMemConfig>>,
seccomp_action: SeccompAction,
hugepages: bool,
dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
blocks_state: Arc<Mutex<BlocksState>>,
exit_evt: EventFd,
}
impl Mem {
// Create a new virtio-mem device.
#[allow(clippy::too_many_arguments)]
pub fn new(
id: String,
region: &Arc<GuestRegionMmap>,
resize: ResizeSender,
seccomp_action: SeccompAction,
numa_node_id: Option<u16>,
initial_size: u64,
hugepages: bool,
exit_evt: EventFd,
blocks_state: Arc<Mutex<BlocksState>>,
) -> io::Result<Mem> {
let region_len = region.len();
if region_len != region_len / VIRTIO_MEM_ALIGN_SIZE * VIRTIO_MEM_ALIGN_SIZE {
return Err(io::Error::new(
io::ErrorKind::Other,
format!(
"Virtio-mem size is not aligned with {}",
VIRTIO_MEM_ALIGN_SIZE
),
));
}
let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;
let mut config = VirtioMemConfig {
block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
addr: region.start_addr().raw_value(),
region_size: region.len(),
usable_region_size: region.len(),
plugged_size: 0,
requested_size: 0,
..Default::default()
};
if initial_size != 0 {
config.resize(initial_size).map_err(|e| {
io::Error::new(
io::ErrorKind::Other,
format!(
"Failed to resize virtio-mem configuration to {}: {:?}",
initial_size, e
),
)
})?;
}
if let Some(node_id) = numa_node_id {
avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
config.node_id = node_id;
}
// Make sure the virtio-mem configuration complies with the
// specification.
config.validate().map_err(|e| {
io::Error::new(
io::ErrorKind::Other,
format!("Invalid virtio-mem configuration: {:?}", e),
)
})?;
let host_fd = region
.file_offset()
.map(|f_offset| f_offset.file().as_raw_fd());
Ok(Mem {
common: VirtioCommon {
device_type: VirtioDeviceType::Mem as u32,
avail_features,
paused_sync: Some(Arc::new(Barrier::new(2))),
queue_sizes: QUEUE_SIZES.to_vec(),
min_queues: 1,
..Default::default()
},
id,
resize,
host_addr: region.as_ptr() as u64,
host_fd,
config: Arc::new(Mutex::new(config)),
seccomp_action,
hugepages,
dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
blocks_state,
exit_evt,
})
}
pub fn add_dma_mapping_handler(
&mut self,
source: VirtioMemMappingSource,
handler: Arc<dyn ExternalDmaMapping>,
) -> result::Result<(), Error> {
let config = self.config.lock().unwrap();
if config.plugged_size > 0 {
for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
if *plugged {
let gpa = config.addr + (idx as u64 * config.block_size);
handler
.map(gpa, gpa, config.block_size)
.map_err(Error::DmaMap)?;
}
}
}
self.dma_mapping_handlers
.lock()
.unwrap()
.insert(source, handler);
Ok(())
}
pub fn remove_dma_mapping_handler(
&mut self,
source: VirtioMemMappingSource,
) -> result::Result<(), Error> {
let handler = self
.dma_mapping_handlers
.lock()
.unwrap()
.remove(&source)
.ok_or(Error::InvalidDmaMappingHandler)?;
let config = self.config.lock().unwrap();
if config.plugged_size > 0 {
for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
if *plugged {
let gpa = config.addr + (idx as u64 * config.block_size);
handler
.unmap(gpa, config.block_size)
.map_err(Error::DmaUnmap)?;
}
}
}
Ok(())
}
fn state(&self) -> MemState {
MemState {
avail_features: self.common.avail_features,
acked_features: self.common.acked_features,
config: *(self.config.lock().unwrap()),
blocks_state: self.blocks_state.lock().unwrap().clone(),
}
}
fn set_state(&mut self, state: &MemState) {
self.common.avail_features = state.avail_features;
self.common.acked_features = state.acked_features;
*(self.config.lock().unwrap()) = state.config;
*(self.blocks_state.lock().unwrap()) = state.blocks_state.clone();
}
}
impl Drop for Mem {
fn drop(&mut self) {
if let Some(kill_evt) = self.common.kill_evt.take() {
// Ignore the result because there is nothing we can do about it.
let _ = kill_evt.write(1);
}
}
}
impl VirtioDevice for Mem {
fn device_type(&self) -> u32 {
self.common.device_type
}
fn queue_max_sizes(&self) -> &[u16] {
&self.common.queue_sizes
}
fn features(&self) -> u64 {
self.common.avail_features
}
fn ack_features(&mut self, value: u64) {
self.common.ack_features(value)
}
fn read_config(&self, offset: u64, data: &mut [u8]) {
self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
}
fn activate(
&mut self,
_mem: GuestMemoryAtomic<GuestMemoryMmap>,
interrupt_cb: Arc<dyn VirtioInterrupt>,
mut queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
mut queue_evts: Vec<EventFd>,
) -> ActivateResult {
self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
let (kill_evt, pause_evt) = self.common.dup_eventfds();
let mut handler = MemEpollHandler {
host_addr: self.host_addr,
host_fd: self.host_fd,
blocks_state: Arc::clone(&self.blocks_state),
config: self.config.clone(),
resize: self.resize.clone(),
queue: queues.remove(0),
interrupt_cb,
queue_evt: queue_evts.remove(0),
kill_evt,
pause_evt,
hugepages: self.hugepages,
dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
};
let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
for range in unplugged_memory_ranges.regions() {
handler
.discard_memory_range(range.gpa, range.length)
.map_err(|e| {
error!(
"failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
range.gpa,
range.gpa + range.length - 1,
e
);
ActivateError::BadActivate
})?;
}
let paused = self.common.paused.clone();
let paused_sync = self.common.paused_sync.clone();
let mut epoll_threads = Vec::new();
spawn_virtio_thread(
&self.id,
&self.seccomp_action,
Thread::VirtioMem,
&mut epoll_threads,
&self.exit_evt,
move || {
if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
error!("Error running worker: {:?}", e);
}
},
)?;
self.common.epoll_threads = Some(epoll_threads);
event!("virtio-device", "activated", "id", &self.id);
Ok(())
}
fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
let result = self.common.reset();
event!("virtio-device", "reset", "id", &self.id);
result
}
}
impl Pausable for Mem {
fn pause(&mut self) -> result::Result<(), MigratableError> {
self.common.pause()
}
fn resume(&mut self) -> result::Result<(), MigratableError> {
self.common.resume()
}
}
impl Snapshottable for Mem {
fn id(&self) -> String {
self.id.clone()
}
fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
Snapshot::new_from_versioned_state(&self.id(), &self.state())
}
fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
self.set_state(&snapshot.to_versioned_state(&self.id)?);
Ok(())
}
}
impl Transportable for Mem {}
impl Migratable for Mem {}