virtio-devices: mem: Accept handlers to update DMA mappings

Create two functions for registering/unregistering DMA mapping handlers,
each handler being associated with a VFIO device.

Whenever the plugged_size is modified (i.e. whenever the virtio-mem
driver in the guest triggers a plug or unplug), the virtio-mem backend
is responsible for updating the DMA mappings related to every VFIO
device through the handlers previously provided.

It's important to update the mappings when a handler is registered or
unregistered as well, so that we don't miss plugged memory that was
added before the VFIO device was attached to the VM.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
Sebastien Boeuf 2021-03-02 18:47:01 +01:00
parent 080ea31813
commit 61f9a4ec6c
2 changed files with 166 additions and 20 deletions

View File

@ -24,6 +24,7 @@ use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow; use anyhow::anyhow;
use libc::EFD_NONBLOCK; use libc::EFD_NONBLOCK;
use seccomp::{SeccompAction, SeccompFilter}; use seccomp::{SeccompAction, SeccompFilter};
use std::collections::BTreeMap;
use std::io; use std::io;
use std::mem::size_of; use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd}; use std::os::unix::io::{AsRawFd, RawFd};
@ -32,6 +33,7 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::mpsc; use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex}; use std::sync::{Arc, Barrier, Mutex};
use std::thread; use std::thread;
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{ use vm_memory::{
Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
GuestMemoryError, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, GuestMemoryError, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap,
@ -124,6 +126,12 @@ pub enum Error {
ValidateError(anyhow::Error), ValidateError(anyhow::Error),
// Failed discarding memory range // Failed discarding memory range
DiscardMemoryRange(std::io::Error), DiscardMemoryRange(std::io::Error),
// Failed DMA mapping.
DmaMap(std::io::Error),
// Failed DMA unmapping.
DmaUnmap(std::io::Error),
// Invalid DMA mapping handler
InvalidDmaMappingHandler,
} }
#[repr(C)] #[repr(C)]
@ -401,12 +409,16 @@ impl BlocksState {
*state = plug; *state = plug;
} }
} }
// Borrow the raw per-block plug state: one bool per memory block,
// true when that block is currently plugged by the guest.
fn inner(&self) -> &Vec<bool> {
&self.0
}
} }
struct MemEpollHandler { struct MemEpollHandler {
host_addr: u64, host_addr: u64,
host_fd: Option<RawFd>, host_fd: Option<RawFd>,
blocks_state: BlocksState, blocks_state: Arc<Mutex<BlocksState>>,
config: Arc<Mutex<VirtioMemConfig>>, config: Arc<Mutex<VirtioMemConfig>>,
resize: ResizeSender, resize: ResizeSender,
queue: Queue, queue: Queue,
@ -416,6 +428,7 @@ struct MemEpollHandler {
kill_evt: EventFd, kill_evt: EventFd,
pause_evt: EventFd, pause_evt: EventFd,
hugepages: bool, hugepages: bool,
dma_mapping_handlers: Arc<Mutex<BTreeMap<u32, Arc<dyn ExternalDmaMapping>>>>,
} }
impl MemEpollHandler { impl MemEpollHandler {
@ -473,6 +486,8 @@ impl MemEpollHandler {
let first_block_index = (offset / config.block_size) as usize; let first_block_index = (offset / config.block_size) as usize;
if !self if !self
.blocks_state .blocks_state
.lock()
.unwrap()
.is_range_state(first_block_index, nb_blocks, !plug) .is_range_state(first_block_index, nb_blocks, !plug)
{ {
return VIRTIO_MEM_RESP_ERROR; return VIRTIO_MEM_RESP_ERROR;
@ -486,11 +501,40 @@ impl MemEpollHandler {
} }
self.blocks_state self.blocks_state
.lock()
.unwrap()
.set_range(first_block_index, nb_blocks, plug); .set_range(first_block_index, nb_blocks, plug);
if plug { if plug {
let handlers = self.dma_mapping_handlers.lock().unwrap();
let mut gpa = addr;
for _ in 0..nb_blocks {
for (_, handler) in handlers.iter() {
if let Err(e) = handler.map(gpa, gpa, config.block_size) {
error!(
"failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
gpa, config.block_size, e
);
return VIRTIO_MEM_RESP_ERROR;
}
}
gpa += config.block_size;
}
config.plugged_size += size; config.plugged_size += size;
} else { } else {
let handlers = self.dma_mapping_handlers.lock().unwrap();
for (_, handler) in handlers.iter() {
if let Err(e) = handler.unmap(addr, size) {
error!(
"failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
addr, size, e
);
return VIRTIO_MEM_RESP_ERROR;
}
}
config.plugged_size -= size; config.plugged_size -= size;
} }
@ -504,8 +548,30 @@ impl MemEpollHandler {
return VIRTIO_MEM_RESP_ERROR; return VIRTIO_MEM_RESP_ERROR;
} }
self.blocks_state // Remaining plugged blocks are unmapped.
.set_range(0, (config.region_size / config.block_size) as u16, false); if config.plugged_size > 0 {
let handlers = self.dma_mapping_handlers.lock().unwrap();
for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
if *plugged {
let gpa = config.addr + (idx as u64 * config.block_size);
for (_, handler) in handlers.iter() {
if let Err(e) = handler.unmap(gpa, config.block_size) {
error!(
"failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
gpa, config.block_size, e
);
return VIRTIO_MEM_RESP_ERROR;
}
}
}
}
}
self.blocks_state.lock().unwrap().set_range(
0,
(config.region_size / config.block_size) as u16,
false,
);
config.plugged_size = 0; config.plugged_size = 0;
@ -524,19 +590,23 @@ impl MemEpollHandler {
let offset = addr - config.addr; let offset = addr - config.addr;
let first_block_index = (offset / config.block_size) as usize; let first_block_index = (offset / config.block_size) as usize;
let resp_state = if self let resp_state =
.blocks_state if self
.is_range_state(first_block_index, nb_blocks, true) .blocks_state
{ .lock()
VIRTIO_MEM_STATE_PLUGGED .unwrap()
} else if self .is_range_state(first_block_index, nb_blocks, true)
.blocks_state {
.is_range_state(first_block_index, nb_blocks, false) VIRTIO_MEM_STATE_PLUGGED
{ } else if self.blocks_state.lock().unwrap().is_range_state(
VIRTIO_MEM_STATE_UNPLUGGED first_block_index,
} else { nb_blocks,
VIRTIO_MEM_STATE_MIXED false,
}; ) {
VIRTIO_MEM_STATE_UNPLUGGED
} else {
VIRTIO_MEM_STATE_MIXED
};
(resp_type, resp_state) (resp_type, resp_state)
} }
@ -675,6 +745,8 @@ pub struct Mem {
config: Arc<Mutex<VirtioMemConfig>>, config: Arc<Mutex<VirtioMemConfig>>,
seccomp_action: SeccompAction, seccomp_action: SeccompAction,
hugepages: bool, hugepages: bool,
dma_mapping_handlers: Arc<Mutex<BTreeMap<u32, Arc<dyn ExternalDmaMapping>>>>,
blocks_state: Arc<Mutex<BlocksState>>,
} }
impl Mem { impl Mem {
@ -758,8 +830,64 @@ impl Mem {
config: Arc::new(Mutex::new(config)), config: Arc::new(Mutex::new(config)),
seccomp_action, seccomp_action,
hugepages, hugepages,
dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
blocks_state: Arc::new(Mutex::new(BlocksState(vec![
false;
(config.region_size / config.block_size)
as usize
]))),
}) })
} }
pub fn add_dma_mapping_handler(
&mut self,
device_id: u32,
handler: Arc<dyn ExternalDmaMapping>,
) -> result::Result<(), Error> {
let config = self.config.lock().unwrap();
if config.plugged_size > 0 {
for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
if *plugged {
let gpa = config.addr + (idx as u64 * config.block_size);
handler
.map(gpa, gpa, config.block_size)
.map_err(Error::DmaMap)?;
}
}
}
self.dma_mapping_handlers
.lock()
.unwrap()
.insert(device_id, handler);
Ok(())
}
pub fn remove_dma_mapping_handler(&mut self, device_id: u32) -> result::Result<(), Error> {
let handler = self
.dma_mapping_handlers
.lock()
.unwrap()
.remove(&device_id)
.ok_or(Error::InvalidDmaMappingHandler)?;
let config = self.config.lock().unwrap();
if config.plugged_size > 0 {
for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
if *plugged {
let gpa = config.addr + (idx as u64 * config.block_size);
handler
.unmap(gpa, config.block_size)
.map_err(Error::DmaUnmap)?;
}
}
}
Ok(())
}
} }
impl Drop for Mem { impl Drop for Mem {
@ -825,10 +953,7 @@ impl VirtioDevice for Mem {
let mut handler = MemEpollHandler { let mut handler = MemEpollHandler {
host_addr: self.host_addr, host_addr: self.host_addr,
host_fd: self.host_fd, host_fd: self.host_fd,
blocks_state: BlocksState(vec![ blocks_state: Arc::clone(&self.blocks_state),
false;
(config.region_size / config.block_size) as usize
]),
config: self.config.clone(), config: self.config.clone(),
resize: self.resize.clone(), resize: self.resize.clone(),
queue: queues.remove(0), queue: queues.remove(0),
@ -838,6 +963,7 @@ impl VirtioDevice for Mem {
kill_evt, kill_evt,
pause_evt, pause_evt,
hugepages: self.hugepages, hugepages: self.hugepages,
dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
}; };
handler handler

View File

@ -55,6 +55,24 @@ const SYS_IO_URING_ENTER: i64 = 426;
// See include/uapi/asm-generic/ioctls.h in the kernel code. // See include/uapi/asm-generic/ioctls.h in the kernel code.
const FIONBIO: u64 = 0x5421; const FIONBIO: u64 = 0x5421;
// See include/uapi/linux/vfio.h in the kernel code.
// ioctl request numbers for the VFIO IOMMU DMA map/unmap operations;
// they are whitelisted below in the seccomp ioctl rules.
const VFIO_IOMMU_MAP_DMA: u64 = 0x3b71;
const VFIO_IOMMU_UNMAP_DMA: u64 = 0x3b72;
// Build the seccomp rule restricting ioctl(2) on the virtio-iommu
// thread to the VFIO DMA map/unmap requests only, matched on the ioctl
// request number (second syscall argument).
fn create_virtio_iommu_ioctl_seccomp_rule() -> Vec<SeccompRule> {
or![
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_MAP_DMA).unwrap()],
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_UNMAP_DMA).unwrap()],
]
}
fn create_virtio_mem_ioctl_seccomp_rule() -> Vec<SeccompRule> {
or![
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_MAP_DMA).unwrap()],
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_UNMAP_DMA).unwrap()],
]
}
fn virtio_balloon_thread_rules() -> Vec<SyscallRuleSet> { fn virtio_balloon_thread_rules() -> Vec<SyscallRuleSet> {
vec![ vec![
allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_brk),
@ -156,6 +174,7 @@ fn virtio_iommu_thread_rules() -> Vec<SyscallRuleSet> {
allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_epoll_wait),
allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_exit),
allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_futex),
allow_syscall_if(libc::SYS_ioctl, create_virtio_iommu_ioctl_seccomp_rule()),
allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_madvise),
allow_syscall(libc::SYS_mmap), allow_syscall(libc::SYS_mmap),
allow_syscall(libc::SYS_mprotect), allow_syscall(libc::SYS_mprotect),
@ -179,6 +198,7 @@ fn virtio_mem_thread_rules() -> Vec<SyscallRuleSet> {
allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_exit),
allow_syscall(libc::SYS_fallocate), allow_syscall(libc::SYS_fallocate),
allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_futex),
allow_syscall_if(libc::SYS_ioctl, create_virtio_mem_ioctl_seccomp_rule()),
allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_madvise),
allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_munmap),
allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_read),