vm-virtio: Translate addresses for devices attached to IOMMU

In case some virtio devices are attached to the virtual IOMMU, their
vring addresses need to be translated from IOVA into GPA. Without that
translation, the device would dereference untranslated addresses and
trigger out-of-range access errors.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
Sebastien Boeuf 2019-10-02 00:10:42 -07:00 committed by Samuel Ortiz
parent 6566c739e1
commit 0acb1e329d
5 changed files with 94 additions and 20 deletions

View File

@ -22,6 +22,9 @@ pub type VirtioInterrupt = Box<
+ Sync, + Sync,
>; >;
/// Callback type used by virtio devices to remap an IOVA (I/O virtual
/// address) programmed by the guest into a GPA (guest physical address).
/// Returns an `std::io::Error` when no mapping exists for the address.
pub type VirtioIommuRemapping =
Box<dyn Fn(u64) -> std::result::Result<u64, std::io::Error> + Send + Sync>;
#[derive(Clone)] #[derive(Clone)]
pub struct VirtioSharedMemory { pub struct VirtioSharedMemory {
pub offset: u64, pub offset: u64,
@ -83,4 +86,18 @@ pub trait VirtioDevice: Send {
fn get_shm_regions(&self) -> Option<VirtioSharedMemoryList> { fn get_shm_regions(&self) -> Option<VirtioSharedMemoryList> {
None None
} }
/// Translate `addr` through the device's IOMMU mapping, if any.
/// The default implementation is the identity mapping, suitable for
/// devices that are not attached to a virtual IOMMU.
fn iommu_translate(&self, addr: u64) -> u64 {
addr
}
}
/// Trait providing address translation the same way a physical DMA remapping
/// table would provide translation between an IOVA and a physical address.
/// The goal of this trait is to be used by virtio devices to perform the
/// address translation before they try to read from the guest physical address.
/// On the other side, the implementation itself should be provided by the code
/// emulating the IOMMU for the guest.
pub trait DmaRemapping: Send + Sync {
/// Translate `addr` on behalf of the device identified by `id`, returning
/// the resulting address or an `std::io::Error` when no mapping exists.
fn translate(&self, id: u32, addr: u64) -> std::result::Result<u64, std::io::Error>;
} }

View File

@ -11,7 +11,9 @@
use std::cmp::min; use std::cmp::min;
use std::num::Wrapping; use std::num::Wrapping;
use std::sync::atomic::{fence, Ordering}; use std::sync::atomic::{fence, Ordering};
use std::sync::Arc;
use crate::device::VirtioIommuRemapping;
use vm_memory::{ use vm_memory::{
Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap, GuestUsize, Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap, GuestUsize,
}; };
@ -44,6 +46,7 @@ pub struct DescriptorChain<'a> {
desc_table: GuestAddress, desc_table: GuestAddress,
queue_size: u16, queue_size: u16,
ttl: u16, // used to prevent infinite chain cycles ttl: u16, // used to prevent infinite chain cycles
iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>>,
/// Reference to guest memory /// Reference to guest memory
pub mem: &'a GuestMemoryMmap, pub mem: &'a GuestMemoryMmap,
@ -71,6 +74,7 @@ impl<'a> DescriptorChain<'a> {
desc_table: GuestAddress, desc_table: GuestAddress,
queue_size: u16, queue_size: u16,
index: u16, index: u16,
iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>>,
) -> Option<DescriptorChain> { ) -> Option<DescriptorChain> {
if index >= queue_size { if index >= queue_size {
return None; return None;
@ -91,16 +95,25 @@ impl<'a> DescriptorChain<'a> {
return None; return None;
} }
}; };
// Translate address if necessary
let desc_addr = if let Some(iommu_mapping_cb) = &iommu_mapping_cb {
(iommu_mapping_cb)(desc.addr).unwrap()
} else {
desc.addr
};
let chain = DescriptorChain { let chain = DescriptorChain {
mem, mem,
desc_table, desc_table,
queue_size, queue_size,
ttl: queue_size, ttl: queue_size,
index, index,
addr: GuestAddress(desc.addr), addr: GuestAddress(desc_addr),
len: desc.len, len: desc.len,
flags: desc.flags, flags: desc.flags,
next: desc.next, next: desc.next,
iommu_mapping_cb,
}; };
if chain.is_valid() { if chain.is_valid() {
@ -137,12 +150,17 @@ impl<'a> DescriptorChain<'a> {
/// the head of the next _available_ descriptor chain. /// the head of the next _available_ descriptor chain.
pub fn next_descriptor(&self) -> Option<DescriptorChain<'a>> { pub fn next_descriptor(&self) -> Option<DescriptorChain<'a>> {
if self.has_next() { if self.has_next() {
DescriptorChain::checked_new(self.mem, self.desc_table, self.queue_size, self.next).map( DescriptorChain::checked_new(
|mut c| { self.mem,
self.desc_table,
self.queue_size,
self.next,
self.iommu_mapping_cb.clone(),
)
.map(|mut c| {
c.ttl = self.ttl - 1; c.ttl = self.ttl - 1;
c c
}, })
)
} else { } else {
None None
} }
@ -158,6 +176,7 @@ pub struct AvailIter<'a, 'b> {
last_index: Wrapping<u16>, last_index: Wrapping<u16>,
queue_size: u16, queue_size: u16,
next_avail: &'b mut Wrapping<u16>, next_avail: &'b mut Wrapping<u16>,
iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>>,
} }
impl<'a, 'b> AvailIter<'a, 'b> { impl<'a, 'b> AvailIter<'a, 'b> {
@ -170,6 +189,7 @@ impl<'a, 'b> AvailIter<'a, 'b> {
last_index: Wrapping(0), last_index: Wrapping(0),
queue_size: 0, queue_size: 0,
next_avail: q_next_avail, next_avail: q_next_avail,
iommu_mapping_cb: None,
} }
} }
} }
@ -199,8 +219,13 @@ impl<'a, 'b> Iterator for AvailIter<'a, 'b> {
self.next_index += Wrapping(1); self.next_index += Wrapping(1);
let ret = let ret = DescriptorChain::checked_new(
DescriptorChain::checked_new(self.mem, self.desc_table, self.queue_size, desc_index); self.mem,
self.desc_table,
self.queue_size,
desc_index,
self.iommu_mapping_cb.clone(),
);
if ret.is_some() { if ret.is_some() {
*self.next_avail += Wrapping(1); *self.next_avail += Wrapping(1);
} }
@ -234,6 +259,8 @@ pub struct Queue {
pub next_avail: Wrapping<u16>, pub next_avail: Wrapping<u16>,
pub next_used: Wrapping<u16>, pub next_used: Wrapping<u16>,
pub iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>>,
} }
impl Queue { impl Queue {
@ -249,6 +276,7 @@ impl Queue {
used_ring: GuestAddress(0), used_ring: GuestAddress(0),
next_avail: Wrapping(0), next_avail: Wrapping(0),
next_used: Wrapping(0), next_used: Wrapping(0),
iommu_mapping_cb: None,
} }
} }
@ -256,6 +284,26 @@ impl Queue {
self.max_size self.max_size
} }
pub fn enable(&mut self, set: bool) {
self.ready = set;
if set {
// Translate address of descriptor table and vrings.
if let Some(iommu_mapping_cb) = &self.iommu_mapping_cb {
self.desc_table =
GuestAddress((iommu_mapping_cb)(self.desc_table.raw_value()).unwrap());
self.avail_ring =
GuestAddress((iommu_mapping_cb)(self.avail_ring.raw_value()).unwrap());
self.used_ring =
GuestAddress((iommu_mapping_cb)(self.used_ring.raw_value()).unwrap());
}
} else {
self.desc_table = GuestAddress(0);
self.avail_ring = GuestAddress(0);
self.used_ring = GuestAddress(0);
}
}
/// Return the actual size of the queue, as the driver may not set up a /// Return the actual size of the queue, as the driver may not set up a
/// queue as big as the device allows. /// queue as big as the device allows.
pub fn actual_size(&self) -> u16 { pub fn actual_size(&self) -> u16 {
@ -354,6 +402,7 @@ impl Queue {
last_index: Wrapping(last_index), last_index: Wrapping(last_index),
queue_size, queue_size,
next_avail: &mut self.next_avail, next_avail: &mut self.next_avail,
iommu_mapping_cb: self.iommu_mapping_cb.clone(),
} }
} }
@ -647,14 +696,16 @@ pub(crate) mod tests {
assert!(vq.end().0 < 0x1000); assert!(vq.end().0 < 0x1000);
// index >= queue_size // index >= queue_size
assert!(DescriptorChain::checked_new(m, vq.start(), 16, 16).is_none()); assert!(DescriptorChain::checked_new(m, vq.start(), 16, 16, None).is_none());
// desc_table address is way off // desc_table address is way off
assert!(DescriptorChain::checked_new(m, GuestAddress(0x00ff_ffff_ffff), 16, 0).is_none()); assert!(
DescriptorChain::checked_new(m, GuestAddress(0x00ff_ffff_ffff), 16, 0, None).is_none()
);
// the addr field of the descriptor is way off // the addr field of the descriptor is way off
vq.dtable[0].addr.set(0x0fff_ffff_ffff); vq.dtable[0].addr.set(0x0fff_ffff_ffff);
assert!(DescriptorChain::checked_new(m, vq.start(), 16, 0).is_none()); assert!(DescriptorChain::checked_new(m, vq.start(), 16, 0, None).is_none());
// let's create some invalid chains // let's create some invalid chains
@ -663,7 +714,7 @@ pub(crate) mod tests {
vq.dtable[0].addr.set(0x1000); vq.dtable[0].addr.set(0x1000);
// ...but the length is too large // ...but the length is too large
vq.dtable[0].len.set(0xffff_ffff); vq.dtable[0].len.set(0xffff_ffff);
assert!(DescriptorChain::checked_new(m, vq.start(), 16, 0).is_none()); assert!(DescriptorChain::checked_new(m, vq.start(), 16, 0, None).is_none());
} }
{ {
@ -673,7 +724,7 @@ pub(crate) mod tests {
//..but the the index of the next descriptor is too large //..but the the index of the next descriptor is too large
vq.dtable[0].next.set(16); vq.dtable[0].next.set(16);
assert!(DescriptorChain::checked_new(m, vq.start(), 16, 0).is_none()); assert!(DescriptorChain::checked_new(m, vq.start(), 16, 0, None).is_none());
} }
// finally, let's test an ok chain // finally, let's test an ok chain
@ -682,7 +733,7 @@ pub(crate) mod tests {
vq.dtable[0].next.set(1); vq.dtable[0].next.set(1);
vq.dtable[1].set(0x2000, 0x1000, 0, 0); vq.dtable[1].set(0x2000, 0x1000, 0, 0);
let c = DescriptorChain::checked_new(m, vq.start(), 16, 0).unwrap(); let c = DescriptorChain::checked_new(m, vq.start(), 16, 0, None).unwrap();
assert_eq!(c.mem as *const GuestMemoryMmap, m as *const GuestMemoryMmap); assert_eq!(c.mem as *const GuestMemoryMmap, m as *const GuestMemoryMmap);
assert_eq!(c.desc_table, vq.start()); assert_eq!(c.desc_table, vq.start());

View File

@ -149,7 +149,7 @@ impl VirtioPciCommonConfig {
0x16 => self.queue_select = value, 0x16 => self.queue_select = value,
0x18 => self.with_queue_mut(queues, |q| q.size = value), 0x18 => self.with_queue_mut(queues, |q| q.size = value),
0x1a => self.with_queue_mut(queues, |q| q.vector = value), 0x1a => self.with_queue_mut(queues, |q| q.vector = value),
0x1c => self.with_queue_mut(queues, |q| q.ready = value == 1), 0x1c => self.with_queue_mut(queues, |q| q.enable(value == 1)),
_ => { _ => {
warn!("invalid virtio register word write: 0x{:x}", offset); warn!("invalid virtio register word write: 0x{:x}", offset);
} }

View File

@ -31,8 +31,8 @@ use vmm_sys_util::{errno::Result, eventfd::EventFd};
use super::VirtioPciCommonConfig; use super::VirtioPciCommonConfig;
use crate::{ use crate::{
Queue, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioInterruptType, Queue, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioInterruptType,
DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FAILED, DEVICE_FEATURES_OK, VirtioIommuRemapping, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FAILED,
DEVICE_INIT, INTERRUPT_STATUS_CONFIG_CHANGED, INTERRUPT_STATUS_USED_RING, DEVICE_FEATURES_OK, DEVICE_INIT, INTERRUPT_STATUS_CONFIG_CHANGED, INTERRUPT_STATUS_USED_RING,
}; };
#[allow(clippy::enum_variant_names)] #[allow(clippy::enum_variant_names)]
@ -250,6 +250,7 @@ impl VirtioPciDevice {
memory: Arc<RwLock<GuestMemoryMmap>>, memory: Arc<RwLock<GuestMemoryMmap>>,
device: Box<dyn VirtioDevice>, device: Box<dyn VirtioDevice>,
msix_num: u16, msix_num: u16,
iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>>,
) -> Result<Self> { ) -> Result<Self> {
let mut queue_evts = Vec::new(); let mut queue_evts = Vec::new();
for _ in device.queue_max_sizes().iter() { for _ in device.queue_max_sizes().iter() {
@ -258,7 +259,11 @@ impl VirtioPciDevice {
let queues = device let queues = device
.queue_max_sizes() .queue_max_sizes()
.iter() .iter()
.map(|&s| Queue::new(s)) .map(|&s| {
let mut queue = Queue::new(s);
queue.iommu_mapping_cb = iommu_mapping_cb.clone();
queue
})
.collect(); .collect();
let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16; let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;

View File

@ -943,7 +943,8 @@ impl DeviceManager {
0 0
}; };
let mut virtio_pci_device = VirtioPciDevice::new(memory.clone(), virtio_device, msix_num) let mut virtio_pci_device =
VirtioPciDevice::new(memory.clone(), virtio_device, msix_num, None)
.map_err(DeviceManagerError::VirtioDevice)?; .map_err(DeviceManagerError::VirtioDevice)?;
let bars = virtio_pci_device let bars = virtio_pci_device