diff --git a/pci/src/device.rs b/pci/src/device.rs index ca278d2b0..913b9c24a 100755 --- a/pci/src/device.rs +++ b/pci/src/device.rs @@ -13,8 +13,8 @@ use vm_allocator::SystemAllocator; use vm_memory::{GuestAddress, GuestUsize}; use vmm_sys_util::EventFd; -pub struct InterruptParameters { - pub msix: Option, +pub struct InterruptParameters<'a> { + pub msix: Option<&'a MsixTableEntry>, } pub type InterruptDelivery = diff --git a/pci/src/msix.rs b/pci/src/msix.rs index 2e15d113e..cf593bc4f 100644 --- a/pci/src/msix.rs +++ b/pci/src/msix.rs @@ -6,7 +6,10 @@ extern crate byteorder; extern crate vm_memory; -use crate::{PciCapability, PciCapabilityID}; +use std::sync::Arc; + +use crate::device::InterruptParameters; +use crate::{InterruptDelivery, PciCapability, PciCapabilityID}; use byteorder::{ByteOrder, LittleEndian}; use vm_memory::ByteValued; @@ -37,11 +40,12 @@ impl Default for MsixTableEntry { pub struct MsixConfig { pub table_entries: Vec, pub pba_entries: Vec, + interrupt_cb: Option>, } impl MsixConfig { pub fn new(msix_vectors: u16) -> Self { - assert!(msix_vectors < MAX_MSIX_VECTORS_PER_DEVICE); + assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); let mut table_entries: Vec = Vec::new(); table_entries.resize_with(msix_vectors as usize, Default::default); @@ -52,9 +56,14 @@ impl MsixConfig { MsixConfig { table_entries, pba_entries, + interrupt_cb: None, } } + pub fn register_interrupt_cb(&mut self, cb: Arc) { + self.interrupt_cb = Some(cb); + } + pub fn read_table(&mut self, offset: u64, data: &mut [u8]) { assert!((data.len() == 4 || data.len() == 8)); @@ -108,6 +117,9 @@ impl MsixConfig { let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; + // Store the value of the vector control register + let mut old_vector_ctl: Option = None; + match data.len() { 4 => { let value = LittleEndian::read_u32(data); @@ -115,7 +127,10 @@ impl MsixConfig { 0x0 => self.table_entries[index].msg_addr_lo = value, 0x4 => self.table_entries[index].msg_addr_hi = value, 0x8 => self.table_entries[index].msg_data = value, - 0x10 => self.table_entries[index].vector_ctl = value, + 0x10 => { + old_vector_ctl = Some(self.table_entries[index].vector_ctl); + self.table_entries[index].vector_ctl = value; + } _ => error!("invalid offset"), }; @@ -129,6 +144,7 @@ impl MsixConfig { self.table_entries[index].msg_addr_hi = (value >> 32) as u32; } 0x8 => { + old_vector_ctl = Some(self.table_entries[index].vector_ctl); self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32; self.table_entries[index].vector_ctl = (value >> 32) as u32; } @@ -139,6 +155,32 @@ impl MsixConfig { } _ => error!("invalid data length"), }; + + // After the MSI-X table entry has been updated, it is necessary to + // check if the vector control masking bit has changed. In case the + // bit has been flipped from 1 to 0, we need to inject a MSI message + // if the corresponding pending bit from the PBA is set. Once the MSI + // has been injected, the pending bit in the PBA needs to be cleared. + if let Some(old_vector_ctl) = old_vector_ctl { + // Check is bit has been flipped + if old_vector_ctl == 0x0000_0001u32 + && self.table_entries[index].vector_ctl == 0x0000_0000u32 + && self.get_pba_bit(index as u16) == 1 + { + // Inject the MSI message + if let Some(cb) = &self.interrupt_cb { + match (cb)(InterruptParameters { + msix: Some(&self.table_entries[index]), + }) { + Ok(_) => debug!("MSI-X injected on vector control flip"), + Err(e) => error!("failed to inject MSI-X: {}", e), + }; + } + + // Clear the bit from PBA + self.set_pba_bit(index as u16, true); + } + } } pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) { @@ -182,6 +224,30 @@ impl MsixConfig { pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) { error!("Pending Bit Array is read only"); } + + pub fn set_pba_bit(&mut self, vector: u16, reset: bool) { + assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); + + let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; + let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; + let mut mask: u64 = (1 << shift) as u64; + + if reset { + mask = !mask; + self.pba_entries[index] &= mask; + } else { + self.pba_entries[index] |= mask; + } + } + + fn get_pba_bit(&mut self, vector: u16) -> u8 { + assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); + + let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; + let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; + + ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 + } } #[allow(dead_code)] diff --git a/vm-virtio/src/transport/pci_device.rs b/vm-virtio/src/transport/pci_device.rs index 483cd5d13..7bea369bd 100755 --- a/vm-virtio/src/transport/pci_device.rs +++ b/vm-virtio/src/transport/pci_device.rs @@ -358,15 +358,27 @@ impl PciDevice for VirtioPciDevice { } fn assign_msix(&mut self, msi_cb: Arc) { + self.msix_config + .lock() + .unwrap() + .register_interrupt_cb(msi_cb.clone()); + let msix_config = self.msix_config.clone(); let cb = Arc::new(Box::new(move |queue: &Queue| { - let param = InterruptParameters { - msix: Some( - msix_config.lock().unwrap().table_entries[queue.vector as usize].clone(), - ), - }; - (msi_cb)(param) + let entry = &msix_config.lock().unwrap().table_entries[queue.vector as usize]; + + // In case the vector control register associated with the entry + // has its first bit set, this means the vector is masked and the + // device should not inject the interrupt. + // Instead, the Pending Bit Array table is updated to reflect there + // is a pending interrupt for this specific vector. + if entry.vector_ctl == 0x0000_0001u32 { + msix_config.lock().unwrap().set_pba_bit(queue.vector, false); + return Ok(()); + } + + (msi_cb)(InterruptParameters { msix: Some(entry) }) }) as VirtioInterrupt); self.interrupt_cb = Some(cb);