diff --git a/pci/src/configuration.rs b/pci/src/configuration.rs index fe25e28a8..9ec90c9cf 100755 --- a/pci/src/configuration.rs +++ b/pci/src/configuration.rs @@ -2,7 +2,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. -use crate::PciInterruptPin; +use std::sync::{Arc, Mutex}; + +use crate::{MsixConfig, PciInterruptPin}; use byteorder::{ByteOrder, LittleEndian}; use std::fmt::{self, Display}; @@ -132,6 +134,7 @@ pub trait PciProgrammingInterface { } /// Types of PCI capabilities. +#[derive(PartialEq)] pub enum PciCapabilityID { ListID = 0, PowerManagement = 0x01, @@ -171,6 +174,8 @@ pub struct PciConfiguration { bar_used: [bool; NUM_BAR_REGS], // Contains the byte offset and size of the last capability. last_capability: Option<(usize, usize)>, + msix_cap_reg_idx: Option, + msix_config: Option>>, } /// See pci_regs.h in kernel @@ -245,6 +250,7 @@ impl PciConfiguration { header_type: PciHeaderType, subsystem_vendor_id: u16, subsystem_id: u16, + msix_config: Option>>, ) -> Self { let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS]; let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS]; @@ -278,6 +284,8 @@ impl PciConfiguration { writable_bits, bar_used: [false; NUM_BAR_REGS], last_capability: None, + msix_cap_reg_idx: None, + msix_config, } } @@ -455,6 +463,11 @@ impl PciConfiguration { self.write_byte_internal(cap_offset + i + 2, *byte, false); } self.last_capability = Some((cap_offset, total_len)); + + if cap_data.id() == PciCapabilityID::MSIX { + self.msix_cap_reg_idx = Some(cap_offset / 4); + } + Ok(cap_offset) } @@ -469,6 +482,18 @@ impl PciConfiguration { return; } + // Handle potential write to MSI-X message control register + if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx { + if let Some(msix_config) = &self.msix_config { + if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { + msix_config + .lock() + .unwrap() + .set_msg_ctl(LittleEndian::read_u16(data)); + } + } + } + match data.len() { 1 => self.write_byte(reg_idx * 4 + offset as usize, data[0]), 2 => self.write_word( diff --git a/pci/src/device.rs b/pci/src/device.rs index 913b9c24a..cf26731e9 100755 --- a/pci/src/device.rs +++ b/pci/src/device.rs @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. -use crate::configuration::{self, PciConfiguration}; +use crate::configuration; use crate::msix::MsixTableEntry; use crate::PciInterruptPin; use devices::BusDevice; @@ -80,10 +80,6 @@ pub trait PciDevice: BusDevice { fn ioeventfds(&self) -> Vec<(&EventFd, u64, u64)> { Vec::new() } - /// Gets the configuration registers of the Pci Device. - fn config_registers(&self) -> &PciConfiguration; // TODO - remove these - /// Gets the configuration registers of the Pci Device for modification. - fn config_registers_mut(&mut self) -> &mut PciConfiguration; /// Reads from a BAR region mapped in to the device. /// * `addr` - The guest address inside the BAR. /// * `data` - Filled with the data from `addr`. diff --git a/pci/src/msix.rs b/pci/src/msix.rs index cf593bc4f..e29c8f195 100644 --- a/pci/src/msix.rs +++ b/pci/src/msix.rs @@ -17,6 +17,7 @@ const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; const MSIX_PBA_ENTRIES_MODULO: u64 = 8; const BITS_PER_PBA_ENTRY: usize = 64; +const FUNCTION_MASK_BIT: u8 = 14; #[derive(Debug, Clone)] pub struct MsixTableEntry { @@ -26,6 +27,12 @@ pub struct MsixTableEntry { pub vector_ctl: u32, } +impl MsixTableEntry { + pub fn is_masked(&self) -> bool { + self.vector_ctl == 1u32 + } +} + impl Default for MsixTableEntry { fn default() -> Self { MsixTableEntry { @@ -41,6 +48,7 @@ pub struct MsixConfig { pub table_entries: Vec, pub pba_entries: Vec, interrupt_cb: Option>, + masked: bool, } impl MsixConfig { @@ -57,6 +65,7 @@ impl MsixConfig { table_entries, pba_entries, interrupt_cb: None, + masked: false, } } @@ -64,6 +73,28 @@ impl MsixConfig { self.interrupt_cb = Some(cb); } + pub fn is_masked(&self) -> bool { + self.masked + } + + pub fn set_msg_ctl(&mut self, reg: u16) { + let old_masked = self.masked; + + self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16; + + // If the Function Mask bit was set, and has just been cleared, it's + // important to go through the entire PBA to check if there was any + // pending MSI-X message to inject, given that the vector is not + // masked. + if old_masked && !self.masked { + for (index, entry) in self.table_entries.clone().iter().enumerate() { + if !entry.is_masked() && self.get_pba_bit(index as u16) == 1 { + self.inject_msix_and_clear_pba(index); + } + } + } + } + pub fn read_table(&mut self, offset: u64, data: &mut [u8]) { assert!((data.len() == 4 || data.len() == 8)); @@ -117,8 +148,8 @@ impl MsixConfig { let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; - // Store the value of the vector control register - let mut old_vector_ctl: Option = None; + // Store the value of the entry before modification + let mut old_entry: Option = None; match data.len() { 4 => { @@ -128,7 +159,7 @@ impl MsixConfig { 0x4 => self.table_entries[index].msg_addr_hi = value, 0x8 => self.table_entries[index].msg_data = value, 0x10 => { - old_vector_ctl = Some(self.table_entries[index].vector_ctl); + old_entry = Some(self.table_entries[index].clone()); self.table_entries[index].vector_ctl = value; } _ => error!("invalid offset"), @@ -144,7 +175,7 @@ impl MsixConfig { self.table_entries[index].msg_addr_hi = (value >> 32) as u32; } 0x8 => { - old_vector_ctl = Some(self.table_entries[index].vector_ctl); + old_entry = Some(self.table_entries[index].clone()); self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32; self.table_entries[index].vector_ctl = (value >> 32) as u32; } @@ -161,24 +192,16 @@ impl MsixConfig { // bit has been flipped from 1 to 0, we need to inject a MSI message // if the corresponding pending bit from the PBA is set. Once the MSI // has been injected, the pending bit in the PBA needs to be cleared. - if let Some(old_vector_ctl) = old_vector_ctl { - // Check is bit has been flipped - if old_vector_ctl == 0x0000_0001u32 - && self.table_entries[index].vector_ctl == 0x0000_0000u32 + // All of this is valid only if MSI-X has not been masked for the whole + // device. + if let Some(old_entry) = old_entry { + // Check if bit has been flipped + if !self.is_masked() + && old_entry.is_masked() + && !self.table_entries[index].is_masked() && self.get_pba_bit(index as u16) == 1 { - // Inject the MSI message - if let Some(cb) = &self.interrupt_cb { - match (cb)(InterruptParameters { - msix: Some(&self.table_entries[index]), - }) { - Ok(_) => debug!("MSI-X injected on vector control flip"), - Err(e) => error!("failed to inject MSI-X: {}", e), - }; - } - - // Clear the bit from PBA - self.set_pba_bit(index as u16, true); + self.inject_msix_and_clear_pba(index); } } } @@ -240,7 +263,7 @@ impl MsixConfig { } } - fn get_pba_bit(&mut self, vector: u16) -> u8 { + fn get_pba_bit(&self, vector: u16) -> u8 { assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; @@ -248,6 +271,21 @@ impl MsixConfig { ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 } + + fn inject_msix_and_clear_pba(&mut self, vector: usize) { + // Inject the MSI message + if let Some(cb) = &self.interrupt_cb { + match (cb)(InterruptParameters { + msix: Some(&self.table_entries[vector]), + }) { + Ok(_) => debug!("MSI-X injected on vector control flip"), + Err(e) => error!("failed to inject MSI-X: {}", e), + }; + } + + // Clear the bit from PBA + self.set_pba_bit(vector as u16, true); + } } #[allow(dead_code)] diff --git a/pci/src/root.rs b/pci/src/root.rs index 1468f6757..9620434c0 100755 --- a/pci/src/root.rs +++ b/pci/src/root.rs @@ -53,6 +53,7 @@ impl PciRoot { PciHeaderType::Bridge, 0, 0, + None, ), devices: Vec::new(), } diff --git a/vm-virtio/src/transport/pci_device.rs b/vm-virtio/src/transport/pci_device.rs index 7bea369bd..69eb2e8cd 100755 --- a/vm-virtio/src/transport/pci_device.rs +++ b/vm-virtio/src/transport/pci_device.rs @@ -12,7 +12,6 @@ extern crate vm_allocator; extern crate vm_memory; extern crate vmm_sys_util; -use byteorder::{ByteOrder, LittleEndian}; use libc::EFD_NONBLOCK; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -211,6 +210,8 @@ impl VirtioPciDevice { let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16; + let msix_config = Arc::new(Mutex::new(MsixConfig::new(msix_num))); + let configuration = PciConfiguration::new( VIRTIO_PCI_VENDOR_ID, pci_device_id, @@ -220,6 +221,7 @@ impl VirtioPciDevice { PciHeaderType::Device, VIRTIO_PCI_VENDOR_ID, pci_device_id, + Some(msix_config.clone()), ); Ok(VirtioPciDevice { @@ -232,7 +234,7 @@ impl VirtioPciDevice { queue_select: 0, msix_config: 0, }, - msix_config: Arc::new(Mutex::new(MsixConfig::new(msix_num))), + msix_config, msix_num, device, device_activated: false, @@ -366,15 +368,16 @@ impl PciDevice for VirtioPciDevice { let msix_config = self.msix_config.clone(); let cb = Arc::new(Box::new(move |queue: &Queue| { - let entry = &msix_config.lock().unwrap().table_entries[queue.vector as usize]; + let config = &mut msix_config.lock().unwrap(); + let entry = &config.table_entries[queue.vector as usize]; // In case the vector control register associated with the entry // has its first bit set, this means the vector is masked and the // device should not inject the interrupt. // Instead, the Pending Bit Array table is updated to reflect there // is a pending interrupt for this specific vector. - if entry.vector_ctl == 0x0000_0001u32 { - msix_config.lock().unwrap().set_pba_bit(queue.vector, false); + if config.is_masked() || entry.is_masked() { + config.set_pba_bit(queue.vector, false); return Ok(()); } @@ -384,14 +387,6 @@ impl PciDevice for VirtioPciDevice { self.interrupt_cb = Some(cb); } - fn config_registers(&self) -> &PciConfiguration { - &self.configuration - } - - fn config_registers_mut(&mut self) -> &mut PciConfiguration { - &mut self.configuration - } - fn ioeventfds(&self) -> Vec<(&EventFd, u64, u64)> { let bar0 = self .configuration @@ -578,24 +573,11 @@ impl BusDevice for VirtioPciDevice { } fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) { - if offset as usize + data.len() > 4 { - return; - } - - let regs = self.config_registers_mut(); - - match data.len() { - 1 => regs.write_byte(reg_idx * 4 + offset as usize, data[0]), - 2 => regs.write_word( - reg_idx * 4 + offset as usize, - u16::from(data[0]) | (u16::from(data[1]) << 8), - ), - 4 => regs.write_reg(reg_idx, LittleEndian::read_u32(data)), - _ => (), - } + self.configuration + .write_config_register(reg_idx, offset, data); } fn read_config_register(&self, reg_idx: usize) -> u32 { - self.config_registers().read_reg(reg_idx) + self.configuration.read_config_register(reg_idx) } }