msix: Handle MSI-X device masking

As mentioned in the PCI specification, the Function Mask from the
Message Control Register can be set to prevent a device from injecting
MSI-X messages. This supersedes the vector masking as it interacts at
the device level.

Here quoted from the specification:
For MSI and MSI-X, while a vector is masked, the function is prohibited
from sending the associated message, and the function must set the
associated Pending bit whenever the function would otherwise send the
message. When software unmasks a vector whose associated Pending bit is
set, the function must schedule sending the associated message, and
clear the Pending bit as soon as the message has been sent. Note that
clearing the MSI-X Function Mask bit may result in many messages
needing to be sent.

This commit implements the behavior described above by reorganizing
the way the PCI configuration space is being written. It is indeed
important to be able to catch a change in the Message Control
Register without having to implement it for every PciDevice
implementation. Instead, the PciConfiguration has been modified to
take care of handling any update made to this register.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
Sebastien Boeuf 2019-06-06 18:46:11 -07:00 committed by Rob Bradford
parent d810c7712d
commit 4d98dcb077
5 changed files with 98 additions and 56 deletions

View File

@ -2,7 +2,9 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
use crate::PciInterruptPin;
use std::sync::{Arc, Mutex};
use crate::{MsixConfig, PciInterruptPin};
use byteorder::{ByteOrder, LittleEndian};
use std::fmt::{self, Display};
@ -132,6 +134,7 @@ pub trait PciProgrammingInterface {
}
/// Types of PCI capabilities.
#[derive(PartialEq)]
pub enum PciCapabilityID {
ListID = 0,
PowerManagement = 0x01,
@ -171,6 +174,8 @@ pub struct PciConfiguration {
bar_used: [bool; NUM_BAR_REGS],
// Contains the byte offset and size of the last capability.
last_capability: Option<(usize, usize)>,
msix_cap_reg_idx: Option<usize>,
msix_config: Option<Arc<Mutex<MsixConfig>>>,
}
/// See pci_regs.h in kernel
@ -245,6 +250,7 @@ impl PciConfiguration {
header_type: PciHeaderType,
subsystem_vendor_id: u16,
subsystem_id: u16,
msix_config: Option<Arc<Mutex<MsixConfig>>>,
) -> Self {
let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS];
let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS];
@ -278,6 +284,8 @@ impl PciConfiguration {
writable_bits,
bar_used: [false; NUM_BAR_REGS],
last_capability: None,
msix_cap_reg_idx: None,
msix_config,
}
}
@ -455,6 +463,11 @@ impl PciConfiguration {
self.write_byte_internal(cap_offset + i + 2, *byte, false);
}
self.last_capability = Some((cap_offset, total_len));
if cap_data.id() == PciCapabilityID::MSIX {
self.msix_cap_reg_idx = Some(cap_offset / 4);
}
Ok(cap_offset)
}
@ -469,6 +482,18 @@ impl PciConfiguration {
return;
}
// Handle potential write to MSI-X message control register
if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx {
if let Some(msix_config) = &self.msix_config {
if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 {
msix_config
.lock()
.unwrap()
.set_msg_ctl(LittleEndian::read_u16(data));
}
}
}
match data.len() {
1 => self.write_byte(reg_idx * 4 + offset as usize, data[0]),
2 => self.write_word(

View File

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
use crate::configuration::{self, PciConfiguration};
use crate::configuration;
use crate::msix::MsixTableEntry;
use crate::PciInterruptPin;
use devices::BusDevice;
@ -80,10 +80,6 @@ pub trait PciDevice: BusDevice {
fn ioeventfds(&self) -> Vec<(&EventFd, u64, u64)> {
Vec::new()
}
/// Gets the configuration registers of the Pci Device.
fn config_registers(&self) -> &PciConfiguration; // TODO - remove these
/// Gets the configuration registers of the Pci Device for modification.
fn config_registers_mut(&mut self) -> &mut PciConfiguration;
/// Reads from a BAR region mapped in to the device.
/// * `addr` - The guest address inside the BAR.
/// * `data` - Filled with the data from `addr`.

View File

@ -17,6 +17,7 @@ const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
const BITS_PER_PBA_ENTRY: usize = 64;
const FUNCTION_MASK_BIT: u8 = 14;
#[derive(Debug, Clone)]
pub struct MsixTableEntry {
@ -26,6 +27,12 @@ pub struct MsixTableEntry {
pub vector_ctl: u32,
}
impl MsixTableEntry {
pub fn is_masked(&self) -> bool {
self.vector_ctl == 1u32
}
}
impl Default for MsixTableEntry {
fn default() -> Self {
MsixTableEntry {
@ -41,6 +48,7 @@ pub struct MsixConfig {
pub table_entries: Vec<MsixTableEntry>,
pub pba_entries: Vec<u64>,
interrupt_cb: Option<Arc<InterruptDelivery>>,
masked: bool,
}
impl MsixConfig {
@ -57,6 +65,7 @@ impl MsixConfig {
table_entries,
pba_entries,
interrupt_cb: None,
masked: false,
}
}
@ -64,6 +73,28 @@ impl MsixConfig {
self.interrupt_cb = Some(cb);
}
pub fn is_masked(&self) -> bool {
self.masked
}
pub fn set_msg_ctl(&mut self, reg: u16) {
let old_masked = self.masked;
self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
// If the Function Mask bit was set, and has just been cleared, it's
// important to go through the entire PBA to check if there was any
// pending MSI-X message to inject, given that the vector is not
// masked.
if old_masked && !self.masked {
for (index, entry) in self.table_entries.clone().iter().enumerate() {
if !entry.is_masked() && self.get_pba_bit(index as u16) == 1 {
self.inject_msix_and_clear_pba(index);
}
}
}
}
pub fn read_table(&mut self, offset: u64, data: &mut [u8]) {
assert!((data.len() == 4 || data.len() == 8));
@ -117,8 +148,8 @@ impl MsixConfig {
let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
// Store the value of the vector control register
let mut old_vector_ctl: Option<u32> = None;
// Store the value of the entry before modification
let mut old_entry: Option<MsixTableEntry> = None;
match data.len() {
4 => {
@ -128,7 +159,7 @@ impl MsixConfig {
0x4 => self.table_entries[index].msg_addr_hi = value,
0x8 => self.table_entries[index].msg_data = value,
0x10 => {
old_vector_ctl = Some(self.table_entries[index].vector_ctl);
old_entry = Some(self.table_entries[index].clone());
self.table_entries[index].vector_ctl = value;
}
_ => error!("invalid offset"),
@ -144,7 +175,7 @@ impl MsixConfig {
self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
}
0x8 => {
old_vector_ctl = Some(self.table_entries[index].vector_ctl);
old_entry = Some(self.table_entries[index].clone());
self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
self.table_entries[index].vector_ctl = (value >> 32) as u32;
}
@ -161,24 +192,16 @@ impl MsixConfig {
// bit has been flipped from 1 to 0, we need to inject a MSI message
// if the corresponding pending bit from the PBA is set. Once the MSI
// has been injected, the pending bit in the PBA needs to be cleared.
if let Some(old_vector_ctl) = old_vector_ctl {
// Check is bit has been flipped
if old_vector_ctl == 0x0000_0001u32
&& self.table_entries[index].vector_ctl == 0x0000_0000u32
// All of this is valid only if MSI-X has not been masked for the whole
// device.
if let Some(old_entry) = old_entry {
// Check if bit has been flipped
if !self.is_masked()
&& old_entry.is_masked()
&& !self.table_entries[index].is_masked()
&& self.get_pba_bit(index as u16) == 1
{
// Inject the MSI message
if let Some(cb) = &self.interrupt_cb {
match (cb)(InterruptParameters {
msix: Some(&self.table_entries[index]),
}) {
Ok(_) => debug!("MSI-X injected on vector control flip"),
Err(e) => error!("failed to inject MSI-X: {}", e),
};
}
// Clear the bit from PBA
self.set_pba_bit(index as u16, true);
self.inject_msix_and_clear_pba(index);
}
}
}
@ -240,7 +263,7 @@ impl MsixConfig {
}
}
fn get_pba_bit(&mut self, vector: u16) -> u8 {
fn get_pba_bit(&self, vector: u16) -> u8 {
assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
@ -248,6 +271,21 @@ impl MsixConfig {
((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
}
fn inject_msix_and_clear_pba(&mut self, vector: usize) {
// Inject the MSI message
if let Some(cb) = &self.interrupt_cb {
match (cb)(InterruptParameters {
msix: Some(&self.table_entries[vector]),
}) {
Ok(_) => debug!("MSI-X injected on vector control flip"),
Err(e) => error!("failed to inject MSI-X: {}", e),
};
}
// Clear the bit from PBA
self.set_pba_bit(vector as u16, true);
}
}
#[allow(dead_code)]

View File

@ -53,6 +53,7 @@ impl PciRoot {
PciHeaderType::Bridge,
0,
0,
None,
),
devices: Vec::new(),
}

View File

@ -12,7 +12,6 @@ extern crate vm_allocator;
extern crate vm_memory;
extern crate vmm_sys_util;
use byteorder::{ByteOrder, LittleEndian};
use libc::EFD_NONBLOCK;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
@ -211,6 +210,8 @@ impl VirtioPciDevice {
let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;
let msix_config = Arc::new(Mutex::new(MsixConfig::new(msix_num)));
let configuration = PciConfiguration::new(
VIRTIO_PCI_VENDOR_ID,
pci_device_id,
@ -220,6 +221,7 @@ impl VirtioPciDevice {
PciHeaderType::Device,
VIRTIO_PCI_VENDOR_ID,
pci_device_id,
Some(msix_config.clone()),
);
Ok(VirtioPciDevice {
@ -232,7 +234,7 @@ impl VirtioPciDevice {
queue_select: 0,
msix_config: 0,
},
msix_config: Arc::new(Mutex::new(MsixConfig::new(msix_num))),
msix_config,
msix_num,
device,
device_activated: false,
@ -366,15 +368,16 @@ impl PciDevice for VirtioPciDevice {
let msix_config = self.msix_config.clone();
let cb = Arc::new(Box::new(move |queue: &Queue| {
let entry = &msix_config.lock().unwrap().table_entries[queue.vector as usize];
let config = &mut msix_config.lock().unwrap();
let entry = &config.table_entries[queue.vector as usize];
// In case the vector control register associated with the entry
// has its first bit set, this means the vector is masked and the
// device should not inject the interrupt.
// Instead, the Pending Bit Array table is updated to reflect there
// is a pending interrupt for this specific vector.
if entry.vector_ctl == 0x0000_0001u32 {
msix_config.lock().unwrap().set_pba_bit(queue.vector, false);
if config.is_masked() || entry.is_masked() {
config.set_pba_bit(queue.vector, false);
return Ok(());
}
@ -384,14 +387,6 @@ impl PciDevice for VirtioPciDevice {
self.interrupt_cb = Some(cb);
}
fn config_registers(&self) -> &PciConfiguration {
&self.configuration
}
fn config_registers_mut(&mut self) -> &mut PciConfiguration {
&mut self.configuration
}
fn ioeventfds(&self) -> Vec<(&EventFd, u64, u64)> {
let bar0 = self
.configuration
@ -578,24 +573,11 @@ impl BusDevice for VirtioPciDevice {
}
fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
if offset as usize + data.len() > 4 {
return;
}
let regs = self.config_registers_mut();
match data.len() {
1 => regs.write_byte(reg_idx * 4 + offset as usize, data[0]),
2 => regs.write_word(
reg_idx * 4 + offset as usize,
u16::from(data[0]) | (u16::from(data[1]) << 8),
),
4 => regs.write_reg(reg_idx, LittleEndian::read_u32(data)),
_ => (),
}
self.configuration
.write_config_register(reg_idx, offset, data);
}
fn read_config_register(&self, reg_idx: usize) -> u32 {
self.config_registers().read_reg(reg_idx)
self.configuration.read_config_register(reg_idx)
}
}