vmm: Add MSI-X support to virtio-pci devices

In order to allow virtio-pci devices to use MSI-X messages instead
of legacy pin-based interrupts, this patch implements MSI-X
support for cloud-hypervisor. The VMM code and virtio-pci bits have
been modified based on the "msix" module previously added to the pci
crate.

Fixes #12

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
Sebastien Boeuf 2019-05-29 16:33:29 -07:00 committed by Rob Bradford
parent 13a065d2cd
commit 8df05b72dc
6 changed files with 148 additions and 36 deletions

View File

@ -3,6 +3,7 @@
// found in the LICENSE-BSD-3-Clause file. // found in the LICENSE-BSD-3-Clause file.
use crate::configuration::{self, PciConfiguration}; use crate::configuration::{self, PciConfiguration};
use crate::msix::MsixTableEntry;
use crate::PciInterruptPin; use crate::PciInterruptPin;
use devices::BusDevice; use devices::BusDevice;
use std; use std;
@ -13,6 +14,8 @@ use vm_memory::{GuestAddress, GuestUsize};
use vmm_sys_util::EventFd; use vmm_sys_util::EventFd;
pub type IrqClosure = Box<Fn() -> std::result::Result<(), std::io::Error> + Send + Sync>; pub type IrqClosure = Box<Fn() -> std::result::Result<(), std::io::Error> + Send + Sync>;
pub type MsixClosure =
Box<Fn(MsixTableEntry) -> std::result::Result<(), std::io::Error> + Send + Sync>;
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
@ -44,7 +47,16 @@ impl Display for Error {
pub trait PciDevice: BusDevice { pub trait PciDevice: BusDevice {
/// Assign a legacy PCI IRQ to this device. /// Assign a legacy PCI IRQ to this device.
/// The device may write to `irq_evt` to trigger an interrupt. /// The device may write to `irq_evt` to trigger an interrupt.
fn assign_irq(&mut self, _irq_cb: Arc<IrqClosure>, _irq_num: u32, _irq_pin: PciInterruptPin) {} fn assign_pin_irq(
&mut self,
_irq_cb: Arc<IrqClosure>,
_irq_num: u32,
_irq_pin: PciInterruptPin,
) {
}
/// Assign MSI-X to this device.
fn assign_msix(&mut self, _msi_cb: Arc<MsixClosure>) {}
/// Allocates the needed PCI BARs space using the `allocate` function which takes a size and /// Allocates the needed PCI BARs space using the `allocate` function which takes a size and
/// returns an address. Returns a Vec of (GuestAddress, GuestUsize) tuples. /// returns an address. Returns a Vec of (GuestAddress, GuestUsize) tuples.

View File

@ -12,6 +12,7 @@ extern crate vmm_sys_util;
mod configuration; mod configuration;
mod device; mod device;
mod msix;
mod root; mod root;
pub use self::configuration::{ pub use self::configuration::{
@ -20,7 +21,8 @@ pub use self::configuration::{
PciSubclass, PciSubclass,
}; };
pub use self::device::Error as PciDeviceError; pub use self::device::Error as PciDeviceError;
pub use self::device::{IrqClosure, PciDevice}; pub use self::device::{IrqClosure, MsixClosure, PciDevice};
pub use self::msix::{MsixCap, MsixConfig, MsixTableEntry};
pub use self::root::{PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; pub use self::root::{PciConfigIo, PciConfigMmio, PciRoot, PciRootError};
/// PCI has four interrupt pins A->D. /// PCI has four interrupt pins A->D.

View File

@ -218,6 +218,9 @@ pub struct Queue {
/// Indicates if the queue is finished with configuration /// Indicates if the queue is finished with configuration
pub ready: bool, pub ready: bool,
/// Interrupt vector index of the queue
pub vector: u16,
/// Guest physical address of the descriptor table /// Guest physical address of the descriptor table
pub desc_table: GuestAddress, pub desc_table: GuestAddress,
@ -238,6 +241,7 @@ impl Queue {
max_size, max_size,
size: max_size, size: max_size,
ready: false, ready: false,
vector: 0,
desc_table: GuestAddress(0), desc_table: GuestAddress(0),
avail_ring: GuestAddress(0), avail_ring: GuestAddress(0),
used_ring: GuestAddress(0), used_ring: GuestAddress(0),

View File

@ -40,6 +40,7 @@ pub struct VirtioPciCommonConfig {
pub device_feature_select: u32, pub device_feature_select: u32,
pub driver_feature_select: u32, pub driver_feature_select: u32,
pub queue_select: u16, pub queue_select: u16,
pub msix_config: u16,
} }
impl VirtioPciCommonConfig { impl VirtioPciCommonConfig {
@ -119,10 +120,11 @@ impl VirtioPciCommonConfig {
fn read_common_config_word(&self, offset: u64, queues: &[Queue]) -> u16 { fn read_common_config_word(&self, offset: u64, queues: &[Queue]) -> u16 {
debug!("read_common_config_word: offset 0x{:x}", offset); debug!("read_common_config_word: offset 0x{:x}", offset);
match offset { match offset {
0x10 => 0, // TODO msi-x (crbug/854765): self.msix_config, 0x10 => self.msix_config,
0x12 => queues.len() as u16, // num_queues 0x12 => queues.len() as u16, // num_queues
0x16 => self.queue_select, 0x16 => self.queue_select,
0x18 => self.with_queue(queues, |q| q.size).unwrap_or(0), 0x18 => self.with_queue(queues, |q| q.size).unwrap_or(0),
0x1a => self.with_queue(queues, |q| q.vector).unwrap_or(0),
0x1c => { 0x1c => {
if self.with_queue(queues, |q| q.ready).unwrap_or(false) { if self.with_queue(queues, |q| q.ready).unwrap_or(false) {
1 1
@ -141,10 +143,10 @@ impl VirtioPciCommonConfig {
fn write_common_config_word(&mut self, offset: u64, value: u16, queues: &mut Vec<Queue>) { fn write_common_config_word(&mut self, offset: u64, value: u16, queues: &mut Vec<Queue>) {
debug!("write_common_config_word: offset 0x{:x}", offset); debug!("write_common_config_word: offset 0x{:x}", offset);
match offset { match offset {
0x10 => (), // TODO msi-x (crbug/854765): self.msix_config = value, 0x10 => self.msix_config = value,
0x16 => self.queue_select = value, 0x16 => self.queue_select = value,
0x18 => self.with_queue_mut(queues, |q| q.size = value), 0x18 => self.with_queue_mut(queues, |q| q.size = value),
0x1a => (), // TODO msi-x (crbug/854765): self.with_queue_mut(queues, |q| q.msix_vector = v), 0x1a => self.with_queue_mut(queues, |q| q.vector = value),
0x1c => self.with_queue_mut(queues, |q| q.ready = value == 1), 0x1c => self.with_queue_mut(queues, |q| q.ready = value == 1),
_ => { _ => {
warn!("invalid virtio register word write: 0x{:x}", offset); warn!("invalid virtio register word write: 0x{:x}", offset);

View File

@ -16,11 +16,13 @@ use byteorder::{ByteOrder, LittleEndian};
use libc::EFD_NONBLOCK; use libc::EFD_NONBLOCK;
use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc; use std::sync::Arc;
use std::sync::Mutex;
use devices::BusDevice; use devices::BusDevice;
use pci::{ use pci::{
IrqClosure, PciBarConfiguration, PciCapability, PciCapabilityID, PciClassCode, IrqClosure, MsixCap, MsixClosure, MsixConfig, PciBarConfiguration, PciCapability,
PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciInterruptPin, PciSubclass, PciCapabilityID, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType,
PciInterruptPin, PciSubclass,
}; };
use vm_allocator::SystemAllocator; use vm_allocator::SystemAllocator;
use vm_memory::{Address, ByteValued, GuestAddress, GuestMemoryMmap, GuestUsize, Le32}; use vm_memory::{Address, ByteValued, GuestAddress, GuestMemoryMmap, GuestUsize, Le32};
@ -143,7 +145,11 @@ const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000; const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000; const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
const NOTIFICATION_SIZE: u64 = 0x1000; const NOTIFICATION_SIZE: u64 = 0x1000;
const CAPABILITY_BAR_SIZE: u64 = 0x4000; const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
const MSIX_TABLE_SIZE: u64 = 0x1000;
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
const MSIX_PBA_SIZE: u64 = 0x1000;
const CAPABILITY_BAR_SIZE: u64 = 0x8000;
const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address. const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.
@ -157,6 +163,12 @@ pub struct VirtioPciDevice {
// virtio PCI common configuration // virtio PCI common configuration
common_config: VirtioPciCommonConfig, common_config: VirtioPciCommonConfig,
// MSI-X config
msix_config: Arc<Mutex<MsixConfig>>,
// Number of MSI-X vectors
msix_num: u16,
// Virtio device reference and status // Virtio device reference and status
device: Box<VirtioDevice>, device: Box<VirtioDevice>,
device_activated: bool, device_activated: bool,
@ -178,7 +190,7 @@ pub struct VirtioPciDevice {
impl VirtioPciDevice { impl VirtioPciDevice {
/// Constructs a new PCI transport for the given virtio device. /// Constructs a new PCI transport for the given virtio device.
pub fn new(memory: GuestMemoryMmap, device: Box<VirtioDevice>) -> Result<Self> { pub fn new(memory: GuestMemoryMmap, device: Box<VirtioDevice>, msix_num: u16) -> Result<Self> {
let mut queue_evts = Vec::new(); let mut queue_evts = Vec::new();
for _ in device.queue_max_sizes().iter() { for _ in device.queue_max_sizes().iter() {
queue_evts.push(EventFd::new(EFD_NONBLOCK)?) queue_evts.push(EventFd::new(EFD_NONBLOCK)?)
@ -210,7 +222,10 @@ impl VirtioPciDevice {
device_feature_select: 0, device_feature_select: 0,
driver_feature_select: 0, driver_feature_select: 0,
queue_select: 0, queue_select: 0,
msix_config: 0,
}, },
msix_config: Arc::new(Mutex::new(MsixConfig::new(msix_num))),
msix_num,
device, device,
device_activated: false, device_activated: false,
interrupt_status: Arc::new(AtomicUsize::new(0)), interrupt_status: Arc::new(AtomicUsize::new(0)),
@ -302,13 +317,23 @@ impl VirtioPciDevice {
.add_capability(&configuration_cap) .add_capability(&configuration_cap)
.map_err(PciDeviceError::CapabilitiesSetup)?; .map_err(PciDeviceError::CapabilitiesSetup)?;
let msix_cap = MsixCap::new(
settings_bar,
self.msix_num,
MSIX_TABLE_BAR_OFFSET as u32,
MSIX_PBA_BAR_OFFSET as u32,
);
self.configuration
.add_capability(&msix_cap)
.map_err(PciDeviceError::CapabilitiesSetup)?;
self.settings_bar = settings_bar; self.settings_bar = settings_bar;
Ok(()) Ok(())
} }
} }
impl PciDevice for VirtioPciDevice { impl PciDevice for VirtioPciDevice {
fn assign_irq(&mut self, irq_cb: Arc<IrqClosure>, irq_num: u32, irq_pin: PciInterruptPin) { fn assign_pin_irq(&mut self, irq_cb: Arc<IrqClosure>, irq_num: u32, irq_pin: PciInterruptPin) {
self.configuration.set_irq(irq_num as u8, irq_pin); self.configuration.set_irq(irq_num as u8, irq_pin);
let cb = Arc::new(Box::new(move |_queue: &Queue| (irq_cb)()) as VirtioInterrupt); let cb = Arc::new(Box::new(move |_queue: &Queue| (irq_cb)()) as VirtioInterrupt);
@ -316,6 +341,16 @@ impl PciDevice for VirtioPciDevice {
self.interrupt_cb = Some(cb); self.interrupt_cb = Some(cb);
} }
fn assign_msix(&mut self, msi_cb: Arc<MsixClosure>) {
let msix_config = self.msix_config.clone();
let cb = Arc::new(Box::new(move |queue: &Queue| {
(msi_cb)(msix_config.lock().unwrap().table_entries[queue.vector as usize].clone())
}) as VirtioInterruptClosure);
self.interrupt_cb = Some(cb);
}
fn config_registers(&self) -> &PciConfiguration { fn config_registers(&self) -> &PciConfiguration {
&self.configuration &self.configuration
} }
@ -406,6 +441,18 @@ impl PciDevice for VirtioPciDevice {
{ {
// Handled with ioeventfds. // Handled with ioeventfds.
} }
o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => {
self.msix_config
.lock()
.unwrap()
.read_table(o - MSIX_TABLE_BAR_OFFSET, data);
}
o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => {
self.msix_config
.lock()
.unwrap()
.read_pba(o - MSIX_PBA_BAR_OFFSET, data);
}
_ => (), _ => (),
} }
} }
@ -434,6 +481,18 @@ impl PciDevice for VirtioPciDevice {
{ {
// Handled with ioeventfds. // Handled with ioeventfds.
} }
o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => {
self.msix_config
.lock()
.unwrap()
.write_table(o - MSIX_TABLE_BAR_OFFSET, data);
}
o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => {
self.msix_config
.lock()
.unwrap()
.write_pba(o - MSIX_PBA_BAR_OFFSET, data);
}
_ => (), _ => (),
}; };

View File

@ -22,12 +22,14 @@ extern crate vm_virtio;
extern crate vmm_sys_util; extern crate vmm_sys_util;
use crate::config::VmConfig; use crate::config::VmConfig;
use kvm_bindings::{kvm_pit_config, kvm_userspace_memory_region, KVM_PIT_SPEAKER_DUMMY}; use kvm_bindings::{kvm_msi, kvm_pit_config, kvm_userspace_memory_region, KVM_PIT_SPEAKER_DUMMY};
use kvm_ioctls::*; use kvm_ioctls::*;
use libc::{c_void, siginfo_t, EFD_NONBLOCK}; use libc::{c_void, siginfo_t, EFD_NONBLOCK};
use linux_loader::loader::KernelLoader; use linux_loader::loader::KernelLoader;
use net_util::Tap; use net_util::Tap;
use pci::{IrqClosure, PciConfigIo, PciDevice, PciInterruptPin, PciRoot}; use pci::{
IrqClosure, MsixClosure, MsixTableEntry, PciConfigIo, PciDevice, PciInterruptPin, PciRoot,
};
use qcow::{self, ImageType, QcowFile}; use qcow::{self, ImageType, QcowFile};
use std::ffi::CString; use std::ffi::CString;
use std::fs::{File, OpenOptions}; use std::fs::{File, OpenOptions};
@ -47,6 +49,7 @@ use vmm_sys_util::EventFd;
const VCPU_RTSIG_OFFSET: i32 = 0; const VCPU_RTSIG_OFFSET: i32 = 0;
const X86_64_IRQ_BASE: u32 = 5; const X86_64_IRQ_BASE: u32 = 5;
const DEFAULT_MSIX_VEC_NUM: u16 = 2;
// CPUID feature bits // CPUID feature bits
const ECX_HYPERVISOR_SHIFT: u32 = 31; // Hypervisor bit. const ECX_HYPERVISOR_SHIFT: u32 = 31; // Hypervisor bit.
@ -303,8 +306,9 @@ impl DeviceManager {
fn new( fn new(
memory: GuestMemoryMmap, memory: GuestMemoryMmap,
allocator: &mut SystemAllocator, allocator: &mut SystemAllocator,
vm_fd: &VmFd, vm_fd: &Arc<VmFd>,
vm_cfg: &VmConfig, vm_cfg: &VmConfig,
msi_capable: bool,
) -> DeviceManagerResult<Self> { ) -> DeviceManagerResult<Self> {
let io_bus = devices::Bus::new(); let io_bus = devices::Bus::new();
let mut mmio_bus = devices::Bus::new(); let mut mmio_bus = devices::Bus::new();
@ -357,6 +361,7 @@ impl DeviceManager {
vm_fd, vm_fd,
&mut pci_root, &mut pci_root,
&mut mmio_bus, &mut mmio_bus,
msi_capable,
)?; )?;
} }
@ -381,6 +386,7 @@ impl DeviceManager {
vm_fd, vm_fd,
&mut pci_root, &mut pci_root,
&mut mmio_bus, &mut mmio_bus,
msi_capable,
)?; )?;
} }
@ -397,6 +403,7 @@ impl DeviceManager {
vm_fd, vm_fd,
&mut pci_root, &mut pci_root,
&mut mmio_bus, &mut mmio_bus,
msi_capable,
)?; )?;
} }
@ -417,11 +424,13 @@ impl DeviceManager {
virtio_device: Box<vm_virtio::VirtioDevice>, virtio_device: Box<vm_virtio::VirtioDevice>,
memory: GuestMemoryMmap, memory: GuestMemoryMmap,
allocator: &mut SystemAllocator, allocator: &mut SystemAllocator,
vm_fd: &VmFd, vm_fd: &Arc<VmFd>,
pci_root: &mut PciRoot, pci_root: &mut PciRoot,
mmio_bus: &mut devices::Bus, mmio_bus: &mut devices::Bus,
msi_capable: bool,
) -> DeviceManagerResult<()> { ) -> DeviceManagerResult<()> {
let mut virtio_pci_device = VirtioPciDevice::new(memory, virtio_device) let mut virtio_pci_device =
VirtioPciDevice::new(memory, virtio_device, DEFAULT_MSIX_VEC_NUM)
.map_err(DeviceManagerError::VirtioDevice)?; .map_err(DeviceManagerError::VirtioDevice)?;
let bars = virtio_pci_device let bars = virtio_pci_device
@ -435,8 +444,32 @@ impl DeviceManager {
.map_err(DeviceManagerError::RegisterIoevent)?; .map_err(DeviceManagerError::RegisterIoevent)?;
} }
// Assign IRQ to the virtio-pci device if msi_capable {
let vm_fd_clone = vm_fd.clone();
let msi_cb = Arc::new(Box::new(move |entry: MsixTableEntry| {
let msi_queue = kvm_msi {
address_lo: entry.msg_addr_lo,
address_hi: entry.msg_addr_hi,
data: entry.msg_data,
flags: 0u32,
devid: 0u32,
pad: [0u8; 12],
};
vm_fd_clone.signal_msi(msi_queue).map(|ret| {
if ret > 0 {
debug!("MSI message successfully delivered");
} else if ret == 0 {
warn!("failed to deliver MSI message, blocked by guest");
}
})
}) as MsixClosure);
virtio_pci_device.assign_msix(msi_cb);
} else {
let irqfd = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; let irqfd = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?;
let irq_num = allocator let irq_num = allocator
.allocate_irq() .allocate_irq()
.ok_or(DeviceManagerError::AllocateIrq)?; .ok_or(DeviceManagerError::AllocateIrq)?;
@ -445,9 +478,8 @@ impl DeviceManager {
.map_err(DeviceManagerError::Irq)?; .map_err(DeviceManagerError::Irq)?;
let irq_cb = Arc::new(Box::new(move || irqfd.write(1)) as IrqClosure); let irq_cb = Arc::new(Box::new(move || irqfd.write(1)) as IrqClosure);
virtio_pci_device.assign_pin_irq(irq_cb, irq_num as u32, PciInterruptPin::IntA);
// Let's use irq line INTA for now. }
virtio_pci_device.assign_irq(irq_cb, irq_num as u32, PciInterruptPin::IntA);
let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device)); let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device));
@ -542,7 +574,7 @@ impl AsRawFd for EpollContext {
} }
pub struct Vm<'a> { pub struct Vm<'a> {
fd: VmFd, fd: Arc<VmFd>,
kernel: File, kernel: File,
memory: GuestMemoryMmap, memory: GuestMemoryMmap,
vcpus: Vec<thread::JoinHandle<()>>, vcpus: Vec<thread::JoinHandle<()>>,
@ -557,6 +589,7 @@ impl<'a> Vm<'a> {
pub fn new(kvm: &Kvm, config: VmConfig<'a>) -> Result<Self> { pub fn new(kvm: &Kvm, config: VmConfig<'a>) -> Result<Self> {
let kernel = File::open(&config.kernel.path).map_err(Error::KernelFile)?; let kernel = File::open(&config.kernel.path).map_err(Error::KernelFile)?;
let fd = kvm.create_vm().map_err(Error::VmCreate)?; let fd = kvm.create_vm().map_err(Error::VmCreate)?;
let fd = Arc::new(fd);
// Init guest memory // Init guest memory
let arch_mem_regions = arch::arch_memory_regions(u64::from(&config.memory) << 20); let arch_mem_regions = arch::arch_memory_regions(u64::from(&config.memory) << 20);
@ -607,7 +640,13 @@ impl<'a> Vm<'a> {
) )
.ok_or(Error::CreateSystemAllocator)?; .ok_or(Error::CreateSystemAllocator)?;
let device_manager = DeviceManager::new(guest_memory.clone(), &mut allocator, &fd, &config) let device_manager = DeviceManager::new(
guest_memory.clone(),
&mut allocator,
&fd,
&config,
kvm.check_extension(Cap::SignalMsi),
)
.map_err(Error::DeviceManager)?; .map_err(Error::DeviceManager)?;
fd.register_irqfd(device_manager.serial_evt.as_raw_fd(), 4) fd.register_irqfd(device_manager.serial_evt.as_raw_fd(), 4)
.map_err(Error::Irq)?; .map_err(Error::Irq)?;
@ -789,12 +828,6 @@ impl<'a> Vm<'a> {
&self.memory &self.memory
} }
/// Gets a reference to the kvm file descriptor owned by this VM.
///
pub fn get_fd(&self) -> &VmFd {
&self.fd
}
fn patch_cpuid(cpuid: &mut CpuId) { fn patch_cpuid(cpuid: &mut CpuId) {
let entries = cpuid.mut_entries_slice(); let entries = cpuid.mut_entries_slice();