mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2025-02-01 17:35:19 +00:00
vmm: Add MSI-X support to virtio-pci devices
In order to allow virtio-pci devices to use MSI-X messages instead of legacy pin based interrupts, this patch implements the MSI-X support for cloud-hypervisor. The VMM code and virtio-pci bits have been modified based on the "msix" module previously added to the pci crate. Fixes #12 Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
parent
13a065d2cd
commit
8df05b72dc
@ -3,6 +3,7 @@
|
||||
// found in the LICENSE-BSD-3-Clause file.
|
||||
|
||||
use crate::configuration::{self, PciConfiguration};
|
||||
use crate::msix::MsixTableEntry;
|
||||
use crate::PciInterruptPin;
|
||||
use devices::BusDevice;
|
||||
use std;
|
||||
@ -13,6 +14,8 @@ use vm_memory::{GuestAddress, GuestUsize};
|
||||
use vmm_sys_util::EventFd;
|
||||
|
||||
pub type IrqClosure = Box<Fn() -> std::result::Result<(), std::io::Error> + Send + Sync>;
|
||||
pub type MsixClosure =
|
||||
Box<Fn(MsixTableEntry) -> std::result::Result<(), std::io::Error> + Send + Sync>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
@ -44,7 +47,16 @@ impl Display for Error {
|
||||
pub trait PciDevice: BusDevice {
|
||||
/// Assign a legacy PCI IRQ to this device.
|
||||
/// The device may write to `irq_evt` to trigger an interrupt.
|
||||
fn assign_irq(&mut self, _irq_cb: Arc<IrqClosure>, _irq_num: u32, _irq_pin: PciInterruptPin) {}
|
||||
fn assign_pin_irq(
|
||||
&mut self,
|
||||
_irq_cb: Arc<IrqClosure>,
|
||||
_irq_num: u32,
|
||||
_irq_pin: PciInterruptPin,
|
||||
) {
|
||||
}
|
||||
|
||||
/// Assign MSI-X to this device.
|
||||
fn assign_msix(&mut self, _msi_cb: Arc<MsixClosure>) {}
|
||||
|
||||
/// Allocates the needed PCI BARs space using the `allocate` function which takes a size and
|
||||
/// returns an address. Returns a Vec of (GuestAddress, GuestUsize) tuples.
|
||||
|
@ -12,6 +12,7 @@ extern crate vmm_sys_util;
|
||||
|
||||
mod configuration;
|
||||
mod device;
|
||||
mod msix;
|
||||
mod root;
|
||||
|
||||
pub use self::configuration::{
|
||||
@ -20,7 +21,8 @@ pub use self::configuration::{
|
||||
PciSubclass,
|
||||
};
|
||||
pub use self::device::Error as PciDeviceError;
|
||||
pub use self::device::{IrqClosure, PciDevice};
|
||||
pub use self::device::{IrqClosure, MsixClosure, PciDevice};
|
||||
pub use self::msix::{MsixCap, MsixConfig, MsixTableEntry};
|
||||
pub use self::root::{PciConfigIo, PciConfigMmio, PciRoot, PciRootError};
|
||||
|
||||
/// PCI has four interrupt pins A->D.
|
||||
|
@ -218,6 +218,9 @@ pub struct Queue {
|
||||
/// Inidcates if the queue is finished with configuration
|
||||
pub ready: bool,
|
||||
|
||||
/// Interrupt vector index of the queue
|
||||
pub vector: u16,
|
||||
|
||||
/// Guest physical address of the descriptor table
|
||||
pub desc_table: GuestAddress,
|
||||
|
||||
@ -238,6 +241,7 @@ impl Queue {
|
||||
max_size,
|
||||
size: max_size,
|
||||
ready: false,
|
||||
vector: 0,
|
||||
desc_table: GuestAddress(0),
|
||||
avail_ring: GuestAddress(0),
|
||||
used_ring: GuestAddress(0),
|
||||
|
@ -40,6 +40,7 @@ pub struct VirtioPciCommonConfig {
|
||||
pub device_feature_select: u32,
|
||||
pub driver_feature_select: u32,
|
||||
pub queue_select: u16,
|
||||
pub msix_config: u16,
|
||||
}
|
||||
|
||||
impl VirtioPciCommonConfig {
|
||||
@ -119,10 +120,11 @@ impl VirtioPciCommonConfig {
|
||||
fn read_common_config_word(&self, offset: u64, queues: &[Queue]) -> u16 {
|
||||
debug!("read_common_config_word: offset 0x{:x}", offset);
|
||||
match offset {
|
||||
0x10 => 0, // TODO msi-x (crbug/854765): self.msix_config,
|
||||
0x10 => self.msix_config,
|
||||
0x12 => queues.len() as u16, // num_queues
|
||||
0x16 => self.queue_select,
|
||||
0x18 => self.with_queue(queues, |q| q.size).unwrap_or(0),
|
||||
0x1a => self.with_queue(queues, |q| q.vector).unwrap_or(0),
|
||||
0x1c => {
|
||||
if self.with_queue(queues, |q| q.ready).unwrap_or(false) {
|
||||
1
|
||||
@ -141,10 +143,10 @@ impl VirtioPciCommonConfig {
|
||||
fn write_common_config_word(&mut self, offset: u64, value: u16, queues: &mut Vec<Queue>) {
|
||||
debug!("write_common_config_word: offset 0x{:x}", offset);
|
||||
match offset {
|
||||
0x10 => (), // TODO msi-x (crbug/854765): self.msix_config = value,
|
||||
0x10 => self.msix_config = value,
|
||||
0x16 => self.queue_select = value,
|
||||
0x18 => self.with_queue_mut(queues, |q| q.size = value),
|
||||
0x1a => (), // TODO msi-x (crbug/854765): self.with_queue_mut(queues, |q| q.msix_vector = v),
|
||||
0x1a => self.with_queue_mut(queues, |q| q.vector = value),
|
||||
0x1c => self.with_queue_mut(queues, |q| q.ready = value == 1),
|
||||
_ => {
|
||||
warn!("invalid virtio register word write: 0x{:x}", offset);
|
||||
|
@ -16,11 +16,13 @@ use byteorder::{ByteOrder, LittleEndian};
|
||||
use libc::EFD_NONBLOCK;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use devices::BusDevice;
|
||||
use pci::{
|
||||
IrqClosure, PciBarConfiguration, PciCapability, PciCapabilityID, PciClassCode,
|
||||
PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciInterruptPin, PciSubclass,
|
||||
IrqClosure, MsixCap, MsixClosure, MsixConfig, PciBarConfiguration, PciCapability,
|
||||
PciCapabilityID, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType,
|
||||
PciInterruptPin, PciSubclass,
|
||||
};
|
||||
use vm_allocator::SystemAllocator;
|
||||
use vm_memory::{Address, ByteValued, GuestAddress, GuestMemoryMmap, GuestUsize, Le32};
|
||||
@ -143,7 +145,11 @@ const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
|
||||
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
|
||||
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
|
||||
const NOTIFICATION_SIZE: u64 = 0x1000;
|
||||
const CAPABILITY_BAR_SIZE: u64 = 0x4000;
|
||||
const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
|
||||
const MSIX_TABLE_SIZE: u64 = 0x1000;
|
||||
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
|
||||
const MSIX_PBA_SIZE: u64 = 0x1000;
|
||||
const CAPABILITY_BAR_SIZE: u64 = 0x8000;
|
||||
|
||||
const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.
|
||||
|
||||
@ -157,6 +163,12 @@ pub struct VirtioPciDevice {
|
||||
// virtio PCI common configuration
|
||||
common_config: VirtioPciCommonConfig,
|
||||
|
||||
// MSI-X config
|
||||
msix_config: Arc<Mutex<MsixConfig>>,
|
||||
|
||||
// Number of MSI-X vectors
|
||||
msix_num: u16,
|
||||
|
||||
// Virtio device reference and status
|
||||
device: Box<VirtioDevice>,
|
||||
device_activated: bool,
|
||||
@ -178,7 +190,7 @@ pub struct VirtioPciDevice {
|
||||
|
||||
impl VirtioPciDevice {
|
||||
/// Constructs a new PCI transport for the given virtio device.
|
||||
pub fn new(memory: GuestMemoryMmap, device: Box<VirtioDevice>) -> Result<Self> {
|
||||
pub fn new(memory: GuestMemoryMmap, device: Box<VirtioDevice>, msix_num: u16) -> Result<Self> {
|
||||
let mut queue_evts = Vec::new();
|
||||
for _ in device.queue_max_sizes().iter() {
|
||||
queue_evts.push(EventFd::new(EFD_NONBLOCK)?)
|
||||
@ -210,7 +222,10 @@ impl VirtioPciDevice {
|
||||
device_feature_select: 0,
|
||||
driver_feature_select: 0,
|
||||
queue_select: 0,
|
||||
msix_config: 0,
|
||||
},
|
||||
msix_config: Arc::new(Mutex::new(MsixConfig::new(msix_num))),
|
||||
msix_num,
|
||||
device,
|
||||
device_activated: false,
|
||||
interrupt_status: Arc::new(AtomicUsize::new(0)),
|
||||
@ -302,13 +317,23 @@ impl VirtioPciDevice {
|
||||
.add_capability(&configuration_cap)
|
||||
.map_err(PciDeviceError::CapabilitiesSetup)?;
|
||||
|
||||
let msix_cap = MsixCap::new(
|
||||
settings_bar,
|
||||
self.msix_num,
|
||||
MSIX_TABLE_BAR_OFFSET as u32,
|
||||
MSIX_PBA_BAR_OFFSET as u32,
|
||||
);
|
||||
self.configuration
|
||||
.add_capability(&msix_cap)
|
||||
.map_err(PciDeviceError::CapabilitiesSetup)?;
|
||||
|
||||
self.settings_bar = settings_bar;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl PciDevice for VirtioPciDevice {
|
||||
fn assign_irq(&mut self, irq_cb: Arc<IrqClosure>, irq_num: u32, irq_pin: PciInterruptPin) {
|
||||
fn assign_pin_irq(&mut self, irq_cb: Arc<IrqClosure>, irq_num: u32, irq_pin: PciInterruptPin) {
|
||||
self.configuration.set_irq(irq_num as u8, irq_pin);
|
||||
|
||||
let cb = Arc::new(Box::new(move |_queue: &Queue| (irq_cb)()) as VirtioInterrupt);
|
||||
@ -316,6 +341,16 @@ impl PciDevice for VirtioPciDevice {
|
||||
self.interrupt_cb = Some(cb);
|
||||
}
|
||||
|
||||
fn assign_msix(&mut self, msi_cb: Arc<MsixClosure>) {
|
||||
let msix_config = self.msix_config.clone();
|
||||
|
||||
let cb = Arc::new(Box::new(move |queue: &Queue| {
|
||||
(msi_cb)(msix_config.lock().unwrap().table_entries[queue.vector as usize].clone())
|
||||
}) as VirtioInterruptClosure);
|
||||
|
||||
self.interrupt_cb = Some(cb);
|
||||
}
|
||||
|
||||
fn config_registers(&self) -> &PciConfiguration {
|
||||
&self.configuration
|
||||
}
|
||||
@ -406,6 +441,18 @@ impl PciDevice for VirtioPciDevice {
|
||||
{
|
||||
// Handled with ioeventfds.
|
||||
}
|
||||
o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => {
|
||||
self.msix_config
|
||||
.lock()
|
||||
.unwrap()
|
||||
.read_table(o - MSIX_TABLE_BAR_OFFSET, data);
|
||||
}
|
||||
o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => {
|
||||
self.msix_config
|
||||
.lock()
|
||||
.unwrap()
|
||||
.read_pba(o - MSIX_PBA_BAR_OFFSET, data);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
@ -434,6 +481,18 @@ impl PciDevice for VirtioPciDevice {
|
||||
{
|
||||
// Handled with ioeventfds.
|
||||
}
|
||||
o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => {
|
||||
self.msix_config
|
||||
.lock()
|
||||
.unwrap()
|
||||
.write_table(o - MSIX_TABLE_BAR_OFFSET, data);
|
||||
}
|
||||
o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => {
|
||||
self.msix_config
|
||||
.lock()
|
||||
.unwrap()
|
||||
.write_pba(o - MSIX_PBA_BAR_OFFSET, data);
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
|
||||
|
@ -22,12 +22,14 @@ extern crate vm_virtio;
|
||||
extern crate vmm_sys_util;
|
||||
|
||||
use crate::config::VmConfig;
|
||||
use kvm_bindings::{kvm_pit_config, kvm_userspace_memory_region, KVM_PIT_SPEAKER_DUMMY};
|
||||
use kvm_bindings::{kvm_msi, kvm_pit_config, kvm_userspace_memory_region, KVM_PIT_SPEAKER_DUMMY};
|
||||
use kvm_ioctls::*;
|
||||
use libc::{c_void, siginfo_t, EFD_NONBLOCK};
|
||||
use linux_loader::loader::KernelLoader;
|
||||
use net_util::Tap;
|
||||
use pci::{IrqClosure, PciConfigIo, PciDevice, PciInterruptPin, PciRoot};
|
||||
use pci::{
|
||||
IrqClosure, MsixClosure, MsixTableEntry, PciConfigIo, PciDevice, PciInterruptPin, PciRoot,
|
||||
};
|
||||
use qcow::{self, ImageType, QcowFile};
|
||||
use std::ffi::CString;
|
||||
use std::fs::{File, OpenOptions};
|
||||
@ -47,6 +49,7 @@ use vmm_sys_util::EventFd;
|
||||
|
||||
const VCPU_RTSIG_OFFSET: i32 = 0;
|
||||
const X86_64_IRQ_BASE: u32 = 5;
|
||||
const DEFAULT_MSIX_VEC_NUM: u16 = 2;
|
||||
|
||||
// CPUID feature bits
|
||||
const ECX_HYPERVISOR_SHIFT: u32 = 31; // Hypervisor bit.
|
||||
@ -303,8 +306,9 @@ impl DeviceManager {
|
||||
fn new(
|
||||
memory: GuestMemoryMmap,
|
||||
allocator: &mut SystemAllocator,
|
||||
vm_fd: &VmFd,
|
||||
vm_fd: &Arc<VmFd>,
|
||||
vm_cfg: &VmConfig,
|
||||
msi_capable: bool,
|
||||
) -> DeviceManagerResult<Self> {
|
||||
let io_bus = devices::Bus::new();
|
||||
let mut mmio_bus = devices::Bus::new();
|
||||
@ -357,6 +361,7 @@ impl DeviceManager {
|
||||
vm_fd,
|
||||
&mut pci_root,
|
||||
&mut mmio_bus,
|
||||
msi_capable,
|
||||
)?;
|
||||
}
|
||||
|
||||
@ -381,6 +386,7 @@ impl DeviceManager {
|
||||
vm_fd,
|
||||
&mut pci_root,
|
||||
&mut mmio_bus,
|
||||
msi_capable,
|
||||
)?;
|
||||
}
|
||||
|
||||
@ -397,6 +403,7 @@ impl DeviceManager {
|
||||
vm_fd,
|
||||
&mut pci_root,
|
||||
&mut mmio_bus,
|
||||
msi_capable,
|
||||
)?;
|
||||
}
|
||||
|
||||
@ -417,12 +424,14 @@ impl DeviceManager {
|
||||
virtio_device: Box<vm_virtio::VirtioDevice>,
|
||||
memory: GuestMemoryMmap,
|
||||
allocator: &mut SystemAllocator,
|
||||
vm_fd: &VmFd,
|
||||
vm_fd: &Arc<VmFd>,
|
||||
pci_root: &mut PciRoot,
|
||||
mmio_bus: &mut devices::Bus,
|
||||
msi_capable: bool,
|
||||
) -> DeviceManagerResult<()> {
|
||||
let mut virtio_pci_device = VirtioPciDevice::new(memory, virtio_device)
|
||||
.map_err(DeviceManagerError::VirtioDevice)?;
|
||||
let mut virtio_pci_device =
|
||||
VirtioPciDevice::new(memory, virtio_device, DEFAULT_MSIX_VEC_NUM)
|
||||
.map_err(DeviceManagerError::VirtioDevice)?;
|
||||
|
||||
let bars = virtio_pci_device
|
||||
.allocate_bars(allocator)
|
||||
@ -435,19 +444,42 @@ impl DeviceManager {
|
||||
.map_err(DeviceManagerError::RegisterIoevent)?;
|
||||
}
|
||||
|
||||
// Assign IRQ to the virtio-pci device
|
||||
let irqfd = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?;
|
||||
let irq_num = allocator
|
||||
.allocate_irq()
|
||||
.ok_or(DeviceManagerError::AllocateIrq)?;
|
||||
vm_fd
|
||||
.register_irqfd(irqfd.as_raw_fd(), irq_num)
|
||||
.map_err(DeviceManagerError::Irq)?;
|
||||
if msi_capable {
|
||||
let vm_fd_clone = vm_fd.clone();
|
||||
|
||||
let irq_cb = Arc::new(Box::new(move || irqfd.write(1)) as IrqClosure);
|
||||
let msi_cb = Arc::new(Box::new(move |entry: MsixTableEntry| {
|
||||
let msi_queue = kvm_msi {
|
||||
address_lo: entry.msg_addr_lo,
|
||||
address_hi: entry.msg_addr_hi,
|
||||
data: entry.msg_data,
|
||||
flags: 0u32,
|
||||
devid: 0u32,
|
||||
pad: [0u8; 12],
|
||||
};
|
||||
|
||||
// Let's use irq line INTA for now.
|
||||
virtio_pci_device.assign_irq(irq_cb, irq_num as u32, PciInterruptPin::IntA);
|
||||
vm_fd_clone.signal_msi(msi_queue).map(|ret| {
|
||||
if ret > 0 {
|
||||
debug!("MSI message successfully delivered");
|
||||
} else if ret == 0 {
|
||||
warn!("failed to deliver MSI message, blocked by guest");
|
||||
}
|
||||
})
|
||||
}) as MsixClosure);
|
||||
|
||||
virtio_pci_device.assign_msix(msi_cb);
|
||||
} else {
|
||||
let irqfd = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?;
|
||||
|
||||
let irq_num = allocator
|
||||
.allocate_irq()
|
||||
.ok_or(DeviceManagerError::AllocateIrq)?;
|
||||
vm_fd
|
||||
.register_irqfd(irqfd.as_raw_fd(), irq_num)
|
||||
.map_err(DeviceManagerError::Irq)?;
|
||||
|
||||
let irq_cb = Arc::new(Box::new(move || irqfd.write(1)) as IrqClosure);
|
||||
virtio_pci_device.assign_pin_irq(irq_cb, irq_num as u32, PciInterruptPin::IntA);
|
||||
}
|
||||
|
||||
let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device));
|
||||
|
||||
@ -542,7 +574,7 @@ impl AsRawFd for EpollContext {
|
||||
}
|
||||
|
||||
pub struct Vm<'a> {
|
||||
fd: VmFd,
|
||||
fd: Arc<VmFd>,
|
||||
kernel: File,
|
||||
memory: GuestMemoryMmap,
|
||||
vcpus: Vec<thread::JoinHandle<()>>,
|
||||
@ -557,6 +589,7 @@ impl<'a> Vm<'a> {
|
||||
pub fn new(kvm: &Kvm, config: VmConfig<'a>) -> Result<Self> {
|
||||
let kernel = File::open(&config.kernel.path).map_err(Error::KernelFile)?;
|
||||
let fd = kvm.create_vm().map_err(Error::VmCreate)?;
|
||||
let fd = Arc::new(fd);
|
||||
|
||||
// Init guest memory
|
||||
let arch_mem_regions = arch::arch_memory_regions(u64::from(&config.memory) << 20);
|
||||
@ -607,8 +640,14 @@ impl<'a> Vm<'a> {
|
||||
)
|
||||
.ok_or(Error::CreateSystemAllocator)?;
|
||||
|
||||
let device_manager = DeviceManager::new(guest_memory.clone(), &mut allocator, &fd, &config)
|
||||
.map_err(Error::DeviceManager)?;
|
||||
let device_manager = DeviceManager::new(
|
||||
guest_memory.clone(),
|
||||
&mut allocator,
|
||||
&fd,
|
||||
&config,
|
||||
kvm.check_extension(Cap::SignalMsi),
|
||||
)
|
||||
.map_err(Error::DeviceManager)?;
|
||||
fd.register_irqfd(device_manager.serial_evt.as_raw_fd(), 4)
|
||||
.map_err(Error::Irq)?;
|
||||
|
||||
@ -789,12 +828,6 @@ impl<'a> Vm<'a> {
|
||||
&self.memory
|
||||
}
|
||||
|
||||
/// Gets a reference to the kvm file descriptor owned by this VM.
|
||||
///
|
||||
pub fn get_fd(&self) -> &VmFd {
|
||||
&self.fd
|
||||
}
|
||||
|
||||
fn patch_cpuid(cpuid: &mut CpuId) {
|
||||
let entries = cpuid.mut_entries_slice();
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user