mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2025-02-22 11:22:26 +00:00
hypervisor: mshv: Emulate IrqFd and IOEventFd for mshv module
We don't have IrqFd and IOEventFd support in the kernel for now. So an emulation layer is needed. In the future, we will be adding this support in the kernel. Co-Developed-by: Wei Liu <liuwe@microsoft.com> Signed-off-by: Wei Liu <liuwe@microsoft.com> Signed-off-by: Muminul Islam <muislam@microsoft.com>
This commit is contained in:
parent
286a23fbd4
commit
fd0ef6cfb5
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -418,6 +418,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"env_logger",
|
||||
"epoll",
|
||||
"iced-x86",
|
||||
"kvm-bindings",
|
||||
"kvm-ioctls",
|
||||
|
@ -11,6 +11,7 @@ mshv = []
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
epoll = ">=4.0.1"
|
||||
thiserror = "1.0"
|
||||
libc = "0.2.81"
|
||||
log = "0.4.11"
|
||||
|
@ -17,7 +17,7 @@ use crate::cpu::Vcpu;
|
||||
use crate::hypervisor;
|
||||
use crate::vm::{self, VmmOps};
|
||||
pub use mshv_bindings::*;
|
||||
use mshv_ioctls::{set_registers_64, Mshv, VcpuFd, VmFd};
|
||||
use mshv_ioctls::{set_registers_64, InterruptRequest, Mshv, VcpuFd, VmFd};
|
||||
use serde_derive::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use vm::DataMatch;
|
||||
@ -31,7 +31,6 @@ use vmm_sys_util::eventfd::EventFd;
|
||||
pub use x86_64::VcpuMshvState as CpuState;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub use x86_64::*;
|
||||
|
||||
// Wei: for emulating irqfd and ioeventfd
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
@ -49,6 +48,195 @@ pub struct HvState {
|
||||
|
||||
pub use HvState as VmState;
|
||||
|
||||
struct IrqfdCtrlEpollHandler {
|
||||
vm_fd: Arc<VmFd>, /* For issuing hypercall */
|
||||
irqfd: EventFd, /* Registered by caller */
|
||||
kill: EventFd, /* Created by us, signal thread exit */
|
||||
epoll_fd: RawFd, /* epoll fd */
|
||||
gsi: u32,
|
||||
gsi_routes: Arc<RwLock<HashMap<u32, MshvIrqRoutingEntry>>>,
|
||||
}
|
||||
|
||||
fn register_listener(
|
||||
epoll_fd: RawFd,
|
||||
fd: RawFd,
|
||||
ev_type: epoll::Events,
|
||||
data: u64,
|
||||
) -> std::result::Result<(), io::Error> {
|
||||
epoll::ctl(
|
||||
epoll_fd,
|
||||
epoll::ControlOptions::EPOLL_CTL_ADD,
|
||||
fd,
|
||||
epoll::Event::new(ev_type, data),
|
||||
)
|
||||
}
|
||||
|
||||
const KILL_EVENT: u16 = 1;
|
||||
const IRQFD_EVENT: u16 = 2;
|
||||
|
||||
impl IrqfdCtrlEpollHandler {
|
||||
fn assert_virtual_interrupt(&self, e: &MshvIrqRoutingEntry) -> vm::Result<()> {
|
||||
// GSI routing contains MSI information.
|
||||
// We still need to translate that to APIC ID etc
|
||||
|
||||
debug!("Inject {:x?}", e);
|
||||
|
||||
let MshvIrqRouting::Msi(msi) = e.route;
|
||||
|
||||
/* Make an assumption here ... */
|
||||
if msi.address_hi != 0 {
|
||||
panic!("MSI high address part is not zero");
|
||||
}
|
||||
|
||||
let typ = self
|
||||
.get_interrupt_type(self.get_delivery_mode(msi.data))
|
||||
.unwrap();
|
||||
let apic_id = self.get_destination(msi.address_lo);
|
||||
let vector = self.get_vector(msi.data);
|
||||
let level_triggered = self.get_trigger_mode(msi.data);
|
||||
let logical_destination_mode = self.get_destination_mode(msi.address_lo);
|
||||
|
||||
debug!(
|
||||
"{:x} {:x} {:x} {} {}",
|
||||
typ, apic_id, vector, level_triggered, logical_destination_mode
|
||||
);
|
||||
|
||||
let request: InterruptRequest = InterruptRequest {
|
||||
interrupt_type: typ,
|
||||
apic_id,
|
||||
vector: vector.into(),
|
||||
level_triggered,
|
||||
logical_destination_mode,
|
||||
long_mode: false,
|
||||
};
|
||||
|
||||
self.vm_fd
|
||||
.request_virtual_interrupt(&request)
|
||||
.map_err(|e| vm::HypervisorVmError::AsserttVirtualInterrupt(e.into()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
fn run_ctrl(&mut self) {
|
||||
self.epoll_fd = epoll::create(true).unwrap();
|
||||
let epoll_file = unsafe { File::from_raw_fd(self.epoll_fd) };
|
||||
|
||||
register_listener(
|
||||
epoll_file.as_raw_fd(),
|
||||
self.kill.as_raw_fd(),
|
||||
epoll::Events::EPOLLIN,
|
||||
u64::from(KILL_EVENT),
|
||||
)
|
||||
.unwrap_or_else(|err| {
|
||||
info!(
|
||||
"IrqfdCtrlEpollHandler: failed to register listener: {:?}",
|
||||
err
|
||||
);
|
||||
});
|
||||
|
||||
register_listener(
|
||||
epoll_file.as_raw_fd(),
|
||||
self.irqfd.as_raw_fd(),
|
||||
epoll::Events::EPOLLIN,
|
||||
u64::from(IRQFD_EVENT),
|
||||
)
|
||||
.unwrap_or_else(|err| {
|
||||
info!(
|
||||
"IrqfdCtrlEpollHandler: failed to register listener: {:?}",
|
||||
err
|
||||
);
|
||||
});
|
||||
|
||||
let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); 2];
|
||||
|
||||
'epoll: loop {
|
||||
let num_events = match epoll::wait(epoll_file.as_raw_fd(), -1, &mut events[..]) {
|
||||
Ok(res) => res,
|
||||
Err(e) => {
|
||||
if e.kind() == std::io::ErrorKind::Interrupted {
|
||||
continue;
|
||||
}
|
||||
panic!("irqfd epoll ???");
|
||||
}
|
||||
};
|
||||
|
||||
for event in events.iter().take(num_events) {
|
||||
let ev_type = event.data as u16;
|
||||
|
||||
match ev_type {
|
||||
KILL_EVENT => {
|
||||
break 'epoll;
|
||||
}
|
||||
IRQFD_EVENT => {
|
||||
debug!("IRQFD_EVENT received, inject to guest");
|
||||
let _ = self.irqfd.read().unwrap();
|
||||
let gsi_routes = self.gsi_routes.read().unwrap();
|
||||
|
||||
if let Some(e) = gsi_routes.get(&self.gsi) {
|
||||
self.assert_virtual_interrupt(&e).unwrap();
|
||||
} else {
|
||||
debug!("No routing info found for GSI {}", self.gsi);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
error!("Unknown event");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// See Intel SDM vol3 10.11.1
|
||||
/// We assume APIC ID and Hyper-V Vcpu ID are the same value
|
||||
///
|
||||
|
||||
fn get_destination(&self, message_address: u32) -> u64 {
|
||||
((message_address >> 12) & 0xff).into()
|
||||
}
|
||||
|
||||
fn get_destination_mode(&self, message_address: u32) -> bool {
|
||||
if (message_address >> 2) & 0x1 == 0x1 {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn get_vector(&self, message_data: u32) -> u8 {
|
||||
(message_data & 0xff) as u8
|
||||
}
|
||||
|
||||
///
|
||||
/// True means level triggered
|
||||
///
|
||||
fn get_trigger_mode(&self, message_data: u32) -> bool {
|
||||
if (message_data >> 15) & 0x1 == 0x1 {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn get_delivery_mode(&self, message_data: u32) -> u8 {
|
||||
((message_data & 0x700) >> 8) as u8
|
||||
}
|
||||
///
|
||||
/// Translate from architectural defined delivery mode to Hyper-V type
|
||||
/// See Intel SDM vol3 10.11.2
|
||||
///
|
||||
fn get_interrupt_type(&self, delivery_mode: u8) -> Option<hv_interrupt_type> {
|
||||
match delivery_mode {
|
||||
0 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED),
|
||||
1 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY),
|
||||
2 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_SMI),
|
||||
4 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI),
|
||||
5 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_INIT),
|
||||
7 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_EXTINT),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper over mshv system ioctls.
|
||||
pub struct MshvHypervisor {
|
||||
mshv: Mshv,
|
||||
@ -108,9 +296,16 @@ impl hypervisor::Hypervisor for MshvHypervisor {
|
||||
}
|
||||
let vm_fd = Arc::new(fd);
|
||||
|
||||
let irqfds = Mutex::new(HashMap::new());
|
||||
let ioeventfds = Arc::new(RwLock::new(HashMap::new()));
|
||||
let gsi_routes = Arc::new(RwLock::new(HashMap::new()));
|
||||
|
||||
Ok(Arc::new(MshvVm {
|
||||
fd: vm_fd,
|
||||
msrs,
|
||||
irqfds,
|
||||
ioeventfds,
|
||||
gsi_routes,
|
||||
hv_state: hv_state_init(),
|
||||
vmmops: None,
|
||||
}))
|
||||
@ -132,12 +327,15 @@ impl hypervisor::Hypervisor for MshvHypervisor {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
/// Vcpu struct for Microsoft Hypervisor
|
||||
pub struct MshvVcpu {
|
||||
fd: VcpuFd,
|
||||
vp_index: u8,
|
||||
cpuid: CpuId,
|
||||
msrs: MsrEntries,
|
||||
ioeventfds: Arc<RwLock<HashMap<IoEventAddress, (Option<DataMatch>, EventFd)>>>,
|
||||
gsi_routes: Arc<RwLock<HashMap<u32, MshvIrqRoutingEntry>>>,
|
||||
hv_state: Arc<RwLock<HvState>>, // Mshv State
|
||||
vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
|
||||
}
|
||||
@ -335,10 +533,17 @@ impl cpu::Vcpu for MshvVcpu {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
/// Wrapper over Mshv VM ioctls.
|
||||
pub struct MshvVm {
|
||||
fd: Arc<VmFd>,
|
||||
msrs: MsrEntries,
|
||||
// Emulate irqfd
|
||||
irqfds: Mutex<HashMap<u32, (EventFd, EventFd)>>,
|
||||
// Emulate ioeventfd
|
||||
ioeventfds: Arc<RwLock<HashMap<IoEventAddress, (Option<DataMatch>, EventFd)>>>,
|
||||
// GSI routing information
|
||||
gsi_routes: Arc<RwLock<HashMap<u32, MshvIrqRoutingEntry>>>,
|
||||
// Hypervisor State
|
||||
hv_state: Arc<RwLock<HvState>>,
|
||||
vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
|
||||
@ -377,12 +582,36 @@ impl vm::Vm for MshvVm {
|
||||
/// Registers an event that will, when signaled, trigger the `gsi` IRQ.
|
||||
///
|
||||
fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
|
||||
let dup_fd = fd.try_clone().unwrap();
|
||||
let kill_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
|
||||
|
||||
let mut ctrl_handler = IrqfdCtrlEpollHandler {
|
||||
vm_fd: self.fd.clone(),
|
||||
kill: kill_fd.try_clone().unwrap(),
|
||||
irqfd: fd.try_clone().unwrap(),
|
||||
epoll_fd: 0,
|
||||
gsi,
|
||||
gsi_routes: self.gsi_routes.clone(),
|
||||
};
|
||||
|
||||
debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
|
||||
|
||||
thread::Builder::new()
|
||||
.name(format!("irqfd_{}", gsi))
|
||||
.spawn(move || ctrl_handler.run_ctrl())
|
||||
.unwrap();
|
||||
|
||||
self.irqfds.lock().unwrap().insert(gsi, (dup_fd, kill_fd));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
///
|
||||
/// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
|
||||
///
|
||||
fn unregister_irqfd(&self, _fd: &EventFd, gsi: u32) -> vm::Result<()> {
|
||||
debug!("unregister_irqfd fd {} gsi {}", _fd.as_raw_fd(), gsi);
|
||||
let (_, kill_fd) = self.irqfds.lock().unwrap().remove(&gsi).unwrap();
|
||||
kill_fd.write(1).unwrap();
|
||||
Ok(())
|
||||
}
|
||||
///
|
||||
@ -402,6 +631,8 @@ impl vm::Vm for MshvVm {
|
||||
vp_index: id,
|
||||
cpuid: CpuId::new(1 as usize),
|
||||
msrs: self.msrs.clone(),
|
||||
ioeventfds: self.ioeventfds.clone(),
|
||||
gsi_routes: self.gsi_routes.clone(),
|
||||
hv_state: self.hv_state.clone(),
|
||||
vmmops,
|
||||
};
|
||||
@ -417,10 +648,25 @@ impl vm::Vm for MshvVm {
|
||||
addr: &IoEventAddress,
|
||||
datamatch: Option<DataMatch>,
|
||||
) -> vm::Result<()> {
|
||||
let dup_fd = fd.try_clone().unwrap();
|
||||
|
||||
debug!(
|
||||
"register_ioevent fd {} addr {:x?} datamatch {:?}",
|
||||
fd.as_raw_fd(),
|
||||
addr,
|
||||
datamatch
|
||||
);
|
||||
|
||||
self.ioeventfds
|
||||
.write()
|
||||
.unwrap()
|
||||
.insert(*addr, (datamatch, dup_fd));
|
||||
Ok(())
|
||||
}
|
||||
/// Unregister an event from a certain address it has been previously registered to.
|
||||
fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
|
||||
debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);
|
||||
self.ioeventfds.write().unwrap().remove(addr).unwrap();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -461,6 +707,15 @@ impl vm::Vm for MshvVm {
|
||||
}
|
||||
|
||||
fn set_gsi_routing(&self, irq_routing: &[IrqRoutingEntry]) -> vm::Result<()> {
|
||||
let mut routes = self.gsi_routes.write().unwrap();
|
||||
|
||||
routes.drain();
|
||||
|
||||
for r in irq_routing {
|
||||
debug!("gsi routing {:x?}", r);
|
||||
routes.insert(r.gsi, *r);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
///
|
||||
|
@ -158,6 +158,11 @@ pub enum HypervisorVmError {
|
||||
///
|
||||
#[error("Failed to get dirty log: {0}")]
|
||||
GetDirtyLog(#[source] anyhow::Error),
|
||||
///
|
||||
/// Assert virtual interrupt error
|
||||
///
|
||||
#[error("Failed to assert virtual Interrupt: {0}")]
|
||||
AsserttVirtualInterrupt(#[source] anyhow::Error),
|
||||
}
|
||||
///
|
||||
/// Result type for returning from a function
|
||||
|
Loading…
x
Reference in New Issue
Block a user