hypervisor, vmm: Introduce VmmOps trait

The run loop in the hypervisor module needs a callback mechanism to
access resources such as guest memory, MMIO and PIO.

The VmmOps trait is introduced here and implemented by the vmm module.
While handling vcpu exits in the run loop, this trait gives the
hypervisor module access to the above resources via callbacks.

Signed-off-by: Praveen Paladugu <prapal@microsoft.com>
Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Authored by Praveen Paladugu on 2020-09-03 13:50:56 -07:00; committed by Rob Bradford
parent 6a9934d933
commit 71c435ce91
6 changed files with 261 additions and 141 deletions
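For context, a condensed sketch of how the new interface is wired up, assuming the hypervisor crate from this commit is available as a dependency. NoopOps and register() below are hypothetical, illustration-only names; the real implementor is the VmOps struct added to vmm/src/vm.rs further down.

    use std::sync::Arc;

    use hypervisor::vm::{Vm, VmmOps};

    // Hypothetical no-op implementor of the callbacks, for illustration only.
    struct NoopOps;

    impl VmmOps for NoopOps {
        fn guest_mem_write(&self, buf: &[u8], _gpa: u64) -> hypervisor::vm::Result<usize> {
            Ok(buf.len()) // pretend the whole buffer was written
        }
        fn guest_mem_read(&self, buf: &mut [u8], _gpa: u64) -> hypervisor::vm::Result<usize> {
            Ok(buf.len()) // pretend the whole buffer was read
        }
        fn mmio_read(&self, _addr: u64, _data: &mut [u8]) -> hypervisor::vm::Result<()> {
            Ok(())
        }
        fn mmio_write(&self, _addr: u64, _data: &[u8]) -> hypervisor::vm::Result<()> {
            Ok(())
        }
        #[cfg(target_arch = "x86_64")]
        fn pio_read(&self, _addr: u64, _data: &mut [u8]) -> hypervisor::vm::Result<()> {
            Ok(())
        }
        #[cfg(target_arch = "x86_64")]
        fn pio_write(&self, _addr: u64, _data: &[u8]) -> hypervisor::vm::Result<()> {
            Ok(())
        }
    }

    // The VMM registers its callbacks once; after this, the hypervisor's vcpu
    // run loop forwards PIO/MMIO exits to them and reports VmExit::Ignore.
    fn register(vm: &Arc<dyn Vm>) -> hypervisor::vm::Result<()> {
        vm.set_vmmops(Box::new(NoopOps))
    }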

Cargo.lock (generated)

@ -470,6 +470,7 @@ name = "hypervisor"
version = "0.1.0"
dependencies = [
"anyhow",
"arc-swap",
"kvm-bindings",
"kvm-ioctls",
"libc",

hypervisor/Cargo.toml

@ -9,6 +9,7 @@ kvm = []
[dependencies]
anyhow = "1.0"
arc-swap = ">=0.4.4"
thiserror = "1.0"
libc = "0.2.78"
log = "0.4.11"

hypervisor/src/kvm/mod.rs

@ -16,9 +16,10 @@ pub use crate::aarch64::{
use crate::cpu;
use crate::device;
use crate::hypervisor;
use crate::vm;
use crate::vm::{self, VmmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset__of};
use arc_swap::ArcSwapOption;
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::os::unix::io::{AsRawFd, RawFd};
@ -91,6 +92,7 @@ pub struct KvmVm {
#[cfg(target_arch = "x86_64")]
msrs: MsrEntries,
state: KvmVmState,
vmmops: ArcSwapOption<Box<dyn vm::VmmOps>>,
}
// Returns a `Vec<T>` with a size in bytes at least as large as `size_in_bytes`.
@ -179,6 +181,7 @@ impl vm::Vm for KvmVm {
fd: vc,
#[cfg(target_arch = "x86_64")]
msrs: self.msrs.clone(),
vmmops: self.vmmops.clone(),
};
Ok(Arc::new(vcpu))
}
@ -345,6 +348,14 @@ impl vm::Vm for KvmVm {
fn set_state(&self, _state: VmState) -> vm::Result<()> {
Ok(())
}
///
/// Set the VmmOps interface
///
fn set_vmmops(&self, vmmops: Box<dyn VmmOps>) -> vm::Result<()> {
self.vmmops.store(Some(Arc::new(vmmops)));
Ok(())
}
}
/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
@ -422,6 +433,7 @@ impl hypervisor::Hypervisor for KvmHypervisor {
fd: vm_fd,
msrs,
state: VmState {},
vmmops: ArcSwapOption::from(None),
}))
}
@ -430,6 +442,7 @@ impl hypervisor::Hypervisor for KvmHypervisor {
Ok(Arc::new(KvmVm {
fd: vm_fd,
state: VmState {},
vmmops: ArcSwapOption::from(None),
}))
}
}
@ -490,6 +503,7 @@ pub struct KvmVcpu {
fd: VcpuFd,
#[cfg(target_arch = "x86_64")]
msrs: MsrEntries,
vmmops: ArcSwapOption<Box<dyn vm::VmmOps>>,
}
/// Implementation of Vcpu trait for KVM
/// Example:
@ -681,9 +695,27 @@ impl cpu::Vcpu for KvmVcpu {
match self.fd.run() {
Ok(run) => match run {
#[cfg(target_arch = "x86_64")]
VcpuExit::IoIn(addr, data) => Ok(cpu::VmExit::IoIn(addr, data)),
VcpuExit::IoIn(addr, data) => {
if let Some(vmmops) = self.vmmops.load_full() {
return vmmops
.pio_read(addr.into(), data)
.map(|_| cpu::VmExit::Ignore)
.map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
}
Ok(cpu::VmExit::IoIn(addr, data))
}
#[cfg(target_arch = "x86_64")]
VcpuExit::IoOut(addr, data) => Ok(cpu::VmExit::IoOut(addr, data)),
VcpuExit::IoOut(addr, data) => {
if let Some(vmmops) = self.vmmops.load_full() {
return vmmops
.pio_write(addr.into(), data)
.map(|_| cpu::VmExit::Ignore)
.map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
}
Ok(cpu::VmExit::IoOut(addr, data))
}
#[cfg(target_arch = "x86_64")]
VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
#[cfg(target_arch = "x86_64")]
@ -705,8 +737,26 @@ impl cpu::Vcpu for KvmVcpu {
}
}
VcpuExit::MmioRead(addr, data) => Ok(cpu::VmExit::MmioRead(addr, data)),
VcpuExit::MmioWrite(addr, data) => Ok(cpu::VmExit::MmioWrite(addr, data)),
VcpuExit::MmioRead(addr, data) => {
if let Some(vmmops) = self.vmmops.load_full() {
return vmmops
.mmio_read(addr, data)
.map(|_| cpu::VmExit::Ignore)
.map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
}
Ok(cpu::VmExit::MmioRead(addr, data))
}
VcpuExit::MmioWrite(addr, data) => {
if let Some(vmmops) = self.vmmops.load_full() {
return vmmops
.mmio_write(addr, data)
.map(|_| cpu::VmExit::Ignore)
.map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
}
Ok(cpu::VmExit::MmioWrite(addr, data))
}
VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
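A note on the storage choice above: the registered callbacks live in an arc_swap::ArcSwapOption (the new dependency in hypervisor/Cargo.toml), presumably so the vcpu exit path can read them without taking a lock. A minimal standalone sketch of that pattern follows; Ops is a hypothetical stand-in for Box<dyn VmmOps>.

    use std::sync::Arc;

    use arc_swap::ArcSwapOption;

    struct Ops; // hypothetical stand-in for Box<dyn VmmOps>

    fn main() {
        // Starts empty, mirroring `vmmops: ArcSwapOption::from(None)` above.
        let slot: ArcSwapOption<Ops> = ArcSwapOption::from(None);

        // set_vmmops(): publish the callbacks once the VMM has built them.
        slot.store(Some(Arc::new(Ops)));

        // Exit handling: clone out the current Arc without a Mutex and dispatch.
        if let Some(_ops) = slot.load_full() {
            // forward the PIO/MMIO access to the VMM here
        }
    }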

hypervisor/src/vm.rs

@ -117,6 +117,36 @@ pub enum HypervisorVmError {
///
#[error("Failed to create passthrough device: {0}")]
CreatePassthroughDevice(#[source] anyhow::Error),
///
/// Write to Guest memory
///
#[error("Failed to write to guest memory: {0}")]
GuestMemWrite(#[source] anyhow::Error),
///
/// Read Guest memory
///
#[error("Failed to read guest memory: {0}")]
GuestMemRead(#[source] anyhow::Error),
///
/// Read from MMIO Bus
///
#[error("Failed to read from MMIO Bus: {0}")]
MmioBusRead(#[source] anyhow::Error),
///
/// Write to MMIO Bus
///
#[error("Failed to write to MMIO Bus: {0}")]
MmioBusWrite(#[source] anyhow::Error),
///
/// Read from IO Bus
///
#[error("Failed to read from IO Bus: {0}")]
IoBusRead(#[source] anyhow::Error),
///
/// Write to IO Bus
///
#[error("Failed to write to IO Bus: {0}")]
IoBusWrite(#[source] anyhow::Error),
}
///
/// Result type for returning from a function
@ -184,4 +214,17 @@ pub trait Vm: Send + Sync {
fn state(&self) -> Result<VmState>;
/// Set the VM state
fn set_state(&self, state: VmState) -> Result<()>;
/// Set VmmOps interface
fn set_vmmops(&self, vmmops: Box<dyn VmmOps>) -> Result<()>;
}
pub trait VmmOps: Send + Sync {
fn guest_mem_write(&self, buf: &[u8], gpa: u64) -> Result<usize>;
fn guest_mem_read(&self, buf: &mut [u8], gpa: u64) -> Result<usize>;
fn mmio_read(&self, addr: u64, data: &mut [u8]) -> Result<()>;
fn mmio_write(&self, addr: u64, data: &[u8]) -> Result<()>;
#[cfg(target_arch = "x86_64")]
fn pio_read(&self, addr: u64, data: &mut [u8]) -> Result<()>;
#[cfg(target_arch = "x86_64")]
fn pio_write(&self, addr: u64, data: &[u8]) -> Result<()>;
}

vmm/src/cpu.rs

@ -37,13 +37,11 @@ use hypervisor::{CpuState, VmExit};
use seccomp::{SeccompAction, SeccompFilter};
use libc::{c_void, siginfo_t};
#[cfg(target_arch = "x86_64")]
use std::fmt;
use std::os::unix::thread::JoinHandleExt;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Barrier, Mutex};
use std::{cmp, io, result, thread};
use vm_device::{Bus, BusDevice};
use vm_device::BusDevice;
#[cfg(target_arch = "x86_64")]
use vm_memory::GuestAddress;
use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap};
@ -62,51 +60,6 @@ const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit.
#[cfg(target_arch = "x86_64")]
const MTRR_EDX_BIT: u8 = 12; // MTRR edx bit.
// Debug I/O port
#[cfg(target_arch = "x86_64")]
const DEBUG_IOPORT: u16 = 0x80;
#[cfg(target_arch = "x86_64")]
const DEBUG_IOPORT_PREFIX: &str = "Debug I/O port";
#[cfg(target_arch = "x86_64")]
/// Debug I/O port, see:
/// https://www.intel.com/content/www/us/en/support/articles/000005500/boards-and-kits.html
///
/// Since we're not a physical platform, we can freely assign code ranges for
/// debugging specific parts of our virtual platform.
pub enum DebugIoPortRange {
Firmware,
Bootloader,
Kernel,
Userspace,
Custom,
}
#[cfg(target_arch = "x86_64")]
impl DebugIoPortRange {
fn from_u8(value: u8) -> DebugIoPortRange {
match value {
0x00..=0x1f => DebugIoPortRange::Firmware,
0x20..=0x3f => DebugIoPortRange::Bootloader,
0x40..=0x5f => DebugIoPortRange::Kernel,
0x60..=0x7f => DebugIoPortRange::Userspace,
_ => DebugIoPortRange::Custom,
}
}
}
#[cfg(target_arch = "x86_64")]
impl fmt::Display for DebugIoPortRange {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DebugIoPortRange::Firmware => write!(f, "{}: Firmware", DEBUG_IOPORT_PREFIX),
DebugIoPortRange::Bootloader => write!(f, "{}: Bootloader", DEBUG_IOPORT_PREFIX),
DebugIoPortRange::Kernel => write!(f, "{}: Kernel", DEBUG_IOPORT_PREFIX),
DebugIoPortRange::Userspace => write!(f, "{}: Userspace", DEBUG_IOPORT_PREFIX),
DebugIoPortRange::Custom => write!(f, "{}: Custom", DEBUG_IOPORT_PREFIX),
}
}
}
#[derive(Debug)]
pub enum Error {
/// Cannot create the vCPU.
@ -216,6 +169,9 @@ pub enum Error {
/// Error starting vCPU after restore
StartRestoreVcpu(anyhow::Error),
/// Error because an unexpected VmExit type was received.
UnexpectedVmExit,
}
pub type Result<T> = result::Result<T, Error>;
@ -256,13 +212,8 @@ pub struct Vcpu {
// The hypervisor abstracted CPU.
vcpu: Arc<dyn hypervisor::Vcpu>,
id: u8,
#[cfg(target_arch = "x86_64")]
io_bus: Arc<Bus>,
mmio_bus: Arc<Bus>,
#[cfg_attr(target_arch = "aarch64", allow(dead_code))]
interrupt_controller: Option<Arc<Mutex<dyn InterruptController>>>,
#[cfg_attr(target_arch = "aarch64", allow(dead_code))]
vm_ts: std::time::Instant,
#[cfg(target_arch = "aarch64")]
mpidr: u64,
saved_state: Option<CpuState>,
@ -278,10 +229,7 @@ impl Vcpu {
pub fn new(
id: u8,
vm: &Arc<dyn hypervisor::Vm>,
#[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
mmio_bus: Arc<Bus>,
interrupt_controller: Option<Arc<Mutex<dyn InterruptController>>>,
creation_ts: std::time::Instant,
) -> Result<Arc<Mutex<Self>>> {
let vcpu = vm
.create_vcpu(id)
@ -290,11 +238,7 @@ impl Vcpu {
Ok(Arc::new(Mutex::new(Vcpu {
vcpu,
id,
#[cfg(target_arch = "x86_64")]
io_bus,
mmio_bus,
interrupt_controller,
vm_ts: creation_ts,
#[cfg(target_arch = "aarch64")]
mpidr: 0,
saved_state: None,
@ -373,43 +317,6 @@ impl Vcpu {
pub fn run(&self) -> Result<bool> {
match self.vcpu.run() {
Ok(run) => match run {
#[cfg(target_arch = "x86_64")]
VmExit::IoIn(addr, data) => {
if let Err(e) = self.io_bus.read(u64::from(addr), data) {
if let vm_device::BusError::MissingAddressRange = e {
warn!("Guest PIO read to unregistered address 0x{:x}", addr);
}
}
Ok(true)
}
#[cfg(target_arch = "x86_64")]
VmExit::IoOut(addr, data) => {
if addr == DEBUG_IOPORT && data.len() == 1 {
self.log_debug_ioport(data[0]);
}
if let Err(e) = self.io_bus.write(u64::from(addr), data) {
if let vm_device::BusError::MissingAddressRange = e {
warn!("Guest PIO write to unregistered address 0x{:x}", addr);
}
}
Ok(true)
}
VmExit::MmioRead(addr, data) => {
if let Err(e) = self.mmio_bus.read(addr as u64, data) {
if let vm_device::BusError::MissingAddressRange = e {
warn!("Guest MMIO read to unregistered address 0x{:x}", addr);
}
}
Ok(true)
}
VmExit::MmioWrite(addr, data) => {
if let Err(e) = self.mmio_bus.write(addr as u64, data) {
if let vm_device::BusError::MissingAddressRange = e {
warn!("Guest MMIO write to unregistered address 0x{:x}", addr);
}
}
Ok(true)
}
#[cfg(target_arch = "x86_64")]
VmExit::IoapicEoi(vector) => {
if let Some(interrupt_controller) = &self.interrupt_controller {
@ -420,30 +327,16 @@ impl Vcpu {
}
Ok(true)
}
VmExit::Ignore => Ok(true),
VmExit::Reset => Ok(false),
// No need to handle anything from a KVM HyperV exit
VmExit::Hyperv => Ok(true),
_ => Err(Error::UnexpectedVmExit),
},
Err(e) => Err(Error::VcpuRun(e.into())),
}
}
#[cfg(target_arch = "x86_64")]
// Log debug io port codes.
fn log_debug_ioport(&self, code: u8) {
let ts = self.vm_ts.elapsed();
debug!(
"[{} code 0x{:x}] {}.{:>06} seconds",
DebugIoPortRange::from_u8(code),
code,
ts.as_secs(),
ts.as_micros()
);
}
}
const VCPU_SNAPSHOT_ID: &str = "vcpu";
@ -513,10 +406,6 @@ impl Snapshottable for Vcpu {
pub struct CpuManager {
config: CpusConfig,
#[cfg(target_arch = "x86_64")]
io_bus: Arc<Bus>,
#[cfg_attr(target_arch = "aarch64", allow(dead_code))]
mmio_bus: Arc<Bus>,
#[cfg_attr(target_arch = "aarch64", allow(dead_code))]
interrupt_controller: Option<Arc<Mutex<dyn InterruptController>>>,
#[cfg_attr(target_arch = "aarch64", allow(dead_code))]
@ -678,9 +567,6 @@ impl CpuManager {
let device_manager = device_manager.lock().unwrap();
let cpu_manager = Arc::new(Mutex::new(CpuManager {
config: config.clone(),
#[cfg(target_arch = "x86_64")]
io_bus: device_manager.io_bus().clone(),
mmio_bus: device_manager.mmio_bus().clone(),
interrupt_controller: device_manager.interrupt_controller().clone(),
vm_memory: guest_memory,
#[cfg(target_arch = "x86_64")]
@ -704,10 +590,8 @@ impl CpuManager {
.ok_or(Error::AllocateIOPort)?;
#[cfg(target_arch = "x86_64")]
cpu_manager
.lock()
.unwrap()
.io_bus
device_manager
.io_bus()
.insert(cpu_manager.clone(), 0x0cd8, 0xc)
.map_err(Error::BusError)?;
@ -792,17 +676,7 @@ impl CpuManager {
None
};
let creation_ts = std::time::Instant::now();
let vcpu = Vcpu::new(
cpu_id,
&self.vm,
#[cfg(target_arch = "x86_64")]
self.io_bus.clone(),
self.mmio_bus.clone(),
interrupt_controller,
creation_ts,
)?;
let vcpu = Vcpu::new(cpu_id, &self.vm, interrupt_controller)?;
if let Some(snapshot) = snapshot {
#[cfg(target_arch = "x86_64")]

vmm/src/vm.rs

@ -42,6 +42,7 @@ use anyhow::anyhow;
use arch::BootProtocol;
use arch::EntryPoint;
use devices::HotPlugNotificationFlags;
use hypervisor::vm::{HypervisorVmError, VmmOps};
use linux_loader::cmdline::Cmdline;
#[cfg(target_arch = "x86_64")]
use linux_loader::loader::elf::Error::InvalidElfMagicNumber;
@ -53,6 +54,8 @@ use signal_hook::{iterator::Signals, SIGINT, SIGTERM, SIGWINCH};
use std::collections::{BTreeMap, HashMap};
use std::convert::TryInto;
use std::ffi::CString;
#[cfg(target_arch = "x86_64")]
use std::fmt;
use std::fs::{File, OpenOptions};
use std::io::{self, Write};
use std::io::{Seek, SeekFrom};
@ -62,8 +65,10 @@ use std::path::PathBuf;
use std::sync::{Arc, Mutex, RwLock};
use std::{result, str, thread};
use url::Url;
use vm_device::Bus;
use vm_memory::{
Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap, GuestRegionMmap,
Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap,
GuestRegionMmap,
};
use vm_migration::{
Migratable, MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable,
@ -217,6 +222,9 @@ pub enum Error {
/// Failed resizing a memory zone.
ResizeZone,
/// Failed setting the VmmOps interface.
SetVmmOpsInterface(hypervisor::HypervisorVmError),
}
pub type Result<T> = result::Result<T, Error>;
@ -295,6 +303,134 @@ impl VmState {
}
}
// Debug I/O port
#[cfg(target_arch = "x86_64")]
const DEBUG_IOPORT: u16 = 0x80;
#[cfg(target_arch = "x86_64")]
const DEBUG_IOPORT_PREFIX: &str = "Debug I/O port";
#[cfg(target_arch = "x86_64")]
/// Debug I/O port, see:
/// https://www.intel.com/content/www/us/en/support/articles/000005500/boards-and-kits.html
///
/// Since we're not a physical platform, we can freely assign code ranges for
/// debugging specific parts of our virtual platform.
pub enum DebugIoPortRange {
Firmware,
Bootloader,
Kernel,
Userspace,
Custom,
}
#[cfg(target_arch = "x86_64")]
impl DebugIoPortRange {
fn from_u8(value: u8) -> DebugIoPortRange {
match value {
0x00..=0x1f => DebugIoPortRange::Firmware,
0x20..=0x3f => DebugIoPortRange::Bootloader,
0x40..=0x5f => DebugIoPortRange::Kernel,
0x60..=0x7f => DebugIoPortRange::Userspace,
_ => DebugIoPortRange::Custom,
}
}
}
#[cfg(target_arch = "x86_64")]
impl fmt::Display for DebugIoPortRange {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DebugIoPortRange::Firmware => write!(f, "{}: Firmware", DEBUG_IOPORT_PREFIX),
DebugIoPortRange::Bootloader => write!(f, "{}: Bootloader", DEBUG_IOPORT_PREFIX),
DebugIoPortRange::Kernel => write!(f, "{}: Kernel", DEBUG_IOPORT_PREFIX),
DebugIoPortRange::Userspace => write!(f, "{}: Userspace", DEBUG_IOPORT_PREFIX),
DebugIoPortRange::Custom => write!(f, "{}: Custom", DEBUG_IOPORT_PREFIX),
}
}
}
struct VmOps {
memory: GuestMemoryAtomic<GuestMemoryMmap>,
#[cfg(target_arch = "x86_64")]
io_bus: Arc<Bus>,
mmio_bus: Arc<Bus>,
#[cfg(target_arch = "x86_64")]
timestamp: std::time::Instant,
}
impl VmOps {
#[cfg(target_arch = "x86_64")]
// Log debug io port codes.
fn log_debug_ioport(&self, code: u8) {
let elapsed = self.timestamp.elapsed();
debug!(
"[{} code 0x{:x}] {}.{:>06} seconds",
DebugIoPortRange::from_u8(code),
code,
elapsed.as_secs(),
elapsed.as_micros()
);
}
}
impl VmmOps for VmOps {
fn guest_mem_write(&self, buf: &[u8], gpa: u64) -> hypervisor::vm::Result<usize> {
self.memory
.memory()
.write(buf, GuestAddress(gpa))
.map_err(|e| HypervisorVmError::GuestMemWrite(e.into()))
}
fn guest_mem_read(&self, buf: &mut [u8], gpa: u64) -> hypervisor::vm::Result<usize> {
self.memory
.memory()
.read(buf, GuestAddress(gpa))
.map_err(|e| HypervisorVmError::GuestMemRead(e.into()))
}
fn mmio_read(&self, addr: u64, data: &mut [u8]) -> hypervisor::vm::Result<()> {
if let Err(e) = self.mmio_bus.read(addr, data) {
if let vm_device::BusError::MissingAddressRange = e {
warn!("Guest MMIO read to unregistered address 0x{:x}", addr);
}
}
Ok(())
}
fn mmio_write(&self, addr: u64, data: &[u8]) -> hypervisor::vm::Result<()> {
if let Err(e) = self.mmio_bus.write(addr, data) {
if let vm_device::BusError::MissingAddressRange = e {
warn!("Guest MMIO write to unregistered address 0x{:x}", addr);
}
}
Ok(())
}
#[cfg(target_arch = "x86_64")]
fn pio_read(&self, addr: u64, data: &mut [u8]) -> hypervisor::vm::Result<()> {
if let Err(e) = self.io_bus.read(addr, data) {
if let vm_device::BusError::MissingAddressRange = e {
warn!("Guest PIO read to unregistered address 0x{:x}", addr);
}
}
Ok(())
}
#[cfg(target_arch = "x86_64")]
fn pio_write(&self, addr: u64, data: &[u8]) -> hypervisor::vm::Result<()> {
if addr == DEBUG_IOPORT as u64 && data.len() == 1 {
self.log_debug_ioport(data[0]);
}
if let Err(e) = self.io_bus.write(addr, data) {
if let vm_device::BusError::MissingAddressRange = e {
warn!("Guest PIO write to unregistered address 0x{:x}", addr);
}
}
Ok(())
}
}
pub struct Vm {
kernel: File,
initramfs: Option<File>,
@ -354,6 +490,22 @@ impl Vm {
)
.map_err(Error::DeviceManager)?;
let memory = memory_manager.lock().unwrap().guest_memory();
#[cfg(target_arch = "x86_64")]
let io_bus = Arc::clone(device_manager.lock().unwrap().io_bus());
let mmio_bus = Arc::clone(device_manager.lock().unwrap().mmio_bus());
// Create the VmOps structure, which implements the VmmOps trait,
// and hand it to the hypervisor.
let vm_ops = Box::new(VmOps {
memory,
#[cfg(target_arch = "x86_64")]
io_bus,
mmio_bus,
#[cfg(target_arch = "x86_64")]
timestamp: std::time::Instant::now(),
});
vm.set_vmmops(vm_ops).map_err(Error::SetVmmOpsInterface)?;
let cpu_manager = cpu::CpuManager::new(
&config.lock().unwrap().cpus.clone(),
&device_manager,
@ -516,7 +668,6 @@ impl Vm {
.unwrap()
.create_devices()
.map_err(Error::DeviceManager)?;
Ok(new_vm)
}