From 0fec326582d191ab80828dcacbeed35ce2dea94f Mon Sep 17 00:00:00 2001
From: Rob Bradford
Date: Wed, 18 Nov 2020 16:37:52 +0000
Subject: [PATCH] hypervisor, vmm: Remove shared ownership of VmmOps

This interface is used by the vCPU thread to delegate responsibility for
handling MMIO/PIO operations and to support different approaches than a
VM exit.

During profiling I found that we were spending 13.75% of the boot CPU
usage acquiring access to the object holding the VmmOps via
ArcSwap::load_full():

    13.75%  6.02%  vcpu0  cloud-hypervisor  [.] arc_swap::ArcSwapAny::load_full
            |
            ---arc_swap::ArcSwapAny::load_full
               |
                --13.43%--::run
                          std::sys_common::backtrace::__rust_begin_short_backtrace
                          core::ops::function::FnOnce::call_once{{vtable-shim}}
                          std::sys::unix::thread::Thread::new::thread_start

However, since the object implementing VmmOps does not need to be mutable
and it is only used from the vCPU side, we can change the ownership to a
simple Arc<> that is passed in when calling create_vcpu(). This completely
removes the above CPU usage from subsequent profiles.

Signed-off-by: Rob Bradford
---
 hypervisor/src/kvm/mod.rs | 35 ++++++++++++++---------------------
 hypervisor/src/vm.rs      |  4 +---
 vmm/src/cpu.rs            | 39 +++++++++++++++++++++++----------------
 vmm/src/interrupt.rs      |  8 ++++----
 vmm/src/vm.rs             |  8 ++++----
 5 files changed, 46 insertions(+), 48 deletions(-)
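Editor's note (below the cut, not part of the commit message or the patch):
a minimal, self-contained sketch of the ownership model this change moves
to. The trait and method names below are simplified stand-ins for
illustration only, not the hypervisor crate's actual definitions. Each vCPU
is handed an optional Arc<Box<dyn VmmOps>> once, at creation time, and exit
handling then only borrows that field instead of performing an
ArcSwapOption::load_full() on every exit.

    use std::sync::Arc;

    // Hypothetical, trimmed-down stand-in for the VmmOps trait.
    trait VmmOps: Send + Sync {
        fn mmio_read(&self, addr: u64, data: &mut [u8]);
    }

    struct Vcpu {
        id: u8,
        // Set once when the vCPU is created; never swapped afterwards.
        vmmops: Option<Arc<Box<dyn VmmOps>>>,
    }

    impl Vcpu {
        // Mirrors the new create_vcpu(id, vmmops) shape: the handler is
        // passed in at construction instead of living behind an ArcSwapOption.
        fn new(id: u8, vmmops: Option<Arc<Box<dyn VmmOps>>>) -> Self {
            Vcpu { id, vmmops }
        }

        fn handle_mmio_read(&self, addr: u64, data: &mut [u8]) {
            // Hot path: a plain borrow, no atomic pointer load per exit.
            if let Some(vmmops) = &self.vmmops {
                vmmops.mmio_read(addr, data);
            }
        }
    }

    struct NoopOps;
    impl VmmOps for NoopOps {
        fn mmio_read(&self, _addr: u64, data: &mut [u8]) {
            data.fill(0);
        }
    }

    fn main() {
        let ops: Arc<Box<dyn VmmOps>> = Arc::new(Box::new(NoopOps));
        // The VMM clones the Arc once per vCPU it creates.
        let vcpu = Vcpu::new(0, Some(ops.clone()));
        let mut data = [0u8; 4];
        vcpu.handle_mmio_read(0x1000, &mut data);
        println!("vcpu {} read {:?}", vcpu.id, data);
    }

A plain Arc is enough here because the handler is installed before the vCPU
threads start and is never replaced afterwards; ArcSwap only pays off when
the value must be swappable while readers are running.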
diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs
index 8d60da1d8..767940d66 100644
--- a/hypervisor/src/kvm/mod.rs
+++ b/hypervisor/src/kvm/mod.rs
@@ -19,7 +19,6 @@ use crate::hypervisor;
 use crate::vm::{self, VmmOps};
 #[cfg(target_arch = "aarch64")]
 use crate::{arm64_core_reg_id, offset__of};
-use arc_swap::ArcSwapOption;
 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
 use serde_derive::{Deserialize, Serialize};
 use std::os::unix::io::{AsRawFd, RawFd};
@@ -97,7 +96,6 @@ pub struct KvmVm {
     #[cfg(target_arch = "x86_64")]
     msrs: MsrEntries,
     state: KvmVmState,
-    vmmops: Arc<ArcSwapOption<Box<dyn VmmOps>>>,
 }
 
 // Returns a `Vec<T>` with a size in bytes at least as large as `size_in_bytes`.
@@ -177,7 +175,11 @@ impl vm::Vm for KvmVm {
     ///
     /// Creates a VcpuFd object from a vcpu RawFd.
     ///
-    fn create_vcpu(&self, id: u8) -> vm::Result<Arc<dyn cpu::Vcpu>> {
+    fn create_vcpu(
+        &self,
+        id: u8,
+        vmmops: Option<Arc<Box<dyn VmmOps>>>,
+    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
         let vc = self
             .fd
             .create_vcpu(id)
@@ -186,7 +188,7 @@ impl vm::Vm for KvmVm {
             fd: vc,
             #[cfg(target_arch = "x86_64")]
             msrs: self.msrs.clone(),
-            vmmops: self.vmmops.clone(),
+            vmmops,
             #[cfg(target_arch = "x86_64")]
             hyperv_synic: AtomicBool::new(false),
         };
@@ -362,13 +364,6 @@ impl vm::Vm for KvmVm {
         Ok(())
     }
-    ///
-    /// Set the VmmOps interface
-    ///
-    fn set_vmmops(&self, vmmops: Box<dyn VmmOps>) -> vm::Result<()> {
-        self.vmmops.store(Some(Arc::new(vmmops)));
-        Ok(())
-    }
     ///
     /// Get dirty pages bitmap (one bit per page)
     ///
@@ -454,7 +449,6 @@ impl hypervisor::Hypervisor for KvmHypervisor {
             fd: vm_fd,
             msrs,
             state: VmState {},
-            vmmops: Arc::new(ArcSwapOption::from(None)),
         }))
     }
 
@@ -463,7 +457,6 @@ impl hypervisor::Hypervisor for KvmHypervisor {
         Ok(Arc::new(KvmVm {
             fd: vm_fd,
             state: VmState {},
-            vmmops: Arc::new(ArcSwapOption::from(None)),
         }))
     }
 }
@@ -524,7 +517,7 @@ pub struct KvmVcpu {
     fd: VcpuFd,
     #[cfg(target_arch = "x86_64")]
     msrs: MsrEntries,
-    vmmops: Arc<ArcSwapOption<Box<dyn VmmOps>>>,
+    vmmops: Option<Arc<Box<dyn VmmOps>>>,
     #[cfg(target_arch = "x86_64")]
     hyperv_synic: AtomicBool,
 }
@@ -535,7 +528,7 @@ pub struct KvmVcpu {
 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
-/// let vcpu = vm.create_vcpu(0).unwrap();
+/// let vcpu = vm.create_vcpu(0, None).unwrap();
 /// vcpu.get/set().unwrap()
 ///
 impl cpu::Vcpu for KvmVcpu {
@@ -723,7 +716,7 @@ impl cpu::Vcpu for KvmVcpu {
             Ok(run) => match run {
                 #[cfg(target_arch = "x86_64")]
                 VcpuExit::IoIn(addr, data) => {
-                    if let Some(vmmops) = self.vmmops.load_full() {
+                    if let Some(vmmops) = &self.vmmops {
                         return vmmops
                             .pio_read(addr.into(), data)
                             .map(|_| cpu::VmExit::Ignore)
@@ -734,7 +727,7 @@ impl cpu::Vcpu for KvmVcpu {
                 }
                 #[cfg(target_arch = "x86_64")]
                 VcpuExit::IoOut(addr, data) => {
-                    if let Some(vmmops) = self.vmmops.load_full() {
+                    if let Some(vmmops) = &self.vmmops {
                         return vmmops
                             .pio_write(addr.into(), data)
                             .map(|_| cpu::VmExit::Ignore)
@@ -767,7 +760,7 @@ impl cpu::Vcpu for KvmVcpu {
                 }
 
                 VcpuExit::MmioRead(addr, data) => {
-                    if let Some(vmmops) = self.vmmops.load_full() {
+                    if let Some(vmmops) = &self.vmmops {
                         return vmmops
                             .mmio_read(addr, data)
                             .map(|_| cpu::VmExit::Ignore)
@@ -777,7 +770,7 @@ impl cpu::Vcpu for KvmVcpu {
                     Ok(cpu::VmExit::MmioRead(addr, data))
                 }
                 VcpuExit::MmioWrite(addr, data) => {
-                    if let Some(vmmops) = self.vmmops.load_full() {
+                    if let Some(vmmops) = &self.vmmops {
                         return vmmops
                             .mmio_write(addr, data)
                             .map(|_| cpu::VmExit::Ignore)
@@ -1136,7 +1129,7 @@ impl cpu::Vcpu for KvmVcpu {
     /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
     /// let vm = hv.create_vm().expect("new VM fd creation failed");
     /// vm.enable_split_irq().unwrap();
-    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// let vcpu = vm.create_vcpu(0, None).unwrap();
     /// let state = vcpu.state().unwrap();
     /// ```
     fn state(&self) -> cpu::Result<CpuState> {
@@ -1282,7 +1275,7 @@ impl cpu::Vcpu for KvmVcpu {
     /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
     /// let vm = hv.create_vm().expect("new VM fd creation failed");
     /// vm.enable_split_irq().unwrap();
-    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// let vcpu = vm.create_vcpu(0, None).unwrap();
     /// let state = vcpu.state().unwrap();
     /// vcpu.set_state(&state).unwrap();
     /// ```
diff --git a/hypervisor/src/vm.rs b/hypervisor/src/vm.rs
index 9184fbea9..ea9a86269 100644
--- a/hypervisor/src/vm.rs
+++ b/hypervisor/src/vm.rs
@@ -174,7 +174,7 @@ pub trait Vm: Send + Sync {
     /// Unregister an event that will, when signaled, trigger the `gsi` IRQ.
     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()>;
     /// Creates a new KVM vCPU file descriptor and maps the memory corresponding
-    fn create_vcpu(&self, id: u8) -> Result<Arc<dyn Vcpu>>;
+    fn create_vcpu(&self, id: u8, vmmops: Option<Arc<Box<dyn VmmOps>>>) -> Result<Arc<dyn Vcpu>>;
     /// Registers an event to be signaled whenever a certain address is written to.
     fn register_ioevent(
         &self,
@@ -220,8 +220,6 @@ pub trait Vm: Send + Sync {
     fn state(&self) -> Result<VmState>;
     /// Set the VM state
     fn set_state(&self, state: VmState) -> Result<()>;
-    /// Set VmmOps interface
-    fn set_vmmops(&self, vmmops: Box<dyn VmmOps>) -> Result<()>;
     /// Get dirty pages bitmap
     fn get_dirty_log(&self, slot: u32, memory_size: u64) -> Result<Vec<u64>>;
 }
diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs
index 742c8627e..af4c7c012 100644
--- a/vmm/src/cpu.rs
+++ b/vmm/src/cpu.rs
@@ -34,10 +34,9 @@ use devices::interrupt_controller::InterruptController;
 use hypervisor::kvm::kvm_bindings;
 #[cfg(target_arch = "x86_64")]
 use hypervisor::CpuId;
-use hypervisor::{CpuState, HypervisorCpuError, VmExit};
-use seccomp::{SeccompAction, SeccompFilter};
-
+use hypervisor::{vm::VmmOps, CpuState, HypervisorCpuError, VmExit};
 use libc::{c_void, siginfo_t};
+use seccomp::{SeccompAction, SeccompFilter};
 use std::os::unix::thread::JoinHandleExt;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Barrier, Mutex};
@@ -225,9 +224,14 @@ impl Vcpu {
     ///
     /// * `id` - Represents the CPU number between [0, max vcpus).
     /// * `vm` - The virtual machine this vcpu will get attached to.
-    pub fn new(id: u8, vm: &Arc<dyn hypervisor::Vm>) -> Result<Arc<Mutex<Self>>> {
+    /// * `vmmops` - Optional object for exit handling.
+    pub fn new(
+        id: u8,
+        vm: &Arc<dyn hypervisor::Vm>,
+        vmmops: Option<Arc<Box<dyn VmmOps>>>,
+    ) -> Result<Arc<Mutex<Self>>> {
         let vcpu = vm
-            .create_vcpu(id)
+            .create_vcpu(id, vmmops)
             .map_err(|e| Error::VcpuCreate(e.into()))?;
         // Initially the cpuid per vCPU is the one supported by this VM.
         Ok(Arc::new(Mutex::new(Vcpu {
@@ -405,6 +409,7 @@ pub struct CpuManager {
     selected_cpu: u8,
     vcpus: Vec<Arc<Mutex<Vcpu>>>,
     seccomp_action: SeccompAction,
+    vmmops: Arc<Box<dyn VmmOps>>,
 }
 
 const CPU_ENABLE_FLAG: usize = 0;
@@ -534,6 +539,7 @@ impl CpuManager {
         reset_evt: EventFd,
         hypervisor: Arc<dyn hypervisor::Hypervisor>,
         seccomp_action: SeccompAction,
+        vmmops: Arc<Box<dyn VmmOps>>,
     ) -> Result<Arc<Mutex<CpuManager>>> {
         let guest_memory = memory_manager.lock().unwrap().guest_memory();
         let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
@@ -565,6 +571,7 @@ impl CpuManager {
             selected_cpu: 0,
             vcpus: Vec::with_capacity(usize::from(config.max_vcpus)),
             seccomp_action,
+            vmmops,
         }));
 
         #[cfg(target_arch = "x86_64")]
@@ -656,7 +663,7 @@ impl CpuManager {
     ) -> Result<Arc<Mutex<Vcpu>>> {
         info!("Creating vCPU: cpu_id = {}", cpu_id);
 
-        let vcpu = Vcpu::new(cpu_id, &self.vm)?;
+        let vcpu = Vcpu::new(cpu_id, &self.vm, Some(self.vmmops.clone()))?;
 
         if let Some(snapshot) = snapshot {
             // AArch64 vCPUs should be initialized after created.
@@ -1429,7 +1436,7 @@ mod tests {
         assert!(hv.check_capability(hypervisor::kvm::Cap::Irqchip));
         // Calling get_lapic will fail if there is no irqchip before hand.
         assert!(vm.create_irq_chip().is_ok());
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
         let klapic_before: LapicState = vcpu.get_lapic().unwrap();
 
         // Compute the value that is expected to represent LVT0 and LVT1.
@@ -1452,7 +1459,7 @@ mod tests {
     fn test_setup_fpu() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().expect("new VM fd creation failed");
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
         setup_fpu(&vcpu).unwrap();
 
         let expected_fpu: FpuState = FpuState {
@@ -1477,7 +1484,7 @@ mod tests {
 
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().expect("new VM fd creation failed");
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
         setup_msrs(&vcpu).unwrap();
 
         // This test will check against the last MSR entry configured (the tenth one).
@@ -1503,7 +1510,7 @@ mod tests {
     fn test_setup_regs() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().expect("new VM fd creation failed");
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
 
         let expected_regs: StandardRegisters = StandardRegisters {
             rflags: 0x0000000000000002u64,
@@ -1531,7 +1538,7 @@ mod tests {
     fn test_setup_sregs() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().expect("new VM fd creation failed");
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
 
         let mut expected_sregs: SpecialRegisters = vcpu.get_sregs().unwrap();
         let gm = GuestMemoryMmap::from_ranges(&vec![(GuestAddress(0), 0x10000)]).unwrap();
@@ -1562,7 +1569,7 @@ mod tests {
     fn test_setup_regs() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().unwrap();
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
         let mut regions = Vec::new();
         regions.push((
             GuestAddress(layout::RAM_64BIT_START),
@@ -1585,7 +1592,7 @@ mod tests {
     fn test_read_mpidr() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().unwrap();
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
         let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
         vm.get_preferred_target(&mut kvi).unwrap();
 
@@ -1609,7 +1616,7 @@ mod tests {
     fn test_save_restore_core_regs() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().unwrap();
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
         let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
         vm.get_preferred_target(&mut kvi).unwrap();
 
@@ -1645,7 +1652,7 @@ mod tests {
     fn test_save_restore_system_regs() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().unwrap();
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
         let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
         vm.get_preferred_target(&mut kvi).unwrap();
 
@@ -1686,7 +1693,7 @@ mod tests {
     fn test_get_set_mpstate() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().unwrap();
-        let vcpu = vm.create_vcpu(0).unwrap();
+        let vcpu = vm.create_vcpu(0, None).unwrap();
         let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
         vm.get_preferred_target(&mut kvi).unwrap();
 
diff --git a/vmm/src/interrupt.rs b/vmm/src/interrupt.rs
index 832a1ff04..7d275fe31 100644
--- a/vmm/src/interrupt.rs
+++ b/vmm/src/interrupt.rs
@@ -415,7 +415,7 @@ mod tests {
     fn test_get_set_dist_regs() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().unwrap();
-        let _ = vm.create_vcpu(0).unwrap();
+        let _ = vm.create_vcpu(0, None).unwrap();
         let gic = create_gic(&vm, 1).expect("Cannot create gic");
 
         let res = get_dist_regs(gic.device());
@@ -431,7 +431,7 @@ mod tests {
     fn test_get_set_redist_regs() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().unwrap();
-        let _ = vm.create_vcpu(0).unwrap();
+        let _ = vm.create_vcpu(0, None).unwrap();
         let gic = create_gic(&vm, 1).expect("Cannot create gic");
 
         let mut gicr_typer = Vec::new();
@@ -449,7 +449,7 @@ mod tests {
     fn test_get_set_icc_regs() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().unwrap();
-        let _ = vm.create_vcpu(0).unwrap();
+        let _ = vm.create_vcpu(0, None).unwrap();
         let gic = create_gic(&vm, 1).expect("Cannot create gic");
 
         let mut gicr_typer = Vec::new();
@@ -467,7 +467,7 @@ mod tests {
     fn test_save_pending_tables() {
         let hv = hypervisor::new().unwrap();
         let vm = hv.create_vm().unwrap();
-        let _ = vm.create_vcpu(0).unwrap();
+        let _ = vm.create_vcpu(0, None).unwrap();
         let gic = create_gic(&vm, 1).expect("Cannot create gic");
 
         assert!(save_pending_tables(gic.device()).is_ok());
diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs
index ceac0ffef..a7190e140 100644
--- a/vmm/src/vm.rs
+++ b/vmm/src/vm.rs
@@ -500,15 +500,14 @@ impl Vm {
         let mmio_bus = Arc::clone(device_manager.lock().unwrap().mmio_bus());
         // Create the VmOps structure, which implements the VmmOps trait.
         // And send it to the hypervisor.
-        let vm_ops = Box::new(VmOps {
+        let vm_ops: Arc<Box<dyn VmmOps>> = Arc::new(Box::new(VmOps {
             memory,
             #[cfg(target_arch = "x86_64")]
             io_bus,
             mmio_bus,
             #[cfg(target_arch = "x86_64")]
             timestamp: std::time::Instant::now(),
-        });
-        vm.set_vmmops(vm_ops).map_err(Error::SetVmmOpsInterface)?;
+        }));
 
         let exit_evt_clone = exit_evt.try_clone().map_err(Error::EventFdClone)?;
         let cpu_manager = cpu::CpuManager::new(
@@ -520,6 +519,7 @@ impl Vm {
             reset_evt,
             hypervisor,
             seccomp_action.clone(),
+            vm_ops,
         )
         .map_err(Error::CpuManager)?;
 
@@ -2187,7 +2187,7 @@ pub fn test_vm() {
     mem.write_slice(&code, load_addr)
         .expect("Writing code to memory failed");
 
-    let vcpu = vm.create_vcpu(0).expect("new Vcpu failed");
+    let vcpu = vm.create_vcpu(0, None).expect("new Vcpu failed");
     let mut vcpu_sregs = vcpu.get_sregs().expect("get sregs failed");
     vcpu_sregs.cs.base = 0;