From 6958ec49223ed15c7dd5d8e19009ed40c950d529 Mon Sep 17 00:00:00 2001 From: Rob Bradford Date: Mon, 11 Nov 2019 13:55:50 +0000 Subject: [PATCH] vmm: Move CPU management code to its own module Move CpuManager, Vcpu and related functionality to its own module (and file) inside the VMM crate Signed-off-by: Rob Bradford --- vmm/src/cpu.rs | 508 +++++++++++++++++++++++++++++++++++++++++++++++++ vmm/src/lib.rs | 1 + vmm/src/vm.rs | 506 ++---------------------------------------------- 3 files changed, 528 insertions(+), 487 deletions(-) create mode 100644 vmm/src/cpu.rs diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs new file mode 100644 index 000000000..7e2e3240a --- /dev/null +++ b/vmm/src/cpu.rs @@ -0,0 +1,508 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// Copyright © 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause +// + +use std::os::unix::thread::JoinHandleExt; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Barrier, Mutex, RwLock}; +use std::thread; +use std::{fmt, io, result}; + +use libc::{c_void, siginfo_t}; + +use crate::device_manager::DeviceManager; + +use devices::ioapic; +use kvm_ioctls::*; + +use vm_memory::{Address, GuestAddress, GuestMemoryMmap}; + +use vmm_sys_util::eventfd::EventFd; +use vmm_sys_util::signal::{register_signal_handler, validate_signal_num}; + +const VCPU_RTSIG_OFFSET: i32 = 0; + +// Debug I/O port +#[cfg(target_arch = "x86_64")] +const DEBUG_IOPORT: u16 = 0x80; +const DEBUG_IOPORT_PREFIX: &str = "Debug I/O port"; + +/// Debug I/O port, see: +/// https://www.intel.com/content/www/us/en/support/articles/000005500/boards-and-kits.html +/// +/// Since we're not a physical platform, we can freely assign code ranges for +/// debugging specific parts of our virtual platform. +pub enum DebugIoPortRange { + Firmware, + Bootloader, + Kernel, + Userspace, + Custom, +} + +impl DebugIoPortRange { + fn from_u8(value: u8) -> DebugIoPortRange { + match value { + 0x00..=0x1f => DebugIoPortRange::Firmware, + 0x20..=0x3f => DebugIoPortRange::Bootloader, + 0x40..=0x5f => DebugIoPortRange::Kernel, + 0x60..=0x7f => DebugIoPortRange::Userspace, + _ => DebugIoPortRange::Custom, + } + } +} + +impl fmt::Display for DebugIoPortRange { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DebugIoPortRange::Firmware => write!(f, "{}: Firmware", DEBUG_IOPORT_PREFIX), + DebugIoPortRange::Bootloader => write!(f, "{}: Bootloader", DEBUG_IOPORT_PREFIX), + DebugIoPortRange::Kernel => write!(f, "{}: Kernel", DEBUG_IOPORT_PREFIX), + DebugIoPortRange::Userspace => write!(f, "{}: Userspace", DEBUG_IOPORT_PREFIX), + DebugIoPortRange::Custom => write!(f, "{}: Custom", DEBUG_IOPORT_PREFIX), + } + } +} + +#[derive(Debug)] +pub enum Error { + /// Cannot open the VCPU file descriptor. + VcpuFd(io::Error), + + /// Cannot run the VCPUs. + VcpuRun(io::Error), + + /// Cannot spawn a new vCPU thread. + VcpuSpawn(io::Error), + + #[cfg(target_arch = "x86_64")] + /// Error configuring the general purpose registers + REGSConfiguration(arch::x86_64::regs::Error), + + #[cfg(target_arch = "x86_64")] + /// Error configuring the special registers + SREGSConfiguration(arch::x86_64::regs::Error), + + #[cfg(target_arch = "x86_64")] + /// Error configuring the floating point related registers + FPUConfiguration(arch::x86_64::regs::Error), + + /// The call to KVM_SET_CPUID2 failed. + SetSupportedCpusFailed(io::Error), + + #[cfg(target_arch = "x86_64")] + /// Cannot set the local interruption due to bad configuration. + LocalIntConfiguration(arch::x86_64::interrupts::Error), + + #[cfg(target_arch = "x86_64")] + /// Error configuring the MSR registers + MSRSConfiguration(arch::x86_64::regs::Error), + + /// Unexpected KVM_RUN exit reason + VcpuUnhandledKvmExit, + + /// Failed to join on vCPU threads + ThreadCleanup, +} +pub type Result = result::Result; + +#[allow(dead_code)] +#[derive(Copy, Clone)] +enum CpuidReg { + EAX, + EBX, + ECX, + EDX, +} + +pub struct CpuidPatch { + pub function: u32, + pub index: u32, + pub flags_bit: Option, + pub eax_bit: Option, + pub ebx_bit: Option, + pub ecx_bit: Option, + pub edx_bit: Option, +} + +impl CpuidPatch { + fn set_cpuid_reg( + cpuid: &mut CpuId, + function: u32, + index: Option, + reg: CpuidReg, + value: u32, + ) { + let entries = cpuid.as_mut_slice(); + + for entry in entries.iter_mut() { + if entry.function == function && (index == None || index.unwrap() == entry.index) { + match reg { + CpuidReg::EAX => { + entry.eax = value; + } + CpuidReg::EBX => { + entry.ebx = value; + } + CpuidReg::ECX => { + entry.ecx = value; + } + CpuidReg::EDX => { + entry.edx = value; + } + } + } + } + } + + pub fn patch_cpuid(cpuid: &mut CpuId, patches: Vec) { + let entries = cpuid.as_mut_slice(); + + for entry in entries.iter_mut() { + for patch in patches.iter() { + if entry.function == patch.function && entry.index == patch.index { + if let Some(flags_bit) = patch.flags_bit { + entry.flags |= 1 << flags_bit; + } + if let Some(eax_bit) = patch.eax_bit { + entry.eax |= 1 << eax_bit; + } + if let Some(ebx_bit) = patch.ebx_bit { + entry.ebx |= 1 << ebx_bit; + } + if let Some(ecx_bit) = patch.ecx_bit { + entry.ecx |= 1 << ecx_bit; + } + if let Some(edx_bit) = patch.edx_bit { + entry.edx |= 1 << edx_bit; + } + } + } + } + } +} + +/// A wrapper around creating and using a kvm-based VCPU. +pub struct Vcpu { + fd: VcpuFd, + id: u8, + io_bus: Arc, + mmio_bus: Arc, + ioapic: Option>>, + vm_ts: std::time::Instant, +} + +impl Vcpu { + /// Constructs a new VCPU for `vm`. + /// + /// # Arguments + /// + /// * `id` - Represents the CPU number between [0, max vcpus). + /// * `vm` - The virtual machine this vcpu will get attached to. + pub fn new( + id: u8, + fd: &Arc, + io_bus: Arc, + mmio_bus: Arc, + ioapic: Option>>, + creation_ts: std::time::Instant, + ) -> Result { + let kvm_vcpu = fd.create_vcpu(id).map_err(Error::VcpuFd)?; + // Initially the cpuid per vCPU is the one supported by this VM. + Ok(Vcpu { + fd: kvm_vcpu, + id, + io_bus, + mmio_bus, + ioapic, + vm_ts: creation_ts, + }) + } + + /// Configures a x86_64 specific vcpu and should be called once per vcpu from the vcpu's thread. + /// + /// # Arguments + /// + /// * `machine_config` - Specifies necessary info used for the CPUID configuration. + /// * `kernel_start_addr` - Offset from `guest_mem` at which the kernel starts. + /// * `vm` - The virtual machine this vcpu will get attached to. + pub fn configure( + &mut self, + kernel_start_addr: GuestAddress, + vm_memory: &Arc>, + cpuid: CpuId, + ) -> Result<()> { + let mut cpuid = cpuid; + CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(self.id)); + self.fd + .set_cpuid2(&cpuid) + .map_err(Error::SetSupportedCpusFailed)?; + + arch::x86_64::regs::setup_msrs(&self.fd).map_err(Error::MSRSConfiguration)?; + // Safe to unwrap because this method is called after the VM is configured + arch::x86_64::regs::setup_regs( + &self.fd, + kernel_start_addr.raw_value(), + arch::x86_64::layout::BOOT_STACK_POINTER.raw_value(), + arch::x86_64::layout::ZERO_PAGE_START.raw_value(), + ) + .map_err(Error::REGSConfiguration)?; + arch::x86_64::regs::setup_fpu(&self.fd).map_err(Error::FPUConfiguration)?; + arch::x86_64::regs::setup_sregs(&vm_memory.read().unwrap(), &self.fd) + .map_err(Error::SREGSConfiguration)?; + arch::x86_64::interrupts::set_lint(&self.fd).map_err(Error::LocalIntConfiguration)?; + Ok(()) + } + + /// Runs the VCPU until it exits, returning the reason. + /// + /// Note that the state of the VCPU and associated VM must be setup first for this to do + /// anything useful. + pub fn run(&self) -> Result { + match self.fd.run() { + Ok(run) => match run { + VcpuExit::IoIn(addr, data) => { + self.io_bus.read(u64::from(addr), data); + Ok(true) + } + VcpuExit::IoOut(addr, data) => { + if addr == DEBUG_IOPORT && data.len() == 1 { + self.log_debug_ioport(data[0]); + } + self.io_bus.write(u64::from(addr), data); + Ok(true) + } + VcpuExit::MmioRead(addr, data) => { + self.mmio_bus.read(addr as u64, data); + Ok(true) + } + VcpuExit::MmioWrite(addr, data) => { + self.mmio_bus.write(addr as u64, data); + Ok(true) + } + VcpuExit::IoapicEoi(vector) => { + if let Some(ioapic) = &self.ioapic { + ioapic.lock().unwrap().end_of_interrupt(vector); + } + Ok(true) + } + VcpuExit::Shutdown => { + // Triple fault to trigger a reboot + Ok(false) + } + r => { + error!("Unexpected exit reason on vcpu run: {:?}", r); + Err(Error::VcpuUnhandledKvmExit) + } + }, + + Err(ref e) => match e.raw_os_error().unwrap() { + libc::EAGAIN | libc::EINTR => Ok(true), + _ => { + error!("VCPU {:?} error {:?}", self.id, e); + Err(Error::VcpuUnhandledKvmExit) + } + }, + } + } + + // Log debug io port codes. + fn log_debug_ioport(&self, code: u8) { + let ts = self.vm_ts.elapsed(); + + debug!( + "[{} code 0x{:x}] {}.{:>06} seconds", + DebugIoPortRange::from_u8(code), + code, + ts.as_secs(), + ts.as_micros() + ); + } +} + +pub struct CpuManager { + boot_vcpus: u8, + io_bus: Arc, + mmio_bus: Arc, + ioapic: Option>>, + vm_memory: Arc>, + cpuid: CpuId, + fd: Arc, + vcpus_kill_signalled: Arc, + vcpus_pause_signalled: Arc, + reset_evt: EventFd, + threads: Vec>, +} + +impl CpuManager { + pub fn new( + boot_vcpus: u8, + device_manager: &DeviceManager, + guest_memory: Arc>, + fd: Arc, + cpuid: CpuId, + reset_evt: EventFd, + ) -> CpuManager { + CpuManager { + boot_vcpus, + io_bus: device_manager.io_bus().clone(), + mmio_bus: device_manager.mmio_bus().clone(), + ioapic: device_manager.ioapic().clone(), + vm_memory: guest_memory, + cpuid, + fd, + vcpus_kill_signalled: Arc::new(AtomicBool::new(false)), + vcpus_pause_signalled: Arc::new(AtomicBool::new(false)), + threads: Vec::with_capacity(boot_vcpus as usize), + reset_evt, + } + } + + // Starts all the vCPUs that the VM is booting with. Blocks until all vCPUs are running. + pub fn start_boot_vcpus(&mut self, entry_addr: GuestAddress) -> Result<()> { + let creation_ts = std::time::Instant::now(); + + let vcpu_thread_barrier = Arc::new(Barrier::new((self.boot_vcpus + 1) as usize)); + + for cpu_id in 0..self.boot_vcpus { + let ioapic = if let Some(ioapic) = &self.ioapic { + Some(ioapic.clone()) + } else { + None + }; + + let mut vcpu = Vcpu::new( + cpu_id, + &self.fd, + self.io_bus.clone(), + self.mmio_bus.clone(), + ioapic, + creation_ts, + )?; + vcpu.configure(entry_addr, &self.vm_memory, self.cpuid.clone())?; + + let vcpu_thread_barrier = vcpu_thread_barrier.clone(); + + let reset_evt = self.reset_evt.try_clone().unwrap(); + let vcpu_kill_signalled = self.vcpus_kill_signalled.clone(); + let vcpu_pause_signalled = self.vcpus_pause_signalled.clone(); + self.threads.push( + thread::Builder::new() + .name(format!("vcpu{}", vcpu.id)) + .spawn(move || { + unsafe { + extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) { + } + // This uses an async signal safe handler to kill the vcpu handles. + register_signal_handler( + VCPU_RTSIG_OFFSET, + vmm_sys_util::signal::SignalHandler::Siginfo(handle_signal), + true, + 0, + ) + .expect("Failed to register vcpu signal handler"); + } + + // Block until all CPUs are ready. + vcpu_thread_barrier.wait(); + + loop { + // vcpu.run() returns false on a KVM_EXIT_SHUTDOWN (triple-fault) so trigger a reset + match vcpu.run() { + Err(e) => { + error!("VCPU generated error: {:?}", e); + break; + } + Ok(true) => {} + Ok(false) => { + reset_evt.write(1).unwrap(); + break; + } + } + + // We've been told to terminate + if vcpu_kill_signalled.load(Ordering::SeqCst) { + break; + } + + // If we are being told to pause, we park the thread + // until the pause boolean is toggled. + // The resume operation is responsible for toggling + // the boolean and unpark the thread. + // We enter a loop because park() could spuriously + // return. We will then park() again unless the + // pause boolean has been toggled. + while vcpu_pause_signalled.load(Ordering::SeqCst) { + thread::park(); + } + } + }) + .map_err(Error::VcpuSpawn)?, + ); + } + + // Unblock all CPU threads. + vcpu_thread_barrier.wait(); + Ok(()) + } + + pub fn shutdown(&mut self) -> Result<()> { + // Tell the vCPUs to stop themselves next time they go through the loop + self.vcpus_kill_signalled.store(true, Ordering::SeqCst); + + // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads + // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set + // above. + for thread in self.threads.iter() { + let signum = validate_signal_num(VCPU_RTSIG_OFFSET, true).unwrap(); + unsafe { + libc::pthread_kill(thread.as_pthread_t(), signum); + } + } + + // Wait for all the threads to finish + for thread in self.threads.drain(..) { + thread.join().map_err(|_| Error::ThreadCleanup)? + } + + Ok(()) + } + + pub fn pause(&self) -> Result<()> { + // Tell the vCPUs to pause themselves next time they exit + self.vcpus_pause_signalled.store(true, Ordering::SeqCst); + + // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads + // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set + // above. + for thread in self.threads.iter() { + let signum = validate_signal_num(VCPU_RTSIG_OFFSET, true).unwrap(); + unsafe { + libc::pthread_kill(thread.as_pthread_t(), signum); + } + } + + Ok(()) + } + + pub fn resume(&self) -> Result<()> { + // Toggle the vCPUs pause boolean + self.vcpus_pause_signalled.store(false, Ordering::SeqCst); + + // Unpark all the VCPU threads. + // Once unparked, the next thing they will do is checking for the pause + // boolean. Since it'll be set to false, they will exit their pause loop + // and go back to vmx root. + for vcpu_thread in self.threads.iter() { + vcpu_thread.thread().unpark(); + } + Ok(()) + } +} diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 6cf830905..db176f4f9 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -27,6 +27,7 @@ use vmm_sys_util::eventfd::EventFd; pub mod api; pub mod config; +pub mod cpu; pub mod device_manager; pub mod vm; diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 909e7ec14..fe03a393f 100755 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -24,6 +24,7 @@ extern crate vm_memory; extern crate vm_virtio; use crate::config::VmConfig; +use crate::cpu; use crate::device_manager::{get_win_size, Console, DeviceManager, DeviceManagerError}; use arch::RegionType; use devices::ioapic; @@ -32,7 +33,7 @@ use kvm_bindings::{ KVM_PIT_SPEAKER_DUMMY, }; use kvm_ioctls::*; -use libc::{c_void, siginfo_t}; + use linux_loader::cmdline::Cmdline; use linux_loader::loader::KernelLoader; use signal_hook::{iterator::Signals, SIGWINCH}; @@ -41,10 +42,9 @@ use std::fs::{File, OpenOptions}; use std::io; use std::ops::Deref; use std::os::unix::io::FromRawFd; -use std::os::unix::thread::JoinHandleExt; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Barrier, Mutex, RwLock}; -use std::{fmt, result, str, thread}; + +use std::sync::{Arc, RwLock}; +use std::{result, str, thread}; use vm_allocator::{GsiApic, SystemAllocator}; use vm_memory::guest_memory::FileOffset; use vm_memory::{ @@ -52,10 +52,8 @@ use vm_memory::{ GuestMemoryRegion, GuestUsize, }; use vmm_sys_util::eventfd::EventFd; -use vmm_sys_util::signal::{register_signal_handler, validate_signal_num}; use vmm_sys_util::terminal::Terminal; -const VCPU_RTSIG_OFFSET: i32 = 0; const X86_64_IRQ_BASE: u32 = 5; // CPUID feature bits @@ -65,48 +63,6 @@ const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit. // 64 bit direct boot entry offset for bzImage const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200; -// Debug I/O port -#[cfg(target_arch = "x86_64")] -const DEBUG_IOPORT: u16 = 0x80; -const DEBUG_IOPORT_PREFIX: &str = "Debug I/O port"; - -/// Debug I/O port, see: -/// https://www.intel.com/content/www/us/en/support/articles/000005500/boards-and-kits.html -/// -/// Since we're not a physical platform, we can freely assign code ranges for -/// debugging specific parts of our virtual platform. -pub enum DebugIoPortRange { - Firmware, - Bootloader, - Kernel, - Userspace, - Custom, -} - -impl DebugIoPortRange { - fn from_u8(value: u8) -> DebugIoPortRange { - match value { - 0x00..=0x1f => DebugIoPortRange::Firmware, - 0x20..=0x3f => DebugIoPortRange::Bootloader, - 0x40..=0x5f => DebugIoPortRange::Kernel, - 0x60..=0x7f => DebugIoPortRange::Userspace, - _ => DebugIoPortRange::Custom, - } - } -} - -impl fmt::Display for DebugIoPortRange { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - DebugIoPortRange::Firmware => write!(f, "{}: Firmware", DEBUG_IOPORT_PREFIX), - DebugIoPortRange::Bootloader => write!(f, "{}: Bootloader", DEBUG_IOPORT_PREFIX), - DebugIoPortRange::Kernel => write!(f, "{}: Kernel", DEBUG_IOPORT_PREFIX), - DebugIoPortRange::Userspace => write!(f, "{}: Userspace", DEBUG_IOPORT_PREFIX), - DebugIoPortRange::Custom => write!(f, "{}: Custom", DEBUG_IOPORT_PREFIX), - } - } -} - /// Errors associated with VM management #[derive(Debug)] pub enum Error { @@ -131,40 +87,8 @@ pub enum Error { /// Cannot load the command line in memory CmdLine, - /// Cannot open the VCPU file descriptor. - VcpuFd(io::Error), - - /// Cannot run the VCPUs. - VcpuRun(io::Error), - - /// Cannot spawn a new vCPU thread. - VcpuSpawn(io::Error), - - #[cfg(target_arch = "x86_64")] - /// Cannot set the local interruption due to bad configuration. - LocalIntConfiguration(arch::x86_64::interrupts::Error), - - #[cfg(target_arch = "x86_64")] - /// Error configuring the MSR registers - MSRSConfiguration(arch::x86_64::regs::Error), - PoisonedState, - #[cfg(target_arch = "x86_64")] - /// Error configuring the general purpose registers - REGSConfiguration(arch::x86_64::regs::Error), - - #[cfg(target_arch = "x86_64")] - /// Error configuring the special registers - SREGSConfiguration(arch::x86_64::regs::Error), - - #[cfg(target_arch = "x86_64")] - /// Error configuring the floating point related registers - FPUConfiguration(arch::x86_64::regs::Error), - - /// The call to KVM_SET_CPUID2 failed. - SetSupportedCpusFailed(io::Error), - /// Cannot create a device manager. DeviceManager(DeviceManagerError), @@ -183,9 +107,6 @@ pub enum Error { /// Failed parsing network parameters ParseNetworkParameters, - /// Unexpected KVM_RUN exit reason - VcpuUnhandledKvmExit, - /// Memory is overflow MemOverflow, @@ -221,223 +142,12 @@ pub enum Error { /// Invalid VM state transition InvalidStateTransition(VmState, VmState), + + /// Error from CPU handling + CpuManager(cpu::Error), } pub type Result = result::Result; -#[allow(dead_code)] -#[derive(Copy, Clone)] -enum CpuidReg { - EAX, - EBX, - ECX, - EDX, -} - -struct CpuidPatch { - function: u32, - index: u32, - flags_bit: Option, - eax_bit: Option, - ebx_bit: Option, - ecx_bit: Option, - edx_bit: Option, -} - -impl CpuidPatch { - fn set_cpuid_reg( - cpuid: &mut CpuId, - function: u32, - index: Option, - reg: CpuidReg, - value: u32, - ) { - let entries = cpuid.as_mut_slice(); - - for entry in entries.iter_mut() { - if entry.function == function && (index == None || index.unwrap() == entry.index) { - match reg { - CpuidReg::EAX => { - entry.eax = value; - } - CpuidReg::EBX => { - entry.ebx = value; - } - CpuidReg::ECX => { - entry.ecx = value; - } - CpuidReg::EDX => { - entry.edx = value; - } - } - } - } - } - - fn patch_cpuid(cpuid: &mut CpuId, patches: Vec) { - let entries = cpuid.as_mut_slice(); - - for entry in entries.iter_mut() { - for patch in patches.iter() { - if entry.function == patch.function && entry.index == patch.index { - if let Some(flags_bit) = patch.flags_bit { - entry.flags |= 1 << flags_bit; - } - if let Some(eax_bit) = patch.eax_bit { - entry.eax |= 1 << eax_bit; - } - if let Some(ebx_bit) = patch.ebx_bit { - entry.ebx |= 1 << ebx_bit; - } - if let Some(ecx_bit) = patch.ecx_bit { - entry.ecx |= 1 << ecx_bit; - } - if let Some(edx_bit) = patch.edx_bit { - entry.edx |= 1 << edx_bit; - } - } - } - } - } -} - -/// A wrapper around creating and using a kvm-based VCPU. -pub struct Vcpu { - fd: VcpuFd, - id: u8, - io_bus: Arc, - mmio_bus: Arc, - ioapic: Option>>, - vm_ts: std::time::Instant, -} - -impl Vcpu { - /// Constructs a new VCPU for `vm`. - /// - /// # Arguments - /// - /// * `id` - Represents the CPU number between [0, max vcpus). - /// * `vm` - The virtual machine this vcpu will get attached to. - pub fn new( - id: u8, - fd: &Arc, - io_bus: Arc, - mmio_bus: Arc, - ioapic: Option>>, - creation_ts: std::time::Instant, - ) -> Result { - let kvm_vcpu = fd.create_vcpu(id).map_err(Error::VcpuFd)?; - // Initially the cpuid per vCPU is the one supported by this VM. - Ok(Vcpu { - fd: kvm_vcpu, - id, - io_bus, - mmio_bus, - ioapic, - vm_ts: creation_ts, - }) - } - - /// Configures a x86_64 specific vcpu and should be called once per vcpu from the vcpu's thread. - /// - /// # Arguments - /// - /// * `machine_config` - Specifies necessary info used for the CPUID configuration. - /// * `kernel_start_addr` - Offset from `guest_mem` at which the kernel starts. - /// * `vm` - The virtual machine this vcpu will get attached to. - pub fn configure( - &mut self, - kernel_start_addr: GuestAddress, - vm_memory: &Arc>, - cpuid: CpuId, - ) -> Result<()> { - let mut cpuid = cpuid; - CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(self.id)); - self.fd - .set_cpuid2(&cpuid) - .map_err(Error::SetSupportedCpusFailed)?; - - arch::x86_64::regs::setup_msrs(&self.fd).map_err(Error::MSRSConfiguration)?; - // Safe to unwrap because this method is called after the VM is configured - arch::x86_64::regs::setup_regs( - &self.fd, - kernel_start_addr.raw_value(), - arch::x86_64::layout::BOOT_STACK_POINTER.raw_value(), - arch::x86_64::layout::ZERO_PAGE_START.raw_value(), - ) - .map_err(Error::REGSConfiguration)?; - arch::x86_64::regs::setup_fpu(&self.fd).map_err(Error::FPUConfiguration)?; - arch::x86_64::regs::setup_sregs(&vm_memory.read().unwrap(), &self.fd) - .map_err(Error::SREGSConfiguration)?; - arch::x86_64::interrupts::set_lint(&self.fd).map_err(Error::LocalIntConfiguration)?; - Ok(()) - } - - /// Runs the VCPU until it exits, returning the reason. - /// - /// Note that the state of the VCPU and associated VM must be setup first for this to do - /// anything useful. - pub fn run(&self) -> Result { - match self.fd.run() { - Ok(run) => match run { - VcpuExit::IoIn(addr, data) => { - self.io_bus.read(u64::from(addr), data); - Ok(true) - } - VcpuExit::IoOut(addr, data) => { - if addr == DEBUG_IOPORT && data.len() == 1 { - self.log_debug_ioport(data[0]); - } - self.io_bus.write(u64::from(addr), data); - Ok(true) - } - VcpuExit::MmioRead(addr, data) => { - self.mmio_bus.read(addr as u64, data); - Ok(true) - } - VcpuExit::MmioWrite(addr, data) => { - self.mmio_bus.write(addr as u64, data); - Ok(true) - } - VcpuExit::IoapicEoi(vector) => { - if let Some(ioapic) = &self.ioapic { - ioapic.lock().unwrap().end_of_interrupt(vector); - } - Ok(true) - } - VcpuExit::Shutdown => { - // Triple fault to trigger a reboot - Ok(false) - } - r => { - error!("Unexpected exit reason on vcpu run: {:?}", r); - Err(Error::VcpuUnhandledKvmExit) - } - }, - - Err(ref e) => match e.raw_os_error().unwrap() { - libc::EAGAIN | libc::EINTR => Ok(true), - _ => { - error!("VCPU {:?} error {:?}", self.id, e); - Err(Error::VcpuUnhandledKvmExit) - } - }, - } - } - - // Log debug io port codes. - fn log_debug_ioport(&self, code: u8) { - let ts = self.vm_ts.elapsed(); - - debug!( - "[{} code 0x{:x}] {}.{:>06} seconds", - DebugIoPortRange::from_u8(code), - code, - ts.as_secs(), - ts.as_micros() - ); - } -} - pub struct VmInfo<'a> { pub memory: &'a Arc>, pub vm_fd: &'a Arc, @@ -486,186 +196,6 @@ impl VmState { } } -pub struct CpuManager { - boot_vcpus: u8, - io_bus: Arc, - mmio_bus: Arc, - ioapic: Option>>, - vm_memory: Arc>, - cpuid: CpuId, - fd: Arc, - vcpus_kill_signalled: Arc, - vcpus_pause_signalled: Arc, - reset_evt: EventFd, - threads: Vec>, -} - -impl CpuManager { - fn new( - boot_vcpus: u8, - device_manager: &DeviceManager, - guest_memory: Arc>, - fd: Arc, - cpuid: CpuId, - reset_evt: EventFd, - ) -> CpuManager { - CpuManager { - boot_vcpus, - io_bus: device_manager.io_bus().clone(), - mmio_bus: device_manager.mmio_bus().clone(), - ioapic: device_manager.ioapic().clone(), - vm_memory: guest_memory, - cpuid, - fd, - vcpus_kill_signalled: Arc::new(AtomicBool::new(false)), - vcpus_pause_signalled: Arc::new(AtomicBool::new(false)), - threads: Vec::with_capacity(boot_vcpus as usize), - reset_evt, - } - } - - // Starts all the vCPUs that the VM is booting with. Blocks until all vCPUs are running. - fn start_boot_vcpus(&mut self, entry_addr: GuestAddress) -> Result<()> { - let creation_ts = std::time::Instant::now(); - - let vcpu_thread_barrier = Arc::new(Barrier::new((self.boot_vcpus + 1) as usize)); - - for cpu_id in 0..self.boot_vcpus { - let ioapic = if let Some(ioapic) = &self.ioapic { - Some(ioapic.clone()) - } else { - None - }; - - let mut vcpu = Vcpu::new( - cpu_id, - &self.fd, - self.io_bus.clone(), - self.mmio_bus.clone(), - ioapic, - creation_ts, - )?; - vcpu.configure(entry_addr, &self.vm_memory, self.cpuid.clone())?; - - let vcpu_thread_barrier = vcpu_thread_barrier.clone(); - - let reset_evt = self.reset_evt.try_clone().unwrap(); - let vcpu_kill_signalled = self.vcpus_kill_signalled.clone(); - let vcpu_pause_signalled = self.vcpus_pause_signalled.clone(); - self.threads.push( - thread::Builder::new() - .name(format!("vcpu{}", vcpu.id)) - .spawn(move || { - unsafe { - extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) { - } - // This uses an async signal safe handler to kill the vcpu handles. - register_signal_handler( - VCPU_RTSIG_OFFSET, - vmm_sys_util::signal::SignalHandler::Siginfo(handle_signal), - true, - 0, - ) - .expect("Failed to register vcpu signal handler"); - } - - // Block until all CPUs are ready. - vcpu_thread_barrier.wait(); - - loop { - // vcpu.run() returns false on a KVM_EXIT_SHUTDOWN (triple-fault) so trigger a reset - match vcpu.run() { - Err(e) => { - error!("VCPU generated error: {:?}", e); - break; - } - Ok(true) => {} - Ok(false) => { - reset_evt.write(1).unwrap(); - break; - } - } - - // We've been told to terminate - if vcpu_kill_signalled.load(Ordering::SeqCst) { - break; - } - - // If we are being told to pause, we park the thread - // until the pause boolean is toggled. - // The resume operation is responsible for toggling - // the boolean and unpark the thread. - // We enter a loop because park() could spuriously - // return. We will then park() again unless the - // pause boolean has been toggled. - while vcpu_pause_signalled.load(Ordering::SeqCst) { - thread::park(); - } - } - }) - .map_err(Error::VcpuSpawn)?, - ); - } - - // Unblock all CPU threads. - vcpu_thread_barrier.wait(); - Ok(()) - } - - fn shutdown(&mut self) -> Result<()> { - // Tell the vCPUs to stop themselves next time they go through the loop - self.vcpus_kill_signalled.store(true, Ordering::SeqCst); - - // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads - // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set - // above. - for thread in self.threads.iter() { - let signum = validate_signal_num(VCPU_RTSIG_OFFSET, true).unwrap(); - unsafe { - libc::pthread_kill(thread.as_pthread_t(), signum); - } - } - - // Wait for all the threads to finish - for thread in self.threads.drain(..) { - thread.join().map_err(|_| Error::ThreadCleanup)? - } - - Ok(()) - } - - fn pause(&self) -> Result<()> { - // Tell the vCPUs to pause themselves next time they exit - self.vcpus_pause_signalled.store(true, Ordering::SeqCst); - - // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads - // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set - // above. - for thread in self.threads.iter() { - let signum = validate_signal_num(VCPU_RTSIG_OFFSET, true).unwrap(); - unsafe { - libc::pthread_kill(thread.as_pthread_t(), signum); - } - } - - Ok(()) - } - - fn resume(&self) -> Result<()> { - // Toggle the vCPUs pause boolean - self.vcpus_pause_signalled.store(false, Ordering::SeqCst); - - // Unpark all the VCPU threads. - // Once unparked, the next thing they will do is checking for the pause - // boolean. Since it'll be set to false, they will exit their pause loop - // and go back to vmx root. - for vcpu_thread in self.threads.iter() { - vcpu_thread.thread().unpark(); - } - Ok(()) - } -} - pub struct Vm { kernel: File, memory: Arc>, @@ -675,7 +205,7 @@ pub struct Vm { on_tty: bool, signals: Option, state: RwLock, - cpu_manager: CpuManager, + cpu_manager: cpu::CpuManager, } fn get_host_cpu_phys_bits() -> u8 { @@ -802,7 +332,7 @@ impl Vm { } // Patch tsc deadline timer bit - cpuid_patches.push(CpuidPatch { + cpuid_patches.push(cpu::CpuidPatch { function: 1, index: 0, flags_bit: None, @@ -824,7 +354,7 @@ impl Vm { } // Patch hypervisor bit - cpuid_patches.push(CpuidPatch { + cpuid_patches.push(cpu::CpuidPatch { function: 1, index: 0, flags_bit: None, @@ -839,7 +369,7 @@ impl Vm { .get_supported_cpuid(MAX_KVM_CPUID_ENTRIES) .map_err(Error::VmSetup)?; - CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches); + cpu::CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches); let ioapic = GsiApic::new( X86_64_IRQ_BASE, @@ -891,7 +421,7 @@ impl Vm { let on_tty = unsafe { libc::isatty(libc::STDIN_FILENO as i32) } != 0; let boot_vcpus = config.cpus.cpu_count; - let cpu_manager = CpuManager::new( + let cpu_manager = cpu::CpuManager::new( boot_vcpus, &device_manager, guest_memory.clone(), @@ -1034,7 +564,7 @@ impl Vm { signals.close(); } - self.cpu_manager.shutdown()?; + self.cpu_manager.shutdown().map_err(Error::CpuManager)?; // Wait for all the threads to finish for thread in self.threads.drain(..) { @@ -1051,7 +581,7 @@ impl Vm { state.valid_transition(new_state)?; - self.cpu_manager.pause()?; + self.cpu_manager.pause().map_err(Error::CpuManager)?; *state = new_state; @@ -1064,7 +594,7 @@ impl Vm { state.valid_transition(new_state)?; - self.cpu_manager.resume()?; + self.cpu_manager.resume().map_err(Error::CpuManager)?; // And we're back to the Running state. *state = new_state; @@ -1092,7 +622,9 @@ impl Vm { let entry_addr = self.load_kernel()?; - self.cpu_manager.start_boot_vcpus(entry_addr)?; + self.cpu_manager + .start_boot_vcpus(entry_addr) + .map_err(Error::CpuManager)?; if self.devices.console().input_enabled() { let console = self.devices.console().clone();