mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2024-10-01 11:05:46 +00:00
vmm: Move CPU management code to its own module
Move CpuManager, Vcpu and related functionality to its own module (and file) inside the VMM crate Signed-off-by: Rob Bradford <robert.bradford@intel.com>
This commit is contained in:
parent
7b77189c80
commit
6958ec4922
508
vmm/src/cpu.rs
Normal file
508
vmm/src/cpu.rs
Normal file
@ -0,0 +1,508 @@
|
|||||||
|
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE-BSD-3-Clause file.
|
||||||
|
//
|
||||||
|
// Copyright © 2019 Intel Corporation
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
|
||||||
|
//
|
||||||
|
|
||||||
|
use std::os::unix::thread::JoinHandleExt;
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::sync::{Arc, Barrier, Mutex, RwLock};
|
||||||
|
use std::thread;
|
||||||
|
use std::{fmt, io, result};
|
||||||
|
|
||||||
|
use libc::{c_void, siginfo_t};
|
||||||
|
|
||||||
|
use crate::device_manager::DeviceManager;
|
||||||
|
|
||||||
|
use devices::ioapic;
|
||||||
|
use kvm_ioctls::*;
|
||||||
|
|
||||||
|
use vm_memory::{Address, GuestAddress, GuestMemoryMmap};
|
||||||
|
|
||||||
|
use vmm_sys_util::eventfd::EventFd;
|
||||||
|
use vmm_sys_util::signal::{register_signal_handler, validate_signal_num};
|
||||||
|
|
||||||
|
const VCPU_RTSIG_OFFSET: i32 = 0;
|
||||||
|
|
||||||
|
// Debug I/O port
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
const DEBUG_IOPORT: u16 = 0x80;
|
||||||
|
const DEBUG_IOPORT_PREFIX: &str = "Debug I/O port";
|
||||||
|
|
||||||
|
/// Debug I/O port, see:
|
||||||
|
/// https://www.intel.com/content/www/us/en/support/articles/000005500/boards-and-kits.html
|
||||||
|
///
|
||||||
|
/// Since we're not a physical platform, we can freely assign code ranges for
|
||||||
|
/// debugging specific parts of our virtual platform.
|
||||||
|
pub enum DebugIoPortRange {
|
||||||
|
Firmware,
|
||||||
|
Bootloader,
|
||||||
|
Kernel,
|
||||||
|
Userspace,
|
||||||
|
Custom,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DebugIoPortRange {
|
||||||
|
fn from_u8(value: u8) -> DebugIoPortRange {
|
||||||
|
match value {
|
||||||
|
0x00..=0x1f => DebugIoPortRange::Firmware,
|
||||||
|
0x20..=0x3f => DebugIoPortRange::Bootloader,
|
||||||
|
0x40..=0x5f => DebugIoPortRange::Kernel,
|
||||||
|
0x60..=0x7f => DebugIoPortRange::Userspace,
|
||||||
|
_ => DebugIoPortRange::Custom,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for DebugIoPortRange {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
DebugIoPortRange::Firmware => write!(f, "{}: Firmware", DEBUG_IOPORT_PREFIX),
|
||||||
|
DebugIoPortRange::Bootloader => write!(f, "{}: Bootloader", DEBUG_IOPORT_PREFIX),
|
||||||
|
DebugIoPortRange::Kernel => write!(f, "{}: Kernel", DEBUG_IOPORT_PREFIX),
|
||||||
|
DebugIoPortRange::Userspace => write!(f, "{}: Userspace", DEBUG_IOPORT_PREFIX),
|
||||||
|
DebugIoPortRange::Custom => write!(f, "{}: Custom", DEBUG_IOPORT_PREFIX),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
/// Cannot open the VCPU file descriptor.
|
||||||
|
VcpuFd(io::Error),
|
||||||
|
|
||||||
|
/// Cannot run the VCPUs.
|
||||||
|
VcpuRun(io::Error),
|
||||||
|
|
||||||
|
/// Cannot spawn a new vCPU thread.
|
||||||
|
VcpuSpawn(io::Error),
|
||||||
|
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
/// Error configuring the general purpose registers
|
||||||
|
REGSConfiguration(arch::x86_64::regs::Error),
|
||||||
|
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
/// Error configuring the special registers
|
||||||
|
SREGSConfiguration(arch::x86_64::regs::Error),
|
||||||
|
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
/// Error configuring the floating point related registers
|
||||||
|
FPUConfiguration(arch::x86_64::regs::Error),
|
||||||
|
|
||||||
|
/// The call to KVM_SET_CPUID2 failed.
|
||||||
|
SetSupportedCpusFailed(io::Error),
|
||||||
|
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
/// Cannot set the local interruption due to bad configuration.
|
||||||
|
LocalIntConfiguration(arch::x86_64::interrupts::Error),
|
||||||
|
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
/// Error configuring the MSR registers
|
||||||
|
MSRSConfiguration(arch::x86_64::regs::Error),
|
||||||
|
|
||||||
|
/// Unexpected KVM_RUN exit reason
|
||||||
|
VcpuUnhandledKvmExit,
|
||||||
|
|
||||||
|
/// Failed to join on vCPU threads
|
||||||
|
ThreadCleanup,
|
||||||
|
}
|
||||||
|
pub type Result<T> = result::Result<T, Error>;
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
enum CpuidReg {
|
||||||
|
EAX,
|
||||||
|
EBX,
|
||||||
|
ECX,
|
||||||
|
EDX,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct CpuidPatch {
|
||||||
|
pub function: u32,
|
||||||
|
pub index: u32,
|
||||||
|
pub flags_bit: Option<u8>,
|
||||||
|
pub eax_bit: Option<u8>,
|
||||||
|
pub ebx_bit: Option<u8>,
|
||||||
|
pub ecx_bit: Option<u8>,
|
||||||
|
pub edx_bit: Option<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CpuidPatch {
|
||||||
|
fn set_cpuid_reg(
|
||||||
|
cpuid: &mut CpuId,
|
||||||
|
function: u32,
|
||||||
|
index: Option<u32>,
|
||||||
|
reg: CpuidReg,
|
||||||
|
value: u32,
|
||||||
|
) {
|
||||||
|
let entries = cpuid.as_mut_slice();
|
||||||
|
|
||||||
|
for entry in entries.iter_mut() {
|
||||||
|
if entry.function == function && (index == None || index.unwrap() == entry.index) {
|
||||||
|
match reg {
|
||||||
|
CpuidReg::EAX => {
|
||||||
|
entry.eax = value;
|
||||||
|
}
|
||||||
|
CpuidReg::EBX => {
|
||||||
|
entry.ebx = value;
|
||||||
|
}
|
||||||
|
CpuidReg::ECX => {
|
||||||
|
entry.ecx = value;
|
||||||
|
}
|
||||||
|
CpuidReg::EDX => {
|
||||||
|
entry.edx = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn patch_cpuid(cpuid: &mut CpuId, patches: Vec<CpuidPatch>) {
|
||||||
|
let entries = cpuid.as_mut_slice();
|
||||||
|
|
||||||
|
for entry in entries.iter_mut() {
|
||||||
|
for patch in patches.iter() {
|
||||||
|
if entry.function == patch.function && entry.index == patch.index {
|
||||||
|
if let Some(flags_bit) = patch.flags_bit {
|
||||||
|
entry.flags |= 1 << flags_bit;
|
||||||
|
}
|
||||||
|
if let Some(eax_bit) = patch.eax_bit {
|
||||||
|
entry.eax |= 1 << eax_bit;
|
||||||
|
}
|
||||||
|
if let Some(ebx_bit) = patch.ebx_bit {
|
||||||
|
entry.ebx |= 1 << ebx_bit;
|
||||||
|
}
|
||||||
|
if let Some(ecx_bit) = patch.ecx_bit {
|
||||||
|
entry.ecx |= 1 << ecx_bit;
|
||||||
|
}
|
||||||
|
if let Some(edx_bit) = patch.edx_bit {
|
||||||
|
entry.edx |= 1 << edx_bit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A wrapper around creating and using a kvm-based VCPU.
|
||||||
|
pub struct Vcpu {
|
||||||
|
fd: VcpuFd,
|
||||||
|
id: u8,
|
||||||
|
io_bus: Arc<devices::Bus>,
|
||||||
|
mmio_bus: Arc<devices::Bus>,
|
||||||
|
ioapic: Option<Arc<Mutex<ioapic::Ioapic>>>,
|
||||||
|
vm_ts: std::time::Instant,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Vcpu {
|
||||||
|
/// Constructs a new VCPU for `vm`.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `id` - Represents the CPU number between [0, max vcpus).
|
||||||
|
/// * `vm` - The virtual machine this vcpu will get attached to.
|
||||||
|
pub fn new(
|
||||||
|
id: u8,
|
||||||
|
fd: &Arc<VmFd>,
|
||||||
|
io_bus: Arc<devices::Bus>,
|
||||||
|
mmio_bus: Arc<devices::Bus>,
|
||||||
|
ioapic: Option<Arc<Mutex<ioapic::Ioapic>>>,
|
||||||
|
creation_ts: std::time::Instant,
|
||||||
|
) -> Result<Self> {
|
||||||
|
let kvm_vcpu = fd.create_vcpu(id).map_err(Error::VcpuFd)?;
|
||||||
|
// Initially the cpuid per vCPU is the one supported by this VM.
|
||||||
|
Ok(Vcpu {
|
||||||
|
fd: kvm_vcpu,
|
||||||
|
id,
|
||||||
|
io_bus,
|
||||||
|
mmio_bus,
|
||||||
|
ioapic,
|
||||||
|
vm_ts: creation_ts,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Configures a x86_64 specific vcpu and should be called once per vcpu from the vcpu's thread.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `machine_config` - Specifies necessary info used for the CPUID configuration.
|
||||||
|
/// * `kernel_start_addr` - Offset from `guest_mem` at which the kernel starts.
|
||||||
|
/// * `vm` - The virtual machine this vcpu will get attached to.
|
||||||
|
pub fn configure(
|
||||||
|
&mut self,
|
||||||
|
kernel_start_addr: GuestAddress,
|
||||||
|
vm_memory: &Arc<RwLock<GuestMemoryMmap>>,
|
||||||
|
cpuid: CpuId,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut cpuid = cpuid;
|
||||||
|
CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(self.id));
|
||||||
|
self.fd
|
||||||
|
.set_cpuid2(&cpuid)
|
||||||
|
.map_err(Error::SetSupportedCpusFailed)?;
|
||||||
|
|
||||||
|
arch::x86_64::regs::setup_msrs(&self.fd).map_err(Error::MSRSConfiguration)?;
|
||||||
|
// Safe to unwrap because this method is called after the VM is configured
|
||||||
|
arch::x86_64::regs::setup_regs(
|
||||||
|
&self.fd,
|
||||||
|
kernel_start_addr.raw_value(),
|
||||||
|
arch::x86_64::layout::BOOT_STACK_POINTER.raw_value(),
|
||||||
|
arch::x86_64::layout::ZERO_PAGE_START.raw_value(),
|
||||||
|
)
|
||||||
|
.map_err(Error::REGSConfiguration)?;
|
||||||
|
arch::x86_64::regs::setup_fpu(&self.fd).map_err(Error::FPUConfiguration)?;
|
||||||
|
arch::x86_64::regs::setup_sregs(&vm_memory.read().unwrap(), &self.fd)
|
||||||
|
.map_err(Error::SREGSConfiguration)?;
|
||||||
|
arch::x86_64::interrupts::set_lint(&self.fd).map_err(Error::LocalIntConfiguration)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Runs the VCPU until it exits, returning the reason.
|
||||||
|
///
|
||||||
|
/// Note that the state of the VCPU and associated VM must be setup first for this to do
|
||||||
|
/// anything useful.
|
||||||
|
pub fn run(&self) -> Result<bool> {
|
||||||
|
match self.fd.run() {
|
||||||
|
Ok(run) => match run {
|
||||||
|
VcpuExit::IoIn(addr, data) => {
|
||||||
|
self.io_bus.read(u64::from(addr), data);
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
VcpuExit::IoOut(addr, data) => {
|
||||||
|
if addr == DEBUG_IOPORT && data.len() == 1 {
|
||||||
|
self.log_debug_ioport(data[0]);
|
||||||
|
}
|
||||||
|
self.io_bus.write(u64::from(addr), data);
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
VcpuExit::MmioRead(addr, data) => {
|
||||||
|
self.mmio_bus.read(addr as u64, data);
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
VcpuExit::MmioWrite(addr, data) => {
|
||||||
|
self.mmio_bus.write(addr as u64, data);
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
VcpuExit::IoapicEoi(vector) => {
|
||||||
|
if let Some(ioapic) = &self.ioapic {
|
||||||
|
ioapic.lock().unwrap().end_of_interrupt(vector);
|
||||||
|
}
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
VcpuExit::Shutdown => {
|
||||||
|
// Triple fault to trigger a reboot
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
r => {
|
||||||
|
error!("Unexpected exit reason on vcpu run: {:?}", r);
|
||||||
|
Err(Error::VcpuUnhandledKvmExit)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
Err(ref e) => match e.raw_os_error().unwrap() {
|
||||||
|
libc::EAGAIN | libc::EINTR => Ok(true),
|
||||||
|
_ => {
|
||||||
|
error!("VCPU {:?} error {:?}", self.id, e);
|
||||||
|
Err(Error::VcpuUnhandledKvmExit)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log debug io port codes.
|
||||||
|
fn log_debug_ioport(&self, code: u8) {
|
||||||
|
let ts = self.vm_ts.elapsed();
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"[{} code 0x{:x}] {}.{:>06} seconds",
|
||||||
|
DebugIoPortRange::from_u8(code),
|
||||||
|
code,
|
||||||
|
ts.as_secs(),
|
||||||
|
ts.as_micros()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct CpuManager {
|
||||||
|
boot_vcpus: u8,
|
||||||
|
io_bus: Arc<devices::Bus>,
|
||||||
|
mmio_bus: Arc<devices::Bus>,
|
||||||
|
ioapic: Option<Arc<Mutex<ioapic::Ioapic>>>,
|
||||||
|
vm_memory: Arc<RwLock<GuestMemoryMmap>>,
|
||||||
|
cpuid: CpuId,
|
||||||
|
fd: Arc<VmFd>,
|
||||||
|
vcpus_kill_signalled: Arc<AtomicBool>,
|
||||||
|
vcpus_pause_signalled: Arc<AtomicBool>,
|
||||||
|
reset_evt: EventFd,
|
||||||
|
threads: Vec<thread::JoinHandle<()>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CpuManager {
|
||||||
|
pub fn new(
|
||||||
|
boot_vcpus: u8,
|
||||||
|
device_manager: &DeviceManager,
|
||||||
|
guest_memory: Arc<RwLock<GuestMemoryMmap>>,
|
||||||
|
fd: Arc<VmFd>,
|
||||||
|
cpuid: CpuId,
|
||||||
|
reset_evt: EventFd,
|
||||||
|
) -> CpuManager {
|
||||||
|
CpuManager {
|
||||||
|
boot_vcpus,
|
||||||
|
io_bus: device_manager.io_bus().clone(),
|
||||||
|
mmio_bus: device_manager.mmio_bus().clone(),
|
||||||
|
ioapic: device_manager.ioapic().clone(),
|
||||||
|
vm_memory: guest_memory,
|
||||||
|
cpuid,
|
||||||
|
fd,
|
||||||
|
vcpus_kill_signalled: Arc::new(AtomicBool::new(false)),
|
||||||
|
vcpus_pause_signalled: Arc::new(AtomicBool::new(false)),
|
||||||
|
threads: Vec::with_capacity(boot_vcpus as usize),
|
||||||
|
reset_evt,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Starts all the vCPUs that the VM is booting with. Blocks until all vCPUs are running.
|
||||||
|
pub fn start_boot_vcpus(&mut self, entry_addr: GuestAddress) -> Result<()> {
|
||||||
|
let creation_ts = std::time::Instant::now();
|
||||||
|
|
||||||
|
let vcpu_thread_barrier = Arc::new(Barrier::new((self.boot_vcpus + 1) as usize));
|
||||||
|
|
||||||
|
for cpu_id in 0..self.boot_vcpus {
|
||||||
|
let ioapic = if let Some(ioapic) = &self.ioapic {
|
||||||
|
Some(ioapic.clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut vcpu = Vcpu::new(
|
||||||
|
cpu_id,
|
||||||
|
&self.fd,
|
||||||
|
self.io_bus.clone(),
|
||||||
|
self.mmio_bus.clone(),
|
||||||
|
ioapic,
|
||||||
|
creation_ts,
|
||||||
|
)?;
|
||||||
|
vcpu.configure(entry_addr, &self.vm_memory, self.cpuid.clone())?;
|
||||||
|
|
||||||
|
let vcpu_thread_barrier = vcpu_thread_barrier.clone();
|
||||||
|
|
||||||
|
let reset_evt = self.reset_evt.try_clone().unwrap();
|
||||||
|
let vcpu_kill_signalled = self.vcpus_kill_signalled.clone();
|
||||||
|
let vcpu_pause_signalled = self.vcpus_pause_signalled.clone();
|
||||||
|
self.threads.push(
|
||||||
|
thread::Builder::new()
|
||||||
|
.name(format!("vcpu{}", vcpu.id))
|
||||||
|
.spawn(move || {
|
||||||
|
unsafe {
|
||||||
|
extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {
|
||||||
|
}
|
||||||
|
// This uses an async signal safe handler to kill the vcpu handles.
|
||||||
|
register_signal_handler(
|
||||||
|
VCPU_RTSIG_OFFSET,
|
||||||
|
vmm_sys_util::signal::SignalHandler::Siginfo(handle_signal),
|
||||||
|
true,
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
.expect("Failed to register vcpu signal handler");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Block until all CPUs are ready.
|
||||||
|
vcpu_thread_barrier.wait();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
// vcpu.run() returns false on a KVM_EXIT_SHUTDOWN (triple-fault) so trigger a reset
|
||||||
|
match vcpu.run() {
|
||||||
|
Err(e) => {
|
||||||
|
error!("VCPU generated error: {:?}", e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Ok(true) => {}
|
||||||
|
Ok(false) => {
|
||||||
|
reset_evt.write(1).unwrap();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We've been told to terminate
|
||||||
|
if vcpu_kill_signalled.load(Ordering::SeqCst) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we are being told to pause, we park the thread
|
||||||
|
// until the pause boolean is toggled.
|
||||||
|
// The resume operation is responsible for toggling
|
||||||
|
// the boolean and unpark the thread.
|
||||||
|
// We enter a loop because park() could spuriously
|
||||||
|
// return. We will then park() again unless the
|
||||||
|
// pause boolean has been toggled.
|
||||||
|
while vcpu_pause_signalled.load(Ordering::SeqCst) {
|
||||||
|
thread::park();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.map_err(Error::VcpuSpawn)?,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unblock all CPU threads.
|
||||||
|
vcpu_thread_barrier.wait();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn shutdown(&mut self) -> Result<()> {
|
||||||
|
// Tell the vCPUs to stop themselves next time they go through the loop
|
||||||
|
self.vcpus_kill_signalled.store(true, Ordering::SeqCst);
|
||||||
|
|
||||||
|
// Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads
|
||||||
|
// this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set
|
||||||
|
// above.
|
||||||
|
for thread in self.threads.iter() {
|
||||||
|
let signum = validate_signal_num(VCPU_RTSIG_OFFSET, true).unwrap();
|
||||||
|
unsafe {
|
||||||
|
libc::pthread_kill(thread.as_pthread_t(), signum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all the threads to finish
|
||||||
|
for thread in self.threads.drain(..) {
|
||||||
|
thread.join().map_err(|_| Error::ThreadCleanup)?
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn pause(&self) -> Result<()> {
|
||||||
|
// Tell the vCPUs to pause themselves next time they exit
|
||||||
|
self.vcpus_pause_signalled.store(true, Ordering::SeqCst);
|
||||||
|
|
||||||
|
// Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads
|
||||||
|
// this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set
|
||||||
|
// above.
|
||||||
|
for thread in self.threads.iter() {
|
||||||
|
let signum = validate_signal_num(VCPU_RTSIG_OFFSET, true).unwrap();
|
||||||
|
unsafe {
|
||||||
|
libc::pthread_kill(thread.as_pthread_t(), signum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn resume(&self) -> Result<()> {
|
||||||
|
// Toggle the vCPUs pause boolean
|
||||||
|
self.vcpus_pause_signalled.store(false, Ordering::SeqCst);
|
||||||
|
|
||||||
|
// Unpark all the VCPU threads.
|
||||||
|
// Once unparked, the next thing they will do is checking for the pause
|
||||||
|
// boolean. Since it'll be set to false, they will exit their pause loop
|
||||||
|
// and go back to vmx root.
|
||||||
|
for vcpu_thread in self.threads.iter() {
|
||||||
|
vcpu_thread.thread().unpark();
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
@ -27,6 +27,7 @@ use vmm_sys_util::eventfd::EventFd;
|
|||||||
|
|
||||||
pub mod api;
|
pub mod api;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
|
pub mod cpu;
|
||||||
pub mod device_manager;
|
pub mod device_manager;
|
||||||
pub mod vm;
|
pub mod vm;
|
||||||
|
|
||||||
|
506
vmm/src/vm.rs
506
vmm/src/vm.rs
@ -24,6 +24,7 @@ extern crate vm_memory;
|
|||||||
extern crate vm_virtio;
|
extern crate vm_virtio;
|
||||||
|
|
||||||
use crate::config::VmConfig;
|
use crate::config::VmConfig;
|
||||||
|
use crate::cpu;
|
||||||
use crate::device_manager::{get_win_size, Console, DeviceManager, DeviceManagerError};
|
use crate::device_manager::{get_win_size, Console, DeviceManager, DeviceManagerError};
|
||||||
use arch::RegionType;
|
use arch::RegionType;
|
||||||
use devices::ioapic;
|
use devices::ioapic;
|
||||||
@ -32,7 +33,7 @@ use kvm_bindings::{
|
|||||||
KVM_PIT_SPEAKER_DUMMY,
|
KVM_PIT_SPEAKER_DUMMY,
|
||||||
};
|
};
|
||||||
use kvm_ioctls::*;
|
use kvm_ioctls::*;
|
||||||
use libc::{c_void, siginfo_t};
|
|
||||||
use linux_loader::cmdline::Cmdline;
|
use linux_loader::cmdline::Cmdline;
|
||||||
use linux_loader::loader::KernelLoader;
|
use linux_loader::loader::KernelLoader;
|
||||||
use signal_hook::{iterator::Signals, SIGWINCH};
|
use signal_hook::{iterator::Signals, SIGWINCH};
|
||||||
@ -41,10 +42,9 @@ use std::fs::{File, OpenOptions};
|
|||||||
use std::io;
|
use std::io;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::os::unix::io::FromRawFd;
|
use std::os::unix::io::FromRawFd;
|
||||||
use std::os::unix::thread::JoinHandleExt;
|
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::{Arc, RwLock};
|
||||||
use std::sync::{Arc, Barrier, Mutex, RwLock};
|
use std::{result, str, thread};
|
||||||
use std::{fmt, result, str, thread};
|
|
||||||
use vm_allocator::{GsiApic, SystemAllocator};
|
use vm_allocator::{GsiApic, SystemAllocator};
|
||||||
use vm_memory::guest_memory::FileOffset;
|
use vm_memory::guest_memory::FileOffset;
|
||||||
use vm_memory::{
|
use vm_memory::{
|
||||||
@ -52,10 +52,8 @@ use vm_memory::{
|
|||||||
GuestMemoryRegion, GuestUsize,
|
GuestMemoryRegion, GuestUsize,
|
||||||
};
|
};
|
||||||
use vmm_sys_util::eventfd::EventFd;
|
use vmm_sys_util::eventfd::EventFd;
|
||||||
use vmm_sys_util::signal::{register_signal_handler, validate_signal_num};
|
|
||||||
use vmm_sys_util::terminal::Terminal;
|
use vmm_sys_util::terminal::Terminal;
|
||||||
|
|
||||||
const VCPU_RTSIG_OFFSET: i32 = 0;
|
|
||||||
const X86_64_IRQ_BASE: u32 = 5;
|
const X86_64_IRQ_BASE: u32 = 5;
|
||||||
|
|
||||||
// CPUID feature bits
|
// CPUID feature bits
|
||||||
@ -65,48 +63,6 @@ const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit.
|
|||||||
// 64 bit direct boot entry offset for bzImage
|
// 64 bit direct boot entry offset for bzImage
|
||||||
const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200;
|
const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200;
|
||||||
|
|
||||||
// Debug I/O port
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
const DEBUG_IOPORT: u16 = 0x80;
|
|
||||||
const DEBUG_IOPORT_PREFIX: &str = "Debug I/O port";
|
|
||||||
|
|
||||||
/// Debug I/O port, see:
|
|
||||||
/// https://www.intel.com/content/www/us/en/support/articles/000005500/boards-and-kits.html
|
|
||||||
///
|
|
||||||
/// Since we're not a physical platform, we can freely assign code ranges for
|
|
||||||
/// debugging specific parts of our virtual platform.
|
|
||||||
pub enum DebugIoPortRange {
|
|
||||||
Firmware,
|
|
||||||
Bootloader,
|
|
||||||
Kernel,
|
|
||||||
Userspace,
|
|
||||||
Custom,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DebugIoPortRange {
|
|
||||||
fn from_u8(value: u8) -> DebugIoPortRange {
|
|
||||||
match value {
|
|
||||||
0x00..=0x1f => DebugIoPortRange::Firmware,
|
|
||||||
0x20..=0x3f => DebugIoPortRange::Bootloader,
|
|
||||||
0x40..=0x5f => DebugIoPortRange::Kernel,
|
|
||||||
0x60..=0x7f => DebugIoPortRange::Userspace,
|
|
||||||
_ => DebugIoPortRange::Custom,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for DebugIoPortRange {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
DebugIoPortRange::Firmware => write!(f, "{}: Firmware", DEBUG_IOPORT_PREFIX),
|
|
||||||
DebugIoPortRange::Bootloader => write!(f, "{}: Bootloader", DEBUG_IOPORT_PREFIX),
|
|
||||||
DebugIoPortRange::Kernel => write!(f, "{}: Kernel", DEBUG_IOPORT_PREFIX),
|
|
||||||
DebugIoPortRange::Userspace => write!(f, "{}: Userspace", DEBUG_IOPORT_PREFIX),
|
|
||||||
DebugIoPortRange::Custom => write!(f, "{}: Custom", DEBUG_IOPORT_PREFIX),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Errors associated with VM management
|
/// Errors associated with VM management
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
@ -131,40 +87,8 @@ pub enum Error {
|
|||||||
/// Cannot load the command line in memory
|
/// Cannot load the command line in memory
|
||||||
CmdLine,
|
CmdLine,
|
||||||
|
|
||||||
/// Cannot open the VCPU file descriptor.
|
|
||||||
VcpuFd(io::Error),
|
|
||||||
|
|
||||||
/// Cannot run the VCPUs.
|
|
||||||
VcpuRun(io::Error),
|
|
||||||
|
|
||||||
/// Cannot spawn a new vCPU thread.
|
|
||||||
VcpuSpawn(io::Error),
|
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
/// Cannot set the local interruption due to bad configuration.
|
|
||||||
LocalIntConfiguration(arch::x86_64::interrupts::Error),
|
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
/// Error configuring the MSR registers
|
|
||||||
MSRSConfiguration(arch::x86_64::regs::Error),
|
|
||||||
|
|
||||||
PoisonedState,
|
PoisonedState,
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
/// Error configuring the general purpose registers
|
|
||||||
REGSConfiguration(arch::x86_64::regs::Error),
|
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
/// Error configuring the special registers
|
|
||||||
SREGSConfiguration(arch::x86_64::regs::Error),
|
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
/// Error configuring the floating point related registers
|
|
||||||
FPUConfiguration(arch::x86_64::regs::Error),
|
|
||||||
|
|
||||||
/// The call to KVM_SET_CPUID2 failed.
|
|
||||||
SetSupportedCpusFailed(io::Error),
|
|
||||||
|
|
||||||
/// Cannot create a device manager.
|
/// Cannot create a device manager.
|
||||||
DeviceManager(DeviceManagerError),
|
DeviceManager(DeviceManagerError),
|
||||||
|
|
||||||
@ -183,9 +107,6 @@ pub enum Error {
|
|||||||
/// Failed parsing network parameters
|
/// Failed parsing network parameters
|
||||||
ParseNetworkParameters,
|
ParseNetworkParameters,
|
||||||
|
|
||||||
/// Unexpected KVM_RUN exit reason
|
|
||||||
VcpuUnhandledKvmExit,
|
|
||||||
|
|
||||||
/// Memory is overflow
|
/// Memory is overflow
|
||||||
MemOverflow,
|
MemOverflow,
|
||||||
|
|
||||||
@ -221,223 +142,12 @@ pub enum Error {
|
|||||||
|
|
||||||
/// Invalid VM state transition
|
/// Invalid VM state transition
|
||||||
InvalidStateTransition(VmState, VmState),
|
InvalidStateTransition(VmState, VmState),
|
||||||
|
|
||||||
|
/// Error from CPU handling
|
||||||
|
CpuManager(cpu::Error),
|
||||||
}
|
}
|
||||||
pub type Result<T> = result::Result<T, Error>;
|
pub type Result<T> = result::Result<T, Error>;
|
||||||
|
|
||||||
#[allow(dead_code)]
|
|
||||||
#[derive(Copy, Clone)]
|
|
||||||
enum CpuidReg {
|
|
||||||
EAX,
|
|
||||||
EBX,
|
|
||||||
ECX,
|
|
||||||
EDX,
|
|
||||||
}
|
|
||||||
|
|
||||||
struct CpuidPatch {
|
|
||||||
function: u32,
|
|
||||||
index: u32,
|
|
||||||
flags_bit: Option<u8>,
|
|
||||||
eax_bit: Option<u8>,
|
|
||||||
ebx_bit: Option<u8>,
|
|
||||||
ecx_bit: Option<u8>,
|
|
||||||
edx_bit: Option<u8>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CpuidPatch {
|
|
||||||
fn set_cpuid_reg(
|
|
||||||
cpuid: &mut CpuId,
|
|
||||||
function: u32,
|
|
||||||
index: Option<u32>,
|
|
||||||
reg: CpuidReg,
|
|
||||||
value: u32,
|
|
||||||
) {
|
|
||||||
let entries = cpuid.as_mut_slice();
|
|
||||||
|
|
||||||
for entry in entries.iter_mut() {
|
|
||||||
if entry.function == function && (index == None || index.unwrap() == entry.index) {
|
|
||||||
match reg {
|
|
||||||
CpuidReg::EAX => {
|
|
||||||
entry.eax = value;
|
|
||||||
}
|
|
||||||
CpuidReg::EBX => {
|
|
||||||
entry.ebx = value;
|
|
||||||
}
|
|
||||||
CpuidReg::ECX => {
|
|
||||||
entry.ecx = value;
|
|
||||||
}
|
|
||||||
CpuidReg::EDX => {
|
|
||||||
entry.edx = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn patch_cpuid(cpuid: &mut CpuId, patches: Vec<CpuidPatch>) {
|
|
||||||
let entries = cpuid.as_mut_slice();
|
|
||||||
|
|
||||||
for entry in entries.iter_mut() {
|
|
||||||
for patch in patches.iter() {
|
|
||||||
if entry.function == patch.function && entry.index == patch.index {
|
|
||||||
if let Some(flags_bit) = patch.flags_bit {
|
|
||||||
entry.flags |= 1 << flags_bit;
|
|
||||||
}
|
|
||||||
if let Some(eax_bit) = patch.eax_bit {
|
|
||||||
entry.eax |= 1 << eax_bit;
|
|
||||||
}
|
|
||||||
if let Some(ebx_bit) = patch.ebx_bit {
|
|
||||||
entry.ebx |= 1 << ebx_bit;
|
|
||||||
}
|
|
||||||
if let Some(ecx_bit) = patch.ecx_bit {
|
|
||||||
entry.ecx |= 1 << ecx_bit;
|
|
||||||
}
|
|
||||||
if let Some(edx_bit) = patch.edx_bit {
|
|
||||||
entry.edx |= 1 << edx_bit;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A wrapper around creating and using a kvm-based VCPU.
|
|
||||||
pub struct Vcpu {
|
|
||||||
fd: VcpuFd,
|
|
||||||
id: u8,
|
|
||||||
io_bus: Arc<devices::Bus>,
|
|
||||||
mmio_bus: Arc<devices::Bus>,
|
|
||||||
ioapic: Option<Arc<Mutex<ioapic::Ioapic>>>,
|
|
||||||
vm_ts: std::time::Instant,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Vcpu {
|
|
||||||
/// Constructs a new VCPU for `vm`.
|
|
||||||
///
|
|
||||||
/// # Arguments
|
|
||||||
///
|
|
||||||
/// * `id` - Represents the CPU number between [0, max vcpus).
|
|
||||||
/// * `vm` - The virtual machine this vcpu will get attached to.
|
|
||||||
pub fn new(
|
|
||||||
id: u8,
|
|
||||||
fd: &Arc<VmFd>,
|
|
||||||
io_bus: Arc<devices::Bus>,
|
|
||||||
mmio_bus: Arc<devices::Bus>,
|
|
||||||
ioapic: Option<Arc<Mutex<ioapic::Ioapic>>>,
|
|
||||||
creation_ts: std::time::Instant,
|
|
||||||
) -> Result<Self> {
|
|
||||||
let kvm_vcpu = fd.create_vcpu(id).map_err(Error::VcpuFd)?;
|
|
||||||
// Initially the cpuid per vCPU is the one supported by this VM.
|
|
||||||
Ok(Vcpu {
|
|
||||||
fd: kvm_vcpu,
|
|
||||||
id,
|
|
||||||
io_bus,
|
|
||||||
mmio_bus,
|
|
||||||
ioapic,
|
|
||||||
vm_ts: creation_ts,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Configures a x86_64 specific vcpu and should be called once per vcpu from the vcpu's thread.
|
|
||||||
///
|
|
||||||
/// # Arguments
|
|
||||||
///
|
|
||||||
/// * `machine_config` - Specifies necessary info used for the CPUID configuration.
|
|
||||||
/// * `kernel_start_addr` - Offset from `guest_mem` at which the kernel starts.
|
|
||||||
/// * `vm` - The virtual machine this vcpu will get attached to.
|
|
||||||
pub fn configure(
|
|
||||||
&mut self,
|
|
||||||
kernel_start_addr: GuestAddress,
|
|
||||||
vm_memory: &Arc<RwLock<GuestMemoryMmap>>,
|
|
||||||
cpuid: CpuId,
|
|
||||||
) -> Result<()> {
|
|
||||||
let mut cpuid = cpuid;
|
|
||||||
CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(self.id));
|
|
||||||
self.fd
|
|
||||||
.set_cpuid2(&cpuid)
|
|
||||||
.map_err(Error::SetSupportedCpusFailed)?;
|
|
||||||
|
|
||||||
arch::x86_64::regs::setup_msrs(&self.fd).map_err(Error::MSRSConfiguration)?;
|
|
||||||
// Safe to unwrap because this method is called after the VM is configured
|
|
||||||
arch::x86_64::regs::setup_regs(
|
|
||||||
&self.fd,
|
|
||||||
kernel_start_addr.raw_value(),
|
|
||||||
arch::x86_64::layout::BOOT_STACK_POINTER.raw_value(),
|
|
||||||
arch::x86_64::layout::ZERO_PAGE_START.raw_value(),
|
|
||||||
)
|
|
||||||
.map_err(Error::REGSConfiguration)?;
|
|
||||||
arch::x86_64::regs::setup_fpu(&self.fd).map_err(Error::FPUConfiguration)?;
|
|
||||||
arch::x86_64::regs::setup_sregs(&vm_memory.read().unwrap(), &self.fd)
|
|
||||||
.map_err(Error::SREGSConfiguration)?;
|
|
||||||
arch::x86_64::interrupts::set_lint(&self.fd).map_err(Error::LocalIntConfiguration)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Runs the VCPU until it exits, returning the reason.
|
|
||||||
///
|
|
||||||
/// Note that the state of the VCPU and associated VM must be setup first for this to do
|
|
||||||
/// anything useful.
|
|
||||||
pub fn run(&self) -> Result<bool> {
|
|
||||||
match self.fd.run() {
|
|
||||||
Ok(run) => match run {
|
|
||||||
VcpuExit::IoIn(addr, data) => {
|
|
||||||
self.io_bus.read(u64::from(addr), data);
|
|
||||||
Ok(true)
|
|
||||||
}
|
|
||||||
VcpuExit::IoOut(addr, data) => {
|
|
||||||
if addr == DEBUG_IOPORT && data.len() == 1 {
|
|
||||||
self.log_debug_ioport(data[0]);
|
|
||||||
}
|
|
||||||
self.io_bus.write(u64::from(addr), data);
|
|
||||||
Ok(true)
|
|
||||||
}
|
|
||||||
VcpuExit::MmioRead(addr, data) => {
|
|
||||||
self.mmio_bus.read(addr as u64, data);
|
|
||||||
Ok(true)
|
|
||||||
}
|
|
||||||
VcpuExit::MmioWrite(addr, data) => {
|
|
||||||
self.mmio_bus.write(addr as u64, data);
|
|
||||||
Ok(true)
|
|
||||||
}
|
|
||||||
VcpuExit::IoapicEoi(vector) => {
|
|
||||||
if let Some(ioapic) = &self.ioapic {
|
|
||||||
ioapic.lock().unwrap().end_of_interrupt(vector);
|
|
||||||
}
|
|
||||||
Ok(true)
|
|
||||||
}
|
|
||||||
VcpuExit::Shutdown => {
|
|
||||||
// Triple fault to trigger a reboot
|
|
||||||
Ok(false)
|
|
||||||
}
|
|
||||||
r => {
|
|
||||||
error!("Unexpected exit reason on vcpu run: {:?}", r);
|
|
||||||
Err(Error::VcpuUnhandledKvmExit)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
Err(ref e) => match e.raw_os_error().unwrap() {
|
|
||||||
libc::EAGAIN | libc::EINTR => Ok(true),
|
|
||||||
_ => {
|
|
||||||
error!("VCPU {:?} error {:?}", self.id, e);
|
|
||||||
Err(Error::VcpuUnhandledKvmExit)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Log debug io port codes.
|
|
||||||
fn log_debug_ioport(&self, code: u8) {
|
|
||||||
let ts = self.vm_ts.elapsed();
|
|
||||||
|
|
||||||
debug!(
|
|
||||||
"[{} code 0x{:x}] {}.{:>06} seconds",
|
|
||||||
DebugIoPortRange::from_u8(code),
|
|
||||||
code,
|
|
||||||
ts.as_secs(),
|
|
||||||
ts.as_micros()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct VmInfo<'a> {
|
pub struct VmInfo<'a> {
|
||||||
pub memory: &'a Arc<RwLock<GuestMemoryMmap>>,
|
pub memory: &'a Arc<RwLock<GuestMemoryMmap>>,
|
||||||
pub vm_fd: &'a Arc<VmFd>,
|
pub vm_fd: &'a Arc<VmFd>,
|
||||||
@ -486,186 +196,6 @@ impl VmState {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct CpuManager {
|
|
||||||
boot_vcpus: u8,
|
|
||||||
io_bus: Arc<devices::Bus>,
|
|
||||||
mmio_bus: Arc<devices::Bus>,
|
|
||||||
ioapic: Option<Arc<Mutex<ioapic::Ioapic>>>,
|
|
||||||
vm_memory: Arc<RwLock<GuestMemoryMmap>>,
|
|
||||||
cpuid: CpuId,
|
|
||||||
fd: Arc<VmFd>,
|
|
||||||
vcpus_kill_signalled: Arc<AtomicBool>,
|
|
||||||
vcpus_pause_signalled: Arc<AtomicBool>,
|
|
||||||
reset_evt: EventFd,
|
|
||||||
threads: Vec<thread::JoinHandle<()>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CpuManager {
|
|
||||||
fn new(
|
|
||||||
boot_vcpus: u8,
|
|
||||||
device_manager: &DeviceManager,
|
|
||||||
guest_memory: Arc<RwLock<GuestMemoryMmap>>,
|
|
||||||
fd: Arc<VmFd>,
|
|
||||||
cpuid: CpuId,
|
|
||||||
reset_evt: EventFd,
|
|
||||||
) -> CpuManager {
|
|
||||||
CpuManager {
|
|
||||||
boot_vcpus,
|
|
||||||
io_bus: device_manager.io_bus().clone(),
|
|
||||||
mmio_bus: device_manager.mmio_bus().clone(),
|
|
||||||
ioapic: device_manager.ioapic().clone(),
|
|
||||||
vm_memory: guest_memory,
|
|
||||||
cpuid,
|
|
||||||
fd,
|
|
||||||
vcpus_kill_signalled: Arc::new(AtomicBool::new(false)),
|
|
||||||
vcpus_pause_signalled: Arc::new(AtomicBool::new(false)),
|
|
||||||
threads: Vec::with_capacity(boot_vcpus as usize),
|
|
||||||
reset_evt,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Starts all the vCPUs that the VM is booting with. Blocks until all vCPUs are running.
|
|
||||||
fn start_boot_vcpus(&mut self, entry_addr: GuestAddress) -> Result<()> {
|
|
||||||
let creation_ts = std::time::Instant::now();
|
|
||||||
|
|
||||||
let vcpu_thread_barrier = Arc::new(Barrier::new((self.boot_vcpus + 1) as usize));
|
|
||||||
|
|
||||||
for cpu_id in 0..self.boot_vcpus {
|
|
||||||
let ioapic = if let Some(ioapic) = &self.ioapic {
|
|
||||||
Some(ioapic.clone())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut vcpu = Vcpu::new(
|
|
||||||
cpu_id,
|
|
||||||
&self.fd,
|
|
||||||
self.io_bus.clone(),
|
|
||||||
self.mmio_bus.clone(),
|
|
||||||
ioapic,
|
|
||||||
creation_ts,
|
|
||||||
)?;
|
|
||||||
vcpu.configure(entry_addr, &self.vm_memory, self.cpuid.clone())?;
|
|
||||||
|
|
||||||
let vcpu_thread_barrier = vcpu_thread_barrier.clone();
|
|
||||||
|
|
||||||
let reset_evt = self.reset_evt.try_clone().unwrap();
|
|
||||||
let vcpu_kill_signalled = self.vcpus_kill_signalled.clone();
|
|
||||||
let vcpu_pause_signalled = self.vcpus_pause_signalled.clone();
|
|
||||||
self.threads.push(
|
|
||||||
thread::Builder::new()
|
|
||||||
.name(format!("vcpu{}", vcpu.id))
|
|
||||||
.spawn(move || {
|
|
||||||
unsafe {
|
|
||||||
extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {
|
|
||||||
}
|
|
||||||
// This uses an async signal safe handler to kill the vcpu handles.
|
|
||||||
register_signal_handler(
|
|
||||||
VCPU_RTSIG_OFFSET,
|
|
||||||
vmm_sys_util::signal::SignalHandler::Siginfo(handle_signal),
|
|
||||||
true,
|
|
||||||
0,
|
|
||||||
)
|
|
||||||
.expect("Failed to register vcpu signal handler");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Block until all CPUs are ready.
|
|
||||||
vcpu_thread_barrier.wait();
|
|
||||||
|
|
||||||
loop {
|
|
||||||
// vcpu.run() returns false on a KVM_EXIT_SHUTDOWN (triple-fault) so trigger a reset
|
|
||||||
match vcpu.run() {
|
|
||||||
Err(e) => {
|
|
||||||
error!("VCPU generated error: {:?}", e);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Ok(true) => {}
|
|
||||||
Ok(false) => {
|
|
||||||
reset_evt.write(1).unwrap();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// We've been told to terminate
|
|
||||||
if vcpu_kill_signalled.load(Ordering::SeqCst) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we are being told to pause, we park the thread
|
|
||||||
// until the pause boolean is toggled.
|
|
||||||
// The resume operation is responsible for toggling
|
|
||||||
// the boolean and unpark the thread.
|
|
||||||
// We enter a loop because park() could spuriously
|
|
||||||
// return. We will then park() again unless the
|
|
||||||
// pause boolean has been toggled.
|
|
||||||
while vcpu_pause_signalled.load(Ordering::SeqCst) {
|
|
||||||
thread::park();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.map_err(Error::VcpuSpawn)?,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unblock all CPU threads.
|
|
||||||
vcpu_thread_barrier.wait();
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn shutdown(&mut self) -> Result<()> {
|
|
||||||
// Tell the vCPUs to stop themselves next time they go through the loop
|
|
||||||
self.vcpus_kill_signalled.store(true, Ordering::SeqCst);
|
|
||||||
|
|
||||||
// Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads
|
|
||||||
// this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set
|
|
||||||
// above.
|
|
||||||
for thread in self.threads.iter() {
|
|
||||||
let signum = validate_signal_num(VCPU_RTSIG_OFFSET, true).unwrap();
|
|
||||||
unsafe {
|
|
||||||
libc::pthread_kill(thread.as_pthread_t(), signum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for all the threads to finish
|
|
||||||
for thread in self.threads.drain(..) {
|
|
||||||
thread.join().map_err(|_| Error::ThreadCleanup)?
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn pause(&self) -> Result<()> {
|
|
||||||
// Tell the vCPUs to pause themselves next time they exit
|
|
||||||
self.vcpus_pause_signalled.store(true, Ordering::SeqCst);
|
|
||||||
|
|
||||||
// Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads
|
|
||||||
// this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set
|
|
||||||
// above.
|
|
||||||
for thread in self.threads.iter() {
|
|
||||||
let signum = validate_signal_num(VCPU_RTSIG_OFFSET, true).unwrap();
|
|
||||||
unsafe {
|
|
||||||
libc::pthread_kill(thread.as_pthread_t(), signum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn resume(&self) -> Result<()> {
|
|
||||||
// Toggle the vCPUs pause boolean
|
|
||||||
self.vcpus_pause_signalled.store(false, Ordering::SeqCst);
|
|
||||||
|
|
||||||
// Unpark all the VCPU threads.
|
|
||||||
// Once unparked, the next thing they will do is checking for the pause
|
|
||||||
// boolean. Since it'll be set to false, they will exit their pause loop
|
|
||||||
// and go back to vmx root.
|
|
||||||
for vcpu_thread in self.threads.iter() {
|
|
||||||
vcpu_thread.thread().unpark();
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Vm {
|
pub struct Vm {
|
||||||
kernel: File,
|
kernel: File,
|
||||||
memory: Arc<RwLock<GuestMemoryMmap>>,
|
memory: Arc<RwLock<GuestMemoryMmap>>,
|
||||||
@ -675,7 +205,7 @@ pub struct Vm {
|
|||||||
on_tty: bool,
|
on_tty: bool,
|
||||||
signals: Option<Signals>,
|
signals: Option<Signals>,
|
||||||
state: RwLock<VmState>,
|
state: RwLock<VmState>,
|
||||||
cpu_manager: CpuManager,
|
cpu_manager: cpu::CpuManager,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_host_cpu_phys_bits() -> u8 {
|
fn get_host_cpu_phys_bits() -> u8 {
|
||||||
@ -802,7 +332,7 @@ impl Vm {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Patch tsc deadline timer bit
|
// Patch tsc deadline timer bit
|
||||||
cpuid_patches.push(CpuidPatch {
|
cpuid_patches.push(cpu::CpuidPatch {
|
||||||
function: 1,
|
function: 1,
|
||||||
index: 0,
|
index: 0,
|
||||||
flags_bit: None,
|
flags_bit: None,
|
||||||
@ -824,7 +354,7 @@ impl Vm {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Patch hypervisor bit
|
// Patch hypervisor bit
|
||||||
cpuid_patches.push(CpuidPatch {
|
cpuid_patches.push(cpu::CpuidPatch {
|
||||||
function: 1,
|
function: 1,
|
||||||
index: 0,
|
index: 0,
|
||||||
flags_bit: None,
|
flags_bit: None,
|
||||||
@ -839,7 +369,7 @@ impl Vm {
|
|||||||
.get_supported_cpuid(MAX_KVM_CPUID_ENTRIES)
|
.get_supported_cpuid(MAX_KVM_CPUID_ENTRIES)
|
||||||
.map_err(Error::VmSetup)?;
|
.map_err(Error::VmSetup)?;
|
||||||
|
|
||||||
CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);
|
cpu::CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);
|
||||||
|
|
||||||
let ioapic = GsiApic::new(
|
let ioapic = GsiApic::new(
|
||||||
X86_64_IRQ_BASE,
|
X86_64_IRQ_BASE,
|
||||||
@ -891,7 +421,7 @@ impl Vm {
|
|||||||
let on_tty = unsafe { libc::isatty(libc::STDIN_FILENO as i32) } != 0;
|
let on_tty = unsafe { libc::isatty(libc::STDIN_FILENO as i32) } != 0;
|
||||||
|
|
||||||
let boot_vcpus = config.cpus.cpu_count;
|
let boot_vcpus = config.cpus.cpu_count;
|
||||||
let cpu_manager = CpuManager::new(
|
let cpu_manager = cpu::CpuManager::new(
|
||||||
boot_vcpus,
|
boot_vcpus,
|
||||||
&device_manager,
|
&device_manager,
|
||||||
guest_memory.clone(),
|
guest_memory.clone(),
|
||||||
@ -1034,7 +564,7 @@ impl Vm {
|
|||||||
signals.close();
|
signals.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
self.cpu_manager.shutdown()?;
|
self.cpu_manager.shutdown().map_err(Error::CpuManager)?;
|
||||||
|
|
||||||
// Wait for all the threads to finish
|
// Wait for all the threads to finish
|
||||||
for thread in self.threads.drain(..) {
|
for thread in self.threads.drain(..) {
|
||||||
@ -1051,7 +581,7 @@ impl Vm {
|
|||||||
|
|
||||||
state.valid_transition(new_state)?;
|
state.valid_transition(new_state)?;
|
||||||
|
|
||||||
self.cpu_manager.pause()?;
|
self.cpu_manager.pause().map_err(Error::CpuManager)?;
|
||||||
|
|
||||||
*state = new_state;
|
*state = new_state;
|
||||||
|
|
||||||
@ -1064,7 +594,7 @@ impl Vm {
|
|||||||
|
|
||||||
state.valid_transition(new_state)?;
|
state.valid_transition(new_state)?;
|
||||||
|
|
||||||
self.cpu_manager.resume()?;
|
self.cpu_manager.resume().map_err(Error::CpuManager)?;
|
||||||
|
|
||||||
// And we're back to the Running state.
|
// And we're back to the Running state.
|
||||||
*state = new_state;
|
*state = new_state;
|
||||||
@ -1092,7 +622,9 @@ impl Vm {
|
|||||||
|
|
||||||
let entry_addr = self.load_kernel()?;
|
let entry_addr = self.load_kernel()?;
|
||||||
|
|
||||||
self.cpu_manager.start_boot_vcpus(entry_addr)?;
|
self.cpu_manager
|
||||||
|
.start_boot_vcpus(entry_addr)
|
||||||
|
.map_err(Error::CpuManager)?;
|
||||||
|
|
||||||
if self.devices.console().input_enabled() {
|
if self.devices.console().input_enabled() {
|
||||||
let console = self.devices.console().clone();
|
let console = self.devices.console().clone();
|
||||||
|
Loading…
Reference in New Issue
Block a user