2020-02-12 03:37:33 +00:00
|
|
|
// Copyright © 2020, Oracle and/or its affiliates.
|
|
|
|
//
|
2019-05-08 10:22:53 +00:00
|
|
|
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
|
|
//
|
|
|
|
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE-BSD-3-Clause file.
|
|
|
|
//
|
2019-02-28 13:16:58 +00:00
|
|
|
// Copyright © 2019 Intel Corporation
|
|
|
|
//
|
2019-05-08 10:22:53 +00:00
|
|
|
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
|
|
|
|
//
|
2019-02-28 13:16:58 +00:00
|
|
|
|
2021-08-05 10:01:35 +00:00
|
|
|
#[cfg(any(target_arch = "aarch64", feature = "acpi"))]
|
2020-09-04 09:26:43 +00:00
|
|
|
use crate::config::NumaConfig;
|
2020-04-06 15:24:46 +00:00
|
|
|
use crate::config::{
|
2021-09-16 19:47:36 +00:00
|
|
|
DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, PmemConfig, UserDeviceConfig,
|
|
|
|
ValidationError, VmConfig, VsockConfig,
|
2020-04-06 15:24:46 +00:00
|
|
|
};
|
2021-09-16 19:47:36 +00:00
|
|
|
use crate::cpu;
|
2021-09-10 11:27:08 +00:00
|
|
|
use crate::device_manager::{self, Console, DeviceManager, DeviceManagerError, PtyPair};
|
2020-11-30 09:06:13 +00:00
|
|
|
use crate::device_tree::DeviceTree;
|
2021-10-05 15:53:08 +00:00
|
|
|
use crate::memory_manager::{
|
|
|
|
Error as MemoryManagerError, MemoryManager, MemoryManagerSnapshotData,
|
|
|
|
};
|
2020-06-23 09:39:39 +00:00
|
|
|
use crate::migration::{get_vm_snapshot, url_to_path, VM_SNAPSHOT_FILE};
|
2020-09-09 23:33:58 +00:00
|
|
|
use crate::seccomp_filters::{get_seccomp_filter, Thread};
|
2021-08-06 23:28:42 +00:00
|
|
|
use crate::GuestMemoryMmap;
|
2020-06-11 15:27:46 +00:00
|
|
|
use crate::{
|
|
|
|
PciDeviceInfo, CPU_MANAGER_SNAPSHOT_ID, DEVICE_MANAGER_SNAPSHOT_ID, MEMORY_MANAGER_SNAPSHOT_ID,
|
|
|
|
};
|
2019-11-22 13:54:52 +00:00
|
|
|
use anyhow::anyhow;
|
2020-10-13 07:44:37 +00:00
|
|
|
use arch::get_host_cpu_phys_bits;
|
2021-09-30 10:38:16 +00:00
|
|
|
#[cfg(all(feature = "tdx", feature = "acpi"))]
|
|
|
|
use arch::x86_64::tdx::TdVmmDataRegionType;
|
2021-05-13 13:30:59 +00:00
|
|
|
#[cfg(feature = "tdx")]
|
2021-09-30 10:38:16 +00:00
|
|
|
use arch::x86_64::tdx::{TdVmmDataRegion, TdvfSection};
|
2020-06-02 02:29:54 +00:00
|
|
|
use arch::EntryPoint;
|
2021-08-06 23:28:42 +00:00
|
|
|
#[cfg(any(target_arch = "aarch64", feature = "acpi"))]
|
|
|
|
use arch::{NumaNode, NumaNodes};
|
2021-01-12 15:10:05 +00:00
|
|
|
use devices::AcpiNotificationFlags;
|
2020-09-03 20:50:56 +00:00
|
|
|
use hypervisor::vm::{HypervisorVmError, VmmOps};
|
2019-09-27 08:39:56 +00:00
|
|
|
use linux_loader::cmdline::Cmdline;
|
2020-05-12 09:49:12 +00:00
|
|
|
#[cfg(target_arch = "x86_64")]
|
2020-06-25 06:20:12 +00:00
|
|
|
use linux_loader::loader::elf::PvhBootCapability::PvhEntryPresent;
|
2021-04-19 07:50:46 +00:00
|
|
|
#[cfg(target_arch = "aarch64")]
|
|
|
|
use linux_loader::loader::pe::Error::InvalidImageMagicNumber;
|
2019-02-28 13:16:58 +00:00
|
|
|
use linux_loader::loader::KernelLoader;
|
2021-08-17 03:40:11 +00:00
|
|
|
use seccompiler::{apply_filter, SeccompAction};
|
2021-01-03 09:43:10 +00:00
|
|
|
use signal_hook::{
|
|
|
|
consts::{SIGINT, SIGTERM, SIGWINCH},
|
|
|
|
iterator::backend::Handle,
|
|
|
|
iterator::Signals,
|
|
|
|
};
|
2020-10-13 07:44:37 +00:00
|
|
|
use std::cmp;
|
2021-08-05 10:01:35 +00:00
|
|
|
#[cfg(any(target_arch = "aarch64", feature = "acpi"))]
|
|
|
|
use std::collections::BTreeMap;
|
|
|
|
use std::collections::HashMap;
|
2020-03-15 17:56:07 +00:00
|
|
|
use std::convert::TryInto;
|
2020-09-03 20:50:56 +00:00
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
use std::fmt;
|
2020-02-25 00:09:54 +00:00
|
|
|
use std::fs::{File, OpenOptions};
|
2020-11-04 15:00:23 +00:00
|
|
|
use std::io::{self, Read, Write};
|
2020-05-12 09:49:12 +00:00
|
|
|
use std::io::{Seek, SeekFrom};
|
2020-06-24 10:20:13 +00:00
|
|
|
use std::num::Wrapping;
|
2020-02-11 16:22:40 +00:00
|
|
|
use std::ops::Deref;
|
2021-09-08 14:35:52 +00:00
|
|
|
use std::panic::AssertUnwindSafe;
|
2019-11-11 14:31:11 +00:00
|
|
|
use std::sync::{Arc, Mutex, RwLock};
|
2019-11-11 13:55:50 +00:00
|
|
|
use std::{result, str, thread};
|
2020-09-03 20:50:56 +00:00
|
|
|
use vm_device::Bus;
|
2021-09-17 08:59:30 +00:00
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
use vm_device::BusDevice;
|
2021-09-27 11:43:04 +00:00
|
|
|
#[cfg(any(target_arch = "aarch64", feature = "tdx"))]
|
|
|
|
use vm_memory::Address;
|
|
|
|
use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic};
|
2021-09-24 12:30:01 +00:00
|
|
|
#[cfg(feature = "tdx")]
|
2021-09-27 11:43:04 +00:00
|
|
|
use vm_memory::{GuestMemory, GuestMemoryRegion};
|
2019-05-12 11:53:47 +00:00
|
|
|
use vm_migration::{
|
2021-09-27 11:43:04 +00:00
|
|
|
protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
|
|
|
|
SnapshotDataSection, Snapshottable, Transportable,
|
2019-05-12 11:53:47 +00:00
|
|
|
};
|
2019-09-04 14:28:48 +00:00
|
|
|
use vmm_sys_util::eventfd::EventFd;
|
vmm: ensure signal handlers run on the right thread
Despite setting up a dedicated thread for signal handling, we weren't
making sure that the signals we were listening for there were actually
dispatched to the right thread. While the signal-hook provides an
iterator API, so we can know that we're only processing the signals
coming out of the iterator on our signal handling thread, the actual
signal handling code from signal-hook, which pushes the signals onto
the iterator, can run on any thread. This can lead to seccomp
violations when the signal-hook signal handler does something that
isn't allowed on that thread by our seccomp policy.
To reproduce, resize a terminal running cloud-hypervisor continuously
for a few minutes. Eventually, the kernel will deliver a SIGWINCH to
a thread with a restrictive seccomp policy, and a seccomp violation
will trigger.
As part of this change, it's also necessary to allow rt_sigreturn(2)
on the signal handling thread, so signal handlers are actually allowed
to run on it. The fact that this didn't seem to be needed before
makes me think that signal handlers were almost _never_ actually
running on the signal handling thread.
Signed-off-by: Alyssa Ross <hi@alyssa.is>
2021-09-02 19:16:45 +00:00
|
|
|
use vmm_sys_util::signal::unblock_signal;
|
2019-03-18 20:59:50 +00:00
|
|
|
use vmm_sys_util::terminal::Terminal;
|
2019-02-28 13:16:58 +00:00
|
|
|
|
2020-09-04 11:48:08 +00:00
|
|
|
#[cfg(target_arch = "aarch64")]
|
2021-06-30 14:38:54 +00:00
|
|
|
use arch::aarch64::gic::gicv3_its::kvm::{KvmGicV3Its, GIC_V3_ITS_SNAPSHOT_ID};
|
2020-09-04 11:48:08 +00:00
|
|
|
#[cfg(target_arch = "aarch64")]
|
|
|
|
use arch::aarch64::gic::kvm::create_gic;
|
2021-06-01 13:53:21 +00:00
|
|
|
#[cfg(target_arch = "aarch64")]
|
|
|
|
use devices::interrupt_controller::{self, InterruptController};
|
2020-09-04 11:48:08 +00:00
|
|
|
|
2019-05-10 08:21:53 +00:00
|
|
|
/// Errors associated with VM management
#[derive(Debug)]
pub enum Error {
    /// Cannot open the kernel image
    KernelFile(io::Error),

    /// Cannot open the initramfs image
    InitramfsFile(io::Error),

    /// Cannot load the kernel in memory
    KernelLoad(linux_loader::loader::Error),

    #[cfg(target_arch = "aarch64")]
    /// Cannot load the UEFI binary in memory
    UefiLoad(arch::aarch64::uefi::Error),

    /// Cannot load the initramfs in memory
    InitramfsLoad,

    /// Cannot load the command line in memory
    LoadCmdLine(linux_loader::loader::Error),

    /// Cannot modify the command line
    CmdLineInsertStr(linux_loader::cmdline::Error),

    /// Cannot configure system
    ConfigureSystem(arch::Error),

    /// Cannot enable interrupt controller
    #[cfg(target_arch = "aarch64")]
    EnableInterruptController(interrupt_controller::Error),

    /// A lock protecting internal state was poisoned (a holder panicked).
    PoisonedState,

    /// Cannot create a device manager.
    DeviceManager(DeviceManagerError),

    /// Write to the console failed.
    Console(vmm_sys_util::errno::Error),

    /// Write to the pty console failed.
    PtyConsole(io::Error),

    /// Cannot setup terminal in raw mode.
    SetTerminalRaw(vmm_sys_util::errno::Error),

    /// Cannot setup terminal in canonical mode.
    SetTerminalCanon(vmm_sys_util::errno::Error),

    /// Memory arithmetic overflowed.
    MemOverflow,

    /// Cannot spawn a signal handler thread
    SignalHandlerSpawn(io::Error),

    /// Failed to join on vCPU threads
    ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),

    /// VM config is missing.
    VmMissingConfig,

    /// VM is not created
    VmNotCreated,

    /// VM is already created
    VmAlreadyCreated,

    /// VM is not running
    VmNotRunning,

    /// Cannot clone EventFd.
    EventFdClone(io::Error),

    /// Invalid VM state transition
    InvalidStateTransition(VmState, VmState),

    /// Error from CPU handling
    CpuManager(cpu::Error),

    /// Cannot pause devices
    PauseDevices(MigratableError),

    /// Cannot resume devices
    ResumeDevices(MigratableError),

    /// Cannot pause CPUs
    PauseCpus(MigratableError),

    /// Cannot resume cpus
    ResumeCpus(MigratableError),

    /// Cannot pause VM
    Pause(MigratableError),

    /// Cannot resume VM
    Resume(MigratableError),

    /// Memory manager error
    MemoryManager(MemoryManagerError),

    /// Eventfd write error
    EventfdError(std::io::Error),

    /// Cannot snapshot VM
    Snapshot(MigratableError),

    /// Cannot restore VM
    Restore(MigratableError),

    /// Cannot send VM snapshot
    SnapshotSend(MigratableError),

    /// Cannot convert source URL from Path into &str
    RestoreSourceUrlPathToStr,

    /// Failed to validate config
    ConfigValidation(ValidationError),

    /// No more than one virtio-vsock device is allowed.
    TooManyVsockDevices,

    /// Failed serializing into JSON
    SerializeJson(serde_json::Error),

    /// Invalid configuration for NUMA.
    InvalidNumaConfig,

    /// Cannot create seccomp filter
    CreateSeccompFilter(seccompiler::Error),

    /// Cannot apply seccomp filter
    ApplySeccompFilter(seccompiler::Error),

    /// Failed resizing a memory zone.
    ResizeZone,

    /// Cannot activate virtio devices
    ActivateVirtioDevices(device_manager::DeviceManagerError),

    /// Power button not supported
    PowerButtonNotSupported,

    /// Error triggering power button
    PowerButton(device_manager::DeviceManagerError),

    /// Kernel lacks PVH header
    KernelMissingPvhHeader,

    /// Error doing I/O on TDX firmware file
    #[cfg(feature = "tdx")]
    LoadTdvf(std::io::Error),

    /// Error parsing TDVF
    #[cfg(feature = "tdx")]
    ParseTdvf(arch::x86_64::tdx::TdvfError),

    /// Error populating HOB
    #[cfg(feature = "tdx")]
    PopulateHob(arch::x86_64::tdx::TdvfError),

    /// Error allocating TDVF memory
    #[cfg(feature = "tdx")]
    AllocatingTdvfMemory(crate::memory_manager::Error),

    /// Error enabling TDX VM
    #[cfg(feature = "tdx")]
    InitializeTdxVm(hypervisor::HypervisorVmError),

    /// Error enabling TDX memory region
    #[cfg(feature = "tdx")]
    InitializeTdxMemoryRegion(hypervisor::HypervisorVmError),

    /// Error finalizing TDX setup
    #[cfg(feature = "tdx")]
    FinalizeTdx(hypervisor::HypervisorVmError),
}
|
2019-11-11 13:55:50 +00:00
|
|
|
/// Convenience alias for results using the VM management [`Error`] type.
pub type Result<T> = result::Result<T, Error>;
|
2019-02-28 14:26:30 +00:00
|
|
|
|
2019-10-11 12:47:57 +00:00
|
|
|
/// Lifecycle state of a VM; see `valid_transition` for the allowed moves.
#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq)]
pub enum VmState {
    /// The VM has been created but not booted yet.
    Created,
    /// The VM is running.
    Running,
    /// The VM has been shut down.
    Shutdown,
    /// The VM is paused.
    Paused,
}
|
|
|
|
|
2019-10-11 12:47:57 +00:00
|
|
|
impl VmState {
|
|
|
|
fn valid_transition(self, new_state: VmState) -> Result<()> {
|
|
|
|
match self {
|
|
|
|
VmState::Created => match new_state {
|
2020-06-22 13:35:27 +00:00
|
|
|
VmState::Created | VmState::Shutdown => {
|
2019-10-11 12:47:57 +00:00
|
|
|
Err(Error::InvalidStateTransition(self, new_state))
|
|
|
|
}
|
2020-06-22 13:35:27 +00:00
|
|
|
VmState::Running | VmState::Paused => Ok(()),
|
2019-10-11 12:47:57 +00:00
|
|
|
},
|
|
|
|
|
|
|
|
VmState::Running => match new_state {
|
|
|
|
VmState::Created | VmState::Running => {
|
|
|
|
Err(Error::InvalidStateTransition(self, new_state))
|
|
|
|
}
|
|
|
|
VmState::Paused | VmState::Shutdown => Ok(()),
|
|
|
|
},
|
|
|
|
|
|
|
|
VmState::Shutdown => match new_state {
|
|
|
|
VmState::Paused | VmState::Created | VmState::Shutdown => {
|
|
|
|
Err(Error::InvalidStateTransition(self, new_state))
|
|
|
|
}
|
|
|
|
VmState::Running => Ok(()),
|
|
|
|
},
|
|
|
|
|
|
|
|
VmState::Paused => match new_state {
|
|
|
|
VmState::Created | VmState::Paused => {
|
|
|
|
Err(Error::InvalidStateTransition(self, new_state))
|
|
|
|
}
|
|
|
|
VmState::Running | VmState::Shutdown => Ok(()),
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-03 20:50:56 +00:00
|
|
|
// Debug I/O port
#[cfg(target_arch = "x86_64")]
const DEBUG_IOPORT: u16 = 0x80;
// Prefix used for every log line emitted for a debug I/O port write.
#[cfg(target_arch = "x86_64")]
const DEBUG_IOPORT_PREFIX: &str = "Debug I/O port";
|
|
|
|
|
|
|
|
#[cfg(target_arch = "x86_64")]
/// Debug I/O port, see:
/// https://www.intel.com/content/www/us/en/support/articles/000005500/boards-and-kits.html
///
/// Since we're not a physical platform, we can freely assign code ranges for
/// debugging specific parts of our virtual platform.
pub enum DebugIoPortRange {
    // Code ranges below match the mapping in `DebugIoPortRange::from_u8`.
    Firmware,   // codes 0x00..=0x1f
    Bootloader, // codes 0x20..=0x3f
    Kernel,     // codes 0x40..=0x5f
    Userspace,  // codes 0x60..=0x7f
    Custom,     // any other code
}
|
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
impl DebugIoPortRange {
|
|
|
|
fn from_u8(value: u8) -> DebugIoPortRange {
|
|
|
|
match value {
|
|
|
|
0x00..=0x1f => DebugIoPortRange::Firmware,
|
|
|
|
0x20..=0x3f => DebugIoPortRange::Bootloader,
|
|
|
|
0x40..=0x5f => DebugIoPortRange::Kernel,
|
|
|
|
0x60..=0x7f => DebugIoPortRange::Userspace,
|
|
|
|
_ => DebugIoPortRange::Custom,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
impl fmt::Display for DebugIoPortRange {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
match self {
|
|
|
|
DebugIoPortRange::Firmware => write!(f, "{}: Firmware", DEBUG_IOPORT_PREFIX),
|
|
|
|
DebugIoPortRange::Bootloader => write!(f, "{}: Bootloader", DEBUG_IOPORT_PREFIX),
|
|
|
|
DebugIoPortRange::Kernel => write!(f, "{}: Kernel", DEBUG_IOPORT_PREFIX),
|
|
|
|
DebugIoPortRange::Userspace => write!(f, "{}: Userspace", DEBUG_IOPORT_PREFIX),
|
|
|
|
DebugIoPortRange::Custom => write!(f, "{}: Custom", DEBUG_IOPORT_PREFIX),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Everything the `VmmOps` exit handler needs to service guest exits:
// guest memory plus the PIO/MMIO buses.
struct VmOps {
    memory: GuestMemoryAtomic<GuestMemoryMmap>,
    #[cfg(target_arch = "x86_64")]
    io_bus: Arc<Bus>,
    mmio_bus: Arc<Bus>,
    // Instant this structure was created; used to timestamp debug I/O
    // port log lines (see `log_debug_ioport`).
    #[cfg(target_arch = "x86_64")]
    timestamp: std::time::Instant,
    // Device answering accesses to the legacy PCI configuration I/O ports.
    #[cfg(target_arch = "x86_64")]
    pci_config_io: Arc<Mutex<dyn BusDevice>>,
}
|
|
|
|
|
|
|
|
impl VmOps {
|
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
// Log debug io port codes.
|
|
|
|
fn log_debug_ioport(&self, code: u8) {
|
|
|
|
let elapsed = self.timestamp.elapsed();
|
|
|
|
|
2021-10-26 15:11:32 +00:00
|
|
|
info!(
|
2020-09-03 20:50:56 +00:00
|
|
|
"[{} code 0x{:x}] {}.{:>06} seconds",
|
|
|
|
DebugIoPortRange::from_u8(code),
|
|
|
|
code,
|
|
|
|
elapsed.as_secs(),
|
|
|
|
elapsed.as_micros()
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Exit handler given to the hypervisor: routes guest memory accesses to
// guest RAM and PIO/MMIO exits to the corresponding device buses.
impl VmmOps for VmOps {
    /// Writes `buf` into guest memory at guest physical address `gpa`.
    fn guest_mem_write(&self, gpa: u64, buf: &[u8]) -> hypervisor::vm::Result<usize> {
        self.memory
            .memory()
            .write(buf, GuestAddress(gpa))
            .map_err(|e| HypervisorVmError::GuestMemWrite(e.into()))
    }

    /// Reads from guest memory at guest physical address `gpa` into `buf`.
    fn guest_mem_read(&self, gpa: u64, buf: &mut [u8]) -> hypervisor::vm::Result<usize> {
        self.memory
            .memory()
            .read(buf, GuestAddress(gpa))
            .map_err(|e| HypervisorVmError::GuestMemRead(e.into()))
    }

    /// Dispatches an MMIO read exit to the MMIO bus.
    ///
    /// A read to an address no device claims is only logged, never an
    /// error: the guest still gets a (unmodified) `data` buffer back.
    fn mmio_read(&self, gpa: u64, data: &mut [u8]) -> hypervisor::vm::Result<()> {
        if let Err(vm_device::BusError::MissingAddressRange) = self.mmio_bus.read(gpa, data) {
            warn!("Guest MMIO read to unregistered address 0x{:x}", gpa);
        }
        Ok(())
    }

    /// Dispatches an MMIO write exit to the MMIO bus.
    fn mmio_write(&self, gpa: u64, data: &[u8]) -> hypervisor::vm::Result<()> {
        match self.mmio_bus.write(gpa, data) {
            // Unclaimed address: log and carry on (not fatal for the guest).
            Err(vm_device::BusError::MissingAddressRange) => {
                warn!("Guest MMIO write to unregistered address 0x{:x}", gpa);
            }
            // The device asked us to block until it has finished handling
            // the write before the vCPU resumes.
            Ok(Some(barrier)) => {
                info!("Waiting for barrier");
                barrier.wait();
                info!("Barrier released");
            }
            _ => {}
        };
        Ok(())
    }

    /// Dispatches a port I/O read exit: PCI config ports are handled by the
    /// dedicated PCI config device, everything else goes to the I/O bus.
    #[cfg(target_arch = "x86_64")]
    fn pio_read(&self, port: u64, data: &mut [u8]) -> hypervisor::vm::Result<()> {
        use pci::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE};

        // Accesses to the legacy PCI config window (0xcf8 region) bypass
        // the generic I/O bus; the offset within the window is passed on.
        if (PCI_CONFIG_IO_PORT..(PCI_CONFIG_IO_PORT + PCI_CONFIG_IO_PORT_SIZE)).contains(&port) {
            self.pci_config_io.lock().unwrap().read(
                PCI_CONFIG_IO_PORT,
                port - PCI_CONFIG_IO_PORT,
                data,
            );
            return Ok(());
        }

        if let Err(vm_device::BusError::MissingAddressRange) = self.io_bus.read(port, data) {
            warn!("Guest PIO read to unregistered address 0x{:x}", port);
        }
        Ok(())
    }

    /// Dispatches a port I/O write exit, special-casing the debug I/O
    /// port and the PCI config window before falling back to the I/O bus.
    #[cfg(target_arch = "x86_64")]
    fn pio_write(&self, port: u64, data: &[u8]) -> hypervisor::vm::Result<()> {
        use pci::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE};

        // Single-byte writes to the debug port are only logged.
        if port == DEBUG_IOPORT as u64 && data.len() == 1 {
            self.log_debug_ioport(data[0]);
            return Ok(());
        }

        if (PCI_CONFIG_IO_PORT..(PCI_CONFIG_IO_PORT + PCI_CONFIG_IO_PORT_SIZE)).contains(&port) {
            self.pci_config_io.lock().unwrap().write(
                PCI_CONFIG_IO_PORT,
                port - PCI_CONFIG_IO_PORT,
                data,
            );
            return Ok(());
        }

        match self.io_bus.write(port, data) {
            Err(vm_device::BusError::MissingAddressRange) => {
                warn!("Guest PIO write to unregistered address 0x{:x}", port);
            }
            // Same barrier protocol as `mmio_write`: wait for the device
            // before letting the vCPU continue.
            Ok(Some(barrier)) => {
                info!("Waiting for barrier");
                barrier.wait();
                info!("Barrier released");
            }
            _ => {}
        };
        Ok(())
    }
}
|
|
|
|
|
2021-10-01 09:54:27 +00:00
|
|
|
pub fn physical_bits(max_phys_bits: u8) -> u8 {
|
2020-10-13 07:44:37 +00:00
|
|
|
let host_phys_bits = get_host_cpu_phys_bits();
|
2021-07-20 08:45:59 +00:00
|
|
|
|
2021-10-01 09:54:27 +00:00
|
|
|
cmp::min(host_phys_bits, max_phys_bits)
|
2020-10-13 07:44:37 +00:00
|
|
|
}
|
|
|
|
|
vmm: ensure signal handlers run on the right thread
Despite setting up a dedicated thread for signal handling, we weren't
making sure that the signals we were listening for there were actually
dispatched to the right thread. While the signal-hook provides an
iterator API, so we can know that we're only processing the signals
coming out of the iterator on our signal handling thread, the actual
signal handling code from signal-hook, which pushes the signals onto
the iterator, can run on any thread. This can lead to seccomp
violations when the signal-hook signal handler does something that
isn't allowed on that thread by our seccomp policy.
To reproduce, resize a terminal running cloud-hypervisor continuously
for a few minutes. Eventually, the kernel will deliver a SIGWINCH to
a thread with a restrictive seccomp policy, and a seccomp violation
will trigger.
As part of this change, it's also necessary to allow rt_sigreturn(2)
on the signal handling thread, so signal handlers are actually allowed
to run on it. The fact that this didn't seem to be needed before
makes me think that signal handlers were almost _never_ actually
running on the signal handling thread.
Signed-off-by: Alyssa Ross <hi@alyssa.is>
2021-09-02 19:16:45 +00:00
|
|
|
// Signals consumed by the dedicated signal-handling thread.
pub const HANDLED_SIGNALS: [i32; 3] = [SIGWINCH, SIGTERM, SIGINT];
|
|
|
|
|
2019-11-07 12:38:09 +00:00
|
|
|
/// A single virtual machine: its configuration, resource managers and
/// runtime state.
pub struct Vm {
    // Kernel image opened from the config's kernel path, if one was given.
    kernel: Option<File>,
    // Initramfs image opened from the config's initramfs path, if any.
    initramfs: Option<File>,
    // Auxiliary threads to join on teardown (capacity 1 at construction;
    // presumably the signal-handling thread — confirm where it is spawned).
    threads: Vec<thread::JoinHandle<()>>,
    device_manager: Arc<Mutex<DeviceManager>>,
    config: Arc<Mutex<VmConfig>>,
    // Whether stdin is a TTY, determined via `isatty` at construction.
    on_tty: bool,
    // Handle to the signal-hook iterator backend once signals are set up.
    signals: Option<Handle>,
    // Current lifecycle state, starting at `VmState::Created`.
    state: RwLock<VmState>,
    cpu_manager: Arc<Mutex<cpu::CpuManager>>,
    memory_manager: Arc<Mutex<MemoryManager>>,
    #[cfg_attr(not(feature = "kvm"), allow(dead_code))]
    // The hypervisor abstracted virtual machine.
    vm: Arc<dyn hypervisor::Vm>,
    // Saved guest clock data, passed in at construction (KVM x86_64 only).
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    saved_clock: Option<hypervisor::ClockData>,
    // Guest NUMA topology built from the config's `NumaConfig` entries.
    #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
    numa_nodes: NumaNodes,
    seccomp_action: SeccompAction,
    exit_evt: EventFd,
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
}
|
|
|
|
|
2019-09-24 14:00:00 +00:00
|
|
|
impl Vm {
|
2020-06-23 09:39:39 +00:00
|
|
|
#[allow(clippy::too_many_arguments)]
/// Builds a `Vm` on top of an already-constructed `MemoryManager`.
///
/// Validates the configuration, derives the NUMA topology, creates the
/// `DeviceManager` and `CpuManager`, hands a `VmOps` exit handler to the
/// vCPUs, and opens the kernel/initramfs files referenced by the config.
fn new_from_memory_manager(
    config: Arc<Mutex<VmConfig>>,
    memory_manager: Arc<Mutex<MemoryManager>>,
    vm: Arc<dyn hypervisor::Vm>,
    exit_evt: EventFd,
    reset_evt: EventFd,
    seccomp_action: &SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))] _saved_clock: Option<
        hypervisor::ClockData,
    >,
    activate_evt: EventFd,
    restoring: bool,
) -> Result<Self> {
    // Reject invalid configurations before any resources are created.
    config
        .lock()
        .unwrap()
        .validate()
        .map_err(Error::ConfigValidation)?;

    info!("Booting VM from config: {:?}", &config);

    // Create NUMA nodes based on NumaConfig.
    #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
    let numa_nodes =
        Self::create_numa_nodes(config.lock().unwrap().numa.clone(), &memory_manager)?;

    // TDX guests force an IOMMU in front of their devices.
    #[cfg(feature = "tdx")]
    let force_iommu = config.lock().unwrap().tdx.is_some();
    #[cfg(not(feature = "tdx"))]
    let force_iommu = false;

    let device_manager = DeviceManager::new(
        vm.clone(),
        config.clone(),
        memory_manager.clone(),
        &exit_evt,
        &reset_evt,
        seccomp_action.clone(),
        #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
        numa_nodes.clone(),
        &activate_evt,
        force_iommu,
        restoring,
    )
    .map_err(Error::DeviceManager)?;

    let memory = memory_manager.lock().unwrap().guest_memory();
    #[cfg(target_arch = "x86_64")]
    let io_bus = Arc::clone(device_manager.lock().unwrap().io_bus());
    let mmio_bus = Arc::clone(device_manager.lock().unwrap().mmio_bus());
    // Create the VmOps structure, which implements the VmmOps trait.
    // And send it to the hypervisor.

    #[cfg(target_arch = "x86_64")]
    let pci_config_io =
        device_manager.lock().unwrap().pci_config_io() as Arc<Mutex<dyn BusDevice>>;
    let vm_ops: Arc<dyn VmmOps> = Arc::new(VmOps {
        memory,
        #[cfg(target_arch = "x86_64")]
        io_bus,
        mmio_bus,
        #[cfg(target_arch = "x86_64")]
        timestamp: std::time::Instant::now(),
        #[cfg(target_arch = "x86_64")]
        pci_config_io,
    });

    let exit_evt_clone = exit_evt.try_clone().map_err(Error::EventFdClone)?;
    #[cfg(feature = "tdx")]
    let tdx_enabled = config.lock().unwrap().tdx.is_some();
    let cpu_manager = cpu::CpuManager::new(
        &config.lock().unwrap().cpus.clone(),
        &device_manager,
        &memory_manager,
        vm.clone(),
        exit_evt_clone,
        reset_evt,
        hypervisor.clone(),
        seccomp_action.clone(),
        vm_ops,
        #[cfg(feature = "tdx")]
        tdx_enabled,
        #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
        &numa_nodes,
    )
    .map_err(Error::CpuManager)?;

    // Remember whether stdin is a terminal so console handling can decide
    // between raw and canonical modes later.
    let on_tty = unsafe { libc::isatty(libc::STDIN_FILENO as i32) } != 0;

    // Open the kernel/initramfs images now (if configured) so a bad path
    // fails construction rather than boot.
    let kernel = config
        .lock()
        .unwrap()
        .kernel
        .as_ref()
        .map(|k| File::open(&k.path))
        .transpose()
        .map_err(Error::KernelFile)?;

    let initramfs = config
        .lock()
        .unwrap()
        .initramfs
        .as_ref()
        .map(|i| File::open(&i.path))
        .transpose()
        .map_err(Error::InitramfsFile)?;

    Ok(Vm {
        kernel,
        initramfs,
        device_manager,
        config,
        on_tty,
        threads: Vec::with_capacity(1),
        signals: None,
        state: RwLock::new(VmState::Created),
        cpu_manager,
        memory_manager,
        vm,
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        saved_clock: _saved_clock,
        #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
        numa_nodes,
        seccomp_action: seccomp_action.clone(),
        exit_evt,
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        hypervisor,
    })
}
|
|
|
|
|
2021-08-05 10:01:35 +00:00
|
|
|
#[cfg(any(target_arch = "aarch64", feature = "acpi"))]
/// Build the guest NUMA topology from the user-provided `NumaConfig` list.
///
/// Each config entry is validated against the memory manager's known memory
/// zones (and, on x86_64, its SGX EPC sections); any reference to an unknown
/// zone/section, a duplicated node id, or an invalid distance entry yields
/// `Error::InvalidNumaConfig`. Returns the map of guest NUMA id -> node.
fn create_numa_nodes(
    configs: Option<Vec<NumaConfig>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
) -> Result<NumaNodes> {
    let mm = memory_manager.lock().unwrap();
    let mm_zones = mm.memory_zones();
    let mut numa_nodes = BTreeMap::new();

    if let Some(configs) = &configs {
        for config in configs.iter() {
            // Each guest NUMA id may only be declared once.
            if numa_nodes.contains_key(&config.guest_numa_id) {
                error!("Can't define twice the same NUMA node");
                return Err(Error::InvalidNumaConfig);
            }

            let mut node = NumaNode::default();

            // Attach the backing memory regions of every referenced zone.
            if let Some(memory_zones) = &config.memory_zones {
                for memory_zone in memory_zones.iter() {
                    let mm_zone = match mm_zones.get(memory_zone) {
                        Some(zone) => zone,
                        None => {
                            error!("Unknown memory zone '{}'", memory_zone);
                            return Err(Error::InvalidNumaConfig);
                        }
                    };
                    node.memory_regions.extend(mm_zone.regions().clone());
                    // A zone backed by virtio-mem also contributes its
                    // hotpluggable region to this node.
                    if let Some(virtiomem_zone) = mm_zone.virtio_mem_zone() {
                        node.hotplug_regions.push(virtiomem_zone.region().clone());
                    }
                    node.memory_zones.push(memory_zone.clone());
                }
            }

            if let Some(cpus) = &config.cpus {
                node.cpus.extend(cpus);
            }

            // Validate and record inter-node distances.
            if let Some(distances) = &config.distances {
                for distance in distances.iter() {
                    let dest = distance.destination;
                    let dist = distance.distance;

                    // The destination must be one of the declared nodes.
                    if !configs.iter().any(|cfg| cfg.guest_numa_id == dest) {
                        error!("Unknown destination NUMA node {}", dest);
                        return Err(Error::InvalidNumaConfig);
                    }

                    if node.distances.contains_key(&dest) {
                        error!("Destination NUMA node {} has been already set", dest);
                        return Err(Error::InvalidNumaConfig);
                    }

                    node.distances.insert(dest, dist);
                }
            }

            // SGX EPC sections can be pinned to a NUMA node on x86_64 only.
            #[cfg(target_arch = "x86_64")]
            if let Some(sgx_epc_sections) = &config.sgx_epc_sections {
                if let Some(sgx_epc_region) = mm.sgx_epc_region() {
                    let mm_sections = sgx_epc_region.epc_sections();
                    for sgx_epc_section in sgx_epc_sections.iter() {
                        if let Some(mm_section) = mm_sections.get(sgx_epc_section) {
                            node.sgx_epc_sections.push(mm_section.clone());
                        } else {
                            error!("Unknown SGX EPC section '{}'", sgx_epc_section);
                            return Err(Error::InvalidNumaConfig);
                        }
                    }
                } else {
                    error!("Missing SGX EPC region");
                    return Err(Error::InvalidNumaConfig);
                }
            }

            numa_nodes.insert(config.guest_numa_id, node);
        }
    }

    Ok(numa_nodes)
}
|
|
|
|
|
2021-03-04 23:34:45 +00:00
|
|
|
#[allow(clippy::too_many_arguments)]
|
2020-03-16 23:39:50 +00:00
|
|
|
pub fn new(
|
|
|
|
config: Arc<Mutex<VmConfig>>,
|
|
|
|
exit_evt: EventFd,
|
|
|
|
reset_evt: EventFd,
|
2020-07-30 22:33:52 +00:00
|
|
|
seccomp_action: &SeccompAction,
|
2020-06-02 02:29:54 +00:00
|
|
|
hypervisor: Arc<dyn hypervisor::Hypervisor>,
|
2020-11-09 13:29:05 +00:00
|
|
|
activate_evt: EventFd,
|
2021-03-04 23:34:45 +00:00
|
|
|
serial_pty: Option<PtyPair>,
|
|
|
|
console_pty: Option<PtyPair>,
|
vmm: notify virtio-console of pty resizes
When a pty is resized (using the TIOCSWINSZ ioctl -- see ioctl_tty(2)),
the kernel will send a SIGWINCH signal to the pty's foreground process
group to notify it of the resize. This is the only way to be notified
by the kernel of a pty resize.
We can't just make the cloud-hypervisor process's process group the
foreground process group though, because a process can only set the
foreground process group of its controlling terminal, and
cloud-hypervisor's controlling terminal will often be the terminal the
user is running it in. To work around this, we fork a subprocess in a
new process group, and set its process group to be the foreground
process group of the pty. The subprocess additionally must be running
in a new session so that it can have a different controlling
terminal. This subprocess writes a byte to a pipe every time the pty
is resized, and the virtio-console device can listen for this in its
epoll loop.
Alternatives I considered were to have the subprocess just send
SIGWINCH to its parent, and to use an eventfd instead of a pipe.
I decided against the signal approach because re-purposing a signal
that has a very specific meaning (even if this use was only slightly
different to its normal meaning) felt unclean, and because it would
have required using pidfds to avoid race conditions if
cloud-hypervisor had terminated, which added complexity. I decided
against using an eventfd because using a pipe instead allows the child
to be notified (via poll(2)) when nothing is reading from the pipe any
more, meaning it can be reliably notified of parent death and
terminate itself immediately.
I used clone3(2) instead of fork(2) because without
CLONE_CLEAR_SIGHAND the subprocess would inherit signal-hook's signal
handlers, and there's no other straightforward way to restore all signal
handlers to their defaults in the child process. The only way to do
it would be to iterate through all possible signals, or maintain a
global list of monitored signals ourselves (vmm:vm::HANDLED_SIGNALS is
insufficient because it doesn't take into account e.g. the SIGSYS
signal handler that catches seccomp violations).
Signed-off-by: Alyssa Ross <hi@alyssa.is>
2021-09-10 11:12:17 +00:00
|
|
|
console_resize_pipe: Option<File>,
|
2020-03-16 23:39:50 +00:00
|
|
|
) -> Result<Self> {
|
2021-02-08 17:27:47 +00:00
|
|
|
#[cfg(feature = "tdx")]
|
|
|
|
let tdx_enabled = config.lock().unwrap().tdx.is_some();
|
2020-06-02 02:29:54 +00:00
|
|
|
hypervisor.check_required_extensions().unwrap();
|
2021-02-08 17:27:47 +00:00
|
|
|
#[cfg(feature = "tdx")]
|
|
|
|
let vm = hypervisor
|
|
|
|
.create_vm_with_type(if tdx_enabled {
|
|
|
|
2 // KVM_X86_TDX_VM
|
|
|
|
} else {
|
|
|
|
0 // KVM_X86_LEGACY_VM
|
|
|
|
})
|
|
|
|
.unwrap();
|
|
|
|
#[cfg(not(feature = "tdx"))]
|
2020-07-03 09:10:50 +00:00
|
|
|
let vm = hypervisor.create_vm().unwrap();
|
2021-02-08 17:27:47 +00:00
|
|
|
|
2020-06-02 02:29:54 +00:00
|
|
|
#[cfg(target_arch = "x86_64")]
|
2020-07-03 09:10:50 +00:00
|
|
|
vm.enable_split_irq().unwrap();
|
2021-10-01 09:54:27 +00:00
|
|
|
let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
|
2021-10-29 08:30:01 +00:00
|
|
|
|
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
let sgx_epc_config = config.lock().unwrap().sgx_epc.clone();
|
|
|
|
|
2020-04-07 11:33:18 +00:00
|
|
|
let memory_manager = MemoryManager::new(
|
2020-07-03 09:10:50 +00:00
|
|
|
vm.clone(),
|
2020-04-07 11:33:18 +00:00
|
|
|
&config.lock().unwrap().memory.clone(),
|
2021-09-29 04:54:22 +00:00
|
|
|
None,
|
2020-10-13 07:44:37 +00:00
|
|
|
phys_bits,
|
2021-07-26 18:30:01 +00:00
|
|
|
#[cfg(feature = "tdx")]
|
|
|
|
tdx_enabled,
|
2021-10-05 13:14:01 +00:00
|
|
|
None,
|
2021-10-29 08:30:01 +00:00
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
sgx_epc_config,
|
2020-04-07 11:33:18 +00:00
|
|
|
)
|
|
|
|
.map_err(Error::MemoryManager)?;
|
2020-03-16 23:39:50 +00:00
|
|
|
|
2020-07-03 09:10:50 +00:00
|
|
|
let new_vm = Vm::new_from_memory_manager(
|
2020-03-16 23:48:12 +00:00
|
|
|
config,
|
|
|
|
memory_manager,
|
2020-07-03 09:10:50 +00:00
|
|
|
vm,
|
2020-03-16 23:48:12 +00:00
|
|
|
exit_evt,
|
|
|
|
reset_evt,
|
2020-07-30 22:33:52 +00:00
|
|
|
seccomp_action,
|
2020-06-02 02:29:54 +00:00
|
|
|
hypervisor,
|
2021-05-20 06:47:45 +00:00
|
|
|
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
|
2020-06-23 09:39:39 +00:00
|
|
|
None,
|
2020-11-09 13:29:05 +00:00
|
|
|
activate_evt,
|
2021-08-06 13:39:09 +00:00
|
|
|
false,
|
2020-04-29 09:09:04 +00:00
|
|
|
)?;
|
|
|
|
|
|
|
|
// The device manager must create the devices from here as it is part
|
|
|
|
// of the regular code path creating everything from scratch.
|
2020-07-03 09:10:50 +00:00
|
|
|
new_vm
|
|
|
|
.device_manager
|
2020-04-29 09:09:04 +00:00
|
|
|
.lock()
|
|
|
|
.unwrap()
|
vmm: notify virtio-console of pty resizes
When a pty is resized (using the TIOCSWINSZ ioctl -- see ioctl_tty(2)),
the kernel will send a SIGWINCH signal to the pty's foreground process
group to notify it of the resize. This is the only way to be notified
by the kernel of a pty resize.
We can't just make the cloud-hypervisor process's process group the
foreground process group though, because a process can only set the
foreground process group of its controlling terminal, and
cloud-hypervisor's controlling terminal will often be the terminal the
user is running it in. To work around this, we fork a subprocess in a
new process group, and set its process group to be the foreground
process group of the pty. The subprocess additionally must be running
in a new session so that it can have a different controlling
terminal. This subprocess writes a byte to a pipe every time the pty
is resized, and the virtio-console device can listen for this in its
epoll loop.
Alternatives I considered were to have the subprocess just send
SIGWINCH to its parent, and to use an eventfd instead of a pipe.
I decided against the signal approach because re-purposing a signal
that has a very specific meaning (even if this use was only slightly
different to its normal meaning) felt unclean, and because it would
have required using pidfds to avoid race conditions if
cloud-hypervisor had terminated, which added complexity. I decided
against using an eventfd because using a pipe instead allows the child
to be notified (via poll(2)) when nothing is reading from the pipe any
more, meaning it can be reliably notified of parent death and
terminate itself immediately.
I used clone3(2) instead of fork(2) because without
CLONE_CLEAR_SIGHAND the subprocess would inherit signal-hook's signal
handlers, and there's no other straightforward way to restore all signal
handlers to their defaults in the child process. The only way to do
it would be to iterate through all possible signals, or maintain a
global list of monitored signals ourselves (vmm:vm::HANDLED_SIGNALS is
insufficient because it doesn't take into account e.g. the SIGSYS
signal handler that catches seccomp violations).
Signed-off-by: Alyssa Ross <hi@alyssa.is>
2021-09-10 11:12:17 +00:00
|
|
|
.create_devices(serial_pty, console_pty, console_resize_pipe)
|
2020-04-29 09:09:04 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
2020-07-03 09:10:50 +00:00
|
|
|
Ok(new_vm)
|
2020-03-16 23:48:12 +00:00
|
|
|
}
|
|
|
|
|
2020-07-30 22:33:52 +00:00
|
|
|
#[allow(clippy::too_many_arguments)]
|
2020-03-16 23:48:12 +00:00
|
|
|
pub fn new_from_snapshot(
|
|
|
|
snapshot: &Snapshot,
|
|
|
|
exit_evt: EventFd,
|
|
|
|
reset_evt: EventFd,
|
2020-11-04 14:58:18 +00:00
|
|
|
source_url: Option<&str>,
|
2020-04-07 13:54:33 +00:00
|
|
|
prefault: bool,
|
2020-07-30 22:33:52 +00:00
|
|
|
seccomp_action: &SeccompAction,
|
2020-06-02 02:29:54 +00:00
|
|
|
hypervisor: Arc<dyn hypervisor::Hypervisor>,
|
2020-11-09 13:29:05 +00:00
|
|
|
activate_evt: EventFd,
|
2020-03-16 23:48:12 +00:00
|
|
|
) -> Result<Self> {
|
2020-06-02 02:29:54 +00:00
|
|
|
hypervisor.check_required_extensions().unwrap();
|
2020-07-03 09:10:50 +00:00
|
|
|
let vm = hypervisor.create_vm().unwrap();
|
2020-06-02 02:29:54 +00:00
|
|
|
#[cfg(target_arch = "x86_64")]
|
2020-07-03 09:10:50 +00:00
|
|
|
vm.enable_split_irq().unwrap();
|
2020-06-23 09:39:39 +00:00
|
|
|
let vm_snapshot = get_vm_snapshot(snapshot).map_err(Error::Restore)?;
|
2020-08-23 07:45:44 +00:00
|
|
|
let config = vm_snapshot.config;
|
2020-08-21 20:06:56 +00:00
|
|
|
if let Some(state) = vm_snapshot.state {
|
2020-09-24 21:15:13 +00:00
|
|
|
vm.set_state(state)
|
2020-08-21 20:06:56 +00:00
|
|
|
.map_err(|e| Error::Restore(MigratableError::Restore(e.into())))?;
|
|
|
|
}
|
2020-03-16 23:48:12 +00:00
|
|
|
|
|
|
|
let memory_manager = if let Some(memory_manager_snapshot) =
|
|
|
|
snapshot.snapshots.get(MEMORY_MANAGER_SNAPSHOT_ID)
|
|
|
|
{
|
2021-10-01 09:54:27 +00:00
|
|
|
let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
|
2020-03-16 23:48:12 +00:00
|
|
|
MemoryManager::new_from_snapshot(
|
|
|
|
memory_manager_snapshot,
|
2020-07-03 09:10:50 +00:00
|
|
|
vm.clone(),
|
2020-03-16 23:48:12 +00:00
|
|
|
&config.lock().unwrap().memory.clone(),
|
|
|
|
source_url,
|
2020-04-07 13:54:33 +00:00
|
|
|
prefault,
|
2020-10-13 07:44:37 +00:00
|
|
|
phys_bits,
|
2020-03-16 23:48:12 +00:00
|
|
|
)
|
|
|
|
.map_err(Error::MemoryManager)?
|
|
|
|
} else {
|
|
|
|
return Err(Error::Restore(MigratableError::Restore(anyhow!(
|
|
|
|
"Missing memory manager snapshot"
|
|
|
|
))));
|
|
|
|
};
|
|
|
|
|
2020-03-16 23:39:50 +00:00
|
|
|
Vm::new_from_memory_manager(
|
|
|
|
config,
|
|
|
|
memory_manager,
|
2020-07-03 09:10:50 +00:00
|
|
|
vm,
|
2020-03-16 23:39:50 +00:00
|
|
|
exit_evt,
|
|
|
|
reset_evt,
|
2020-07-30 22:33:52 +00:00
|
|
|
seccomp_action,
|
2020-06-02 02:29:54 +00:00
|
|
|
hypervisor,
|
2021-05-20 06:47:45 +00:00
|
|
|
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
|
|
|
|
vm_snapshot.clock,
|
2020-11-09 13:29:05 +00:00
|
|
|
activate_evt,
|
2021-08-06 13:39:09 +00:00
|
|
|
true,
|
2020-03-16 23:39:50 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2020-11-12 16:55:55 +00:00
|
|
|
pub fn new_from_migration(
|
|
|
|
config: Arc<Mutex<VmConfig>>,
|
|
|
|
exit_evt: EventFd,
|
|
|
|
reset_evt: EventFd,
|
|
|
|
seccomp_action: &SeccompAction,
|
|
|
|
hypervisor: Arc<dyn hypervisor::Hypervisor>,
|
2020-11-09 13:29:05 +00:00
|
|
|
activate_evt: EventFd,
|
2021-10-05 15:53:08 +00:00
|
|
|
memory_manager_data: &MemoryManagerSnapshotData,
|
2020-11-12 16:55:55 +00:00
|
|
|
) -> Result<Self> {
|
|
|
|
hypervisor.check_required_extensions().unwrap();
|
|
|
|
let vm = hypervisor.create_vm().unwrap();
|
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
vm.enable_split_irq().unwrap();
|
2021-10-01 09:54:27 +00:00
|
|
|
let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
|
2020-11-12 16:55:55 +00:00
|
|
|
|
|
|
|
let memory_manager = MemoryManager::new(
|
|
|
|
vm.clone(),
|
|
|
|
&config.lock().unwrap().memory.clone(),
|
2021-09-29 04:54:22 +00:00
|
|
|
None,
|
2020-11-12 16:55:55 +00:00
|
|
|
phys_bits,
|
2021-07-26 18:30:01 +00:00
|
|
|
#[cfg(feature = "tdx")]
|
|
|
|
false,
|
2021-10-05 15:53:08 +00:00
|
|
|
Some(memory_manager_data),
|
2021-10-29 08:30:01 +00:00
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
None,
|
2020-11-12 16:55:55 +00:00
|
|
|
)
|
|
|
|
.map_err(Error::MemoryManager)?;
|
|
|
|
|
|
|
|
Vm::new_from_memory_manager(
|
|
|
|
config,
|
|
|
|
memory_manager,
|
|
|
|
vm,
|
|
|
|
exit_evt,
|
|
|
|
reset_evt,
|
|
|
|
seccomp_action,
|
|
|
|
hypervisor,
|
2021-05-20 06:47:45 +00:00
|
|
|
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
|
2020-11-12 16:55:55 +00:00
|
|
|
None,
|
2020-11-09 13:29:05 +00:00
|
|
|
activate_evt,
|
2021-08-06 13:39:09 +00:00
|
|
|
true,
|
2020-11-12 16:55:55 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2020-03-15 17:56:07 +00:00
|
|
|
fn load_initramfs(&mut self, guest_mem: &GuestMemoryMmap) -> Result<arch::InitramfsConfig> {
|
|
|
|
let mut initramfs = self.initramfs.as_ref().unwrap();
|
|
|
|
let size: usize = initramfs
|
|
|
|
.seek(SeekFrom::End(0))
|
|
|
|
.map_err(|_| Error::InitramfsLoad)?
|
|
|
|
.try_into()
|
|
|
|
.unwrap();
|
|
|
|
initramfs
|
|
|
|
.seek(SeekFrom::Start(0))
|
|
|
|
.map_err(|_| Error::InitramfsLoad)?;
|
|
|
|
|
|
|
|
let address =
|
|
|
|
arch::initramfs_load_addr(guest_mem, size).map_err(|_| Error::InitramfsLoad)?;
|
|
|
|
let address = GuestAddress(address);
|
|
|
|
|
|
|
|
guest_mem
|
|
|
|
.read_from(address, &mut initramfs, size)
|
|
|
|
.map_err(|_| Error::InitramfsLoad)?;
|
|
|
|
|
2021-05-18 14:32:20 +00:00
|
|
|
info!("Initramfs loaded: address = 0x{:x}", address.0);
|
2020-03-15 17:56:07 +00:00
|
|
|
Ok(arch::InitramfsConfig { address, size })
|
|
|
|
}
|
|
|
|
|
2021-09-24 07:43:14 +00:00
|
|
|
fn get_cmdline(&mut self) -> Result<Cmdline> {
|
2019-09-27 08:39:56 +00:00
|
|
|
let mut cmdline = Cmdline::new(arch::CMDLINE_MAX_SIZE);
|
|
|
|
cmdline
|
2019-12-05 14:50:38 +00:00
|
|
|
.insert_str(self.config.lock().unwrap().cmdline.args.clone())
|
2020-01-24 08:34:51 +00:00
|
|
|
.map_err(Error::CmdLineInsertStr)?;
|
2020-03-04 13:39:15 +00:00
|
|
|
for entry in self.device_manager.lock().unwrap().cmdline_additions() {
|
2020-01-24 08:34:51 +00:00
|
|
|
cmdline.insert_str(entry).map_err(Error::CmdLineInsertStr)?;
|
2019-09-11 15:22:00 +00:00
|
|
|
}
|
2021-09-24 07:43:14 +00:00
|
|
|
Ok(cmdline)
|
2020-05-28 03:31:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(target_arch = "aarch64")]
/// Load the guest kernel into memory and return its entry point (aarch64).
///
/// The image is first tried as a PE kernel; if the PE magic number does not
/// match, it is retried as a raw UEFI binary at the UEFI start address.
/// As the UEFI binary is formatless, it must be the last option to try.
fn load_kernel(&mut self) -> Result<EntryPoint> {
    let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
    let mem = guest_memory.memory();
    let mut kernel = self.kernel.as_ref().unwrap();

    let entry_addr = match linux_loader::loader::pe::PE::load(
        mem.deref(),
        Some(GuestAddress(arch::get_kernel_start())),
        &mut kernel,
        None,
    ) {
        Ok(entry_addr) => entry_addr,
        Err(linux_loader::loader::Error::Pe(InvalidImageMagicNumber)) => {
            // Not a PE image: fall back to loading it as a UEFI binary.
            arch::aarch64::uefi::load_uefi(
                mem.deref(),
                GuestAddress(arch::get_uefi_start()),
                &mut kernel,
            )
            .map_err(Error::UefiLoad)?;

            // The entry point offset in a UEFI image is always 0.
            return Ok(EntryPoint {
                entry_addr: GuestAddress(arch::get_uefi_start()),
            });
        }
        Err(e) => return Err(Error::KernelLoad(e)),
    };

    Ok(EntryPoint {
        entry_addr: entry_addr.kernel_load,
    })
}
|
2019-09-11 15:22:00 +00:00
|
|
|
|
2020-05-28 03:31:26 +00:00
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
|
|
fn load_kernel(&mut self) -> Result<EntryPoint> {
|
2021-05-18 14:32:20 +00:00
|
|
|
info!("Loading kernel");
|
2021-09-24 07:43:14 +00:00
|
|
|
let cmdline = self.get_cmdline()?;
|
2019-12-19 15:47:36 +00:00
|
|
|
let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
|
2020-02-11 16:22:40 +00:00
|
|
|
let mem = guest_memory.memory();
|
2021-02-08 16:06:27 +00:00
|
|
|
let mut kernel = self.kernel.as_ref().unwrap();
|
2020-03-18 18:42:03 +00:00
|
|
|
let entry_addr = match linux_loader::loader::elf::Elf::load(
|
2020-02-11 16:22:40 +00:00
|
|
|
mem.deref(),
|
2019-02-28 13:16:58 +00:00
|
|
|
None,
|
2021-02-08 16:06:27 +00:00
|
|
|
&mut kernel,
|
2019-09-27 13:11:50 +00:00
|
|
|
Some(arch::layout::HIGH_RAM_START),
|
2019-06-10 09:14:02 +00:00
|
|
|
) {
|
|
|
|
Ok(entry_addr) => entry_addr,
|
2020-04-15 15:32:33 +00:00
|
|
|
Err(e) => {
|
|
|
|
return Err(Error::KernelLoad(e));
|
|
|
|
}
|
2019-06-10 09:14:02 +00:00
|
|
|
};
|
2019-02-28 13:16:58 +00:00
|
|
|
|
2021-09-24 07:43:14 +00:00
|
|
|
linux_loader::loader::load_cmdline(mem.deref(), arch::layout::CMDLINE_START, &cmdline)
|
|
|
|
.map_err(Error::LoadCmdLine)?;
|
2020-03-15 17:56:07 +00:00
|
|
|
|
2021-05-18 14:32:20 +00:00
|
|
|
if let PvhEntryPresent(entry_addr) = entry_addr.pvh_boot_cap {
|
2021-04-29 16:11:32 +00:00
|
|
|
// Use the PVH kernel entry point to boot the guest
|
2021-05-18 14:32:20 +00:00
|
|
|
info!("Kernel loaded: entry_addr = 0x{:x}", entry_addr.0);
|
|
|
|
Ok(EntryPoint { entry_addr })
|
2021-04-29 16:11:32 +00:00
|
|
|
} else {
|
|
|
|
Err(Error::KernelMissingPvhHeader)
|
2020-05-28 03:31:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(target_arch = "x86_64")]
|
2021-09-30 11:39:24 +00:00
|
|
|
fn configure_system(&mut self, #[cfg(feature = "acpi")] rsdp_addr: GuestAddress) -> Result<()> {
|
2021-05-18 14:32:20 +00:00
|
|
|
info!("Configuring system");
|
2020-09-14 15:24:42 +00:00
|
|
|
let mem = self.memory_manager.lock().unwrap().boot_guest_memory();
|
2020-05-28 03:31:26 +00:00
|
|
|
|
2020-03-15 17:56:07 +00:00
|
|
|
let initramfs_config = match self.initramfs {
|
2020-09-14 15:24:42 +00:00
|
|
|
Some(_) => Some(self.load_initramfs(&mem)?),
|
2020-03-15 17:56:07 +00:00
|
|
|
None => None,
|
|
|
|
};
|
|
|
|
|
2019-11-25 14:16:55 +00:00
|
|
|
let boot_vcpus = self.cpu_manager.lock().unwrap().boot_vcpus();
|
2019-11-06 17:20:55 +00:00
|
|
|
|
|
|
|
#[cfg(feature = "acpi")]
|
2021-09-30 11:39:24 +00:00
|
|
|
let rsdp_addr = Some(rsdp_addr);
|
|
|
|
#[cfg(not(feature = "acpi"))]
|
|
|
|
let rsdp_addr = None;
|
2019-11-06 17:20:55 +00:00
|
|
|
|
2020-07-09 08:25:37 +00:00
|
|
|
let sgx_epc_region = self
|
|
|
|
.memory_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.sgx_epc_region()
|
|
|
|
.as_ref()
|
|
|
|
.cloned();
|
|
|
|
|
2021-04-29 16:11:32 +00:00
|
|
|
arch::configure_system(
|
|
|
|
&mem,
|
|
|
|
arch::layout::CMDLINE_START,
|
|
|
|
&initramfs_config,
|
|
|
|
boot_vcpus,
|
|
|
|
rsdp_addr,
|
|
|
|
sgx_epc_region,
|
|
|
|
)
|
|
|
|
.map_err(Error::ConfigureSystem)?;
|
2020-05-28 03:31:26 +00:00
|
|
|
Ok(())
|
2019-02-28 13:16:58 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 07:20:22 +00:00
|
|
|
#[cfg(target_arch = "aarch64")]
/// Finalize guest platform setup (aarch64).
///
/// Collects everything `arch::configure_system` needs on this architecture
/// — command line, vCPU MPIDRs/topology, boot memory, optional initramfs,
/// device info, the PCI MMIO window, the virtio-iommu BDF and NUMA nodes —
/// creates the GIC, then registers and enables the interrupt controller in
/// the device manager. The RSDP address is unused here.
fn configure_system(
    &mut self,
    #[cfg(feature = "acpi")] _rsdp_addr: GuestAddress,
) -> Result<()> {
    let cmdline = self.get_cmdline()?;

    // vCPU identifiers and topology as seen by the guest.
    let (vcpu_mpidrs, vcpu_topology) = {
        let cpu_manager = self.cpu_manager.lock().unwrap();
        (cpu_manager.get_mpidrs(), cpu_manager.get_vcpu_topology())
    };

    let mem = self.memory_manager.lock().unwrap().boot_guest_memory();
    let initramfs_config = if self.initramfs.is_some() {
        Some(self.load_initramfs(&mem)?)
    } else {
        None
    };

    let device_info = &self
        .device_manager
        .lock()
        .unwrap()
        .get_device_info()
        .clone();

    // Compute the PCI MMIO window from the device area boundaries.
    let pci_space_start: GuestAddress = self
        .memory_manager
        .lock()
        .as_ref()
        .unwrap()
        .start_of_device_area();
    let pci_space_end: GuestAddress = self
        .memory_manager
        .lock()
        .as_ref()
        .unwrap()
        .end_of_device_area();
    let pci_space_size = pci_space_end
        .checked_offset_from(pci_space_start)
        .ok_or(Error::MemOverflow)?
        + 1;
    let pci_space = (pci_space_start.0, pci_space_size);

    let virtio_iommu_bdf = self
        .device_manager
        .lock()
        .unwrap()
        .iommu_attached_devices()
        .as_ref()
        .map(|(v, _)| *v);

    let gic_device = create_gic(
        &self.memory_manager.lock().as_ref().unwrap().vm,
        self.cpu_manager.lock().unwrap().boot_vcpus() as u64,
    )
    .map_err(|e| {
        Error::ConfigureSystem(arch::Error::AArch64Setup(arch::aarch64::Error::SetupGic(e)))
    })?;

    arch::configure_system(
        &mem,
        cmdline.as_str(),
        vcpu_mpidrs,
        vcpu_topology,
        device_info,
        &initramfs_config,
        &pci_space,
        virtio_iommu_bdf.map(|bdf| bdf.into()),
        &*gic_device,
        &self.numa_nodes,
    )
    .map_err(Error::ConfigureSystem)?;

    // Update the GIC entity in device manager
    self.device_manager
        .lock()
        .unwrap()
        .get_interrupt_controller()
        .unwrap()
        .lock()
        .unwrap()
        .set_gic_device(Arc::new(Mutex::new(gic_device)));

    // Activate gic device
    self.device_manager
        .lock()
        .unwrap()
        .get_interrupt_controller()
        .unwrap()
        .lock()
        .unwrap()
        .enable()
        .map_err(Error::EnableInterruptController)?;

    Ok(())
}
|
|
|
|
|
2021-03-04 23:34:45 +00:00
|
|
|
/// Return the serial device's PTY pair from the device manager, if any.
pub fn serial_pty(&self) -> Option<PtyPair> {
    let device_manager = self.device_manager.lock().unwrap();
    device_manager.serial_pty()
}
|
|
|
|
|
2021-03-04 23:34:45 +00:00
|
|
|
/// Return the console device's PTY pair from the device manager, if any.
pub fn console_pty(&self) -> Option<PtyPair> {
    let device_manager = self.device_manager.lock().unwrap();
    device_manager.console_pty()
}
|
|
|
|
|
vmm: notify virtio-console of pty resizes
When a pty is resized (using the TIOCSWINSZ ioctl -- see ioctl_tty(2)),
the kernel will send a SIGWINCH signal to the pty's foreground process
group to notify it of the resize. This is the only way to be notified
by the kernel of a pty resize.
We can't just make the cloud-hypervisor process's process group the
foreground process group though, because a process can only set the
foreground process group of its controlling terminal, and
cloud-hypervisor's controlling terminal will often be the terminal the
user is running it in. To work around this, we fork a subprocess in a
new process group, and set its process group to be the foreground
process group of the pty. The subprocess additionally must be running
in a new session so that it can have a different controlling
terminal. This subprocess writes a byte to a pipe every time the pty
is resized, and the virtio-console device can listen for this in its
epoll loop.
Alternatives I considered were to have the subprocess just send
SIGWINCH to its parent, and to use an eventfd instead of a pipe.
I decided against the signal approach because re-purposing a signal
that has a very specific meaning (even if this use was only slightly
different to its normal meaning) felt unclean, and because it would
have required using pidfds to avoid race conditions if
cloud-hypervisor had terminated, which added complexity. I decided
against using an eventfd because using a pipe instead allows the child
to be notified (via poll(2)) when nothing is reading from the pipe any
more, meaning it can be reliably notified of parent death and
terminate itself immediately.
I used clone3(2) instead of fork(2) because without
CLONE_CLEAR_SIGHAND the subprocess would inherit signal-hook's signal
handlers, and there's no other straightforward way to restore all signal
handlers to their defaults in the child process. The only way to do
it would be to iterate through all possible signals, or maintain a
global list of monitored signals ourselves (vmm:vm::HANDLED_SIGNALS is
insufficient because it doesn't take into account e.g. the SIGSYS
signal handler that catches seccomp violations).
Signed-off-by: Alyssa Ross <hi@alyssa.is>
2021-09-10 11:12:17 +00:00
|
|
|
pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
|
|
|
|
self.device_manager.lock().unwrap().console_resize_pipe()
|
|
|
|
}
|
|
|
|
|
2019-09-30 09:53:49 +00:00
|
|
|
pub fn shutdown(&mut self) -> Result<()> {
|
2020-05-06 09:10:58 +00:00
|
|
|
let mut state = self.state.try_write().map_err(|_| Error::PoisonedState)?;
|
2019-10-11 12:47:57 +00:00
|
|
|
let new_state = VmState::Shutdown;
|
|
|
|
|
2020-05-06 09:10:58 +00:00
|
|
|
state.valid_transition(new_state)?;
|
2019-10-11 12:47:57 +00:00
|
|
|
|
2019-05-30 15:17:57 +00:00
|
|
|
if self.on_tty {
|
|
|
|
// Don't forget to set the terminal in canonical mode
|
|
|
|
// before to exit.
|
|
|
|
io::stdin()
|
|
|
|
.lock()
|
|
|
|
.set_canon_mode()
|
|
|
|
.map_err(Error::SetTerminalCanon)?;
|
|
|
|
}
|
|
|
|
|
2019-09-09 12:43:03 +00:00
|
|
|
// Trigger the termination of the signal_handler thread
|
|
|
|
if let Some(signals) = self.signals.take() {
|
|
|
|
signals.close();
|
|
|
|
}
|
|
|
|
|
2020-05-06 09:25:48 +00:00
|
|
|
// Wake up the DeviceManager threads so they will get terminated cleanly
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.resume()
|
|
|
|
.map_err(Error::Resume)?;
|
|
|
|
|
2019-11-11 14:31:11 +00:00
|
|
|
self.cpu_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.shutdown()
|
|
|
|
.map_err(Error::CpuManager)?;
|
2019-09-03 09:00:15 +00:00
|
|
|
|
2019-09-04 11:59:14 +00:00
|
|
|
// Wait for all the threads to finish
|
|
|
|
for thread in self.threads.drain(..) {
|
2020-01-24 08:34:51 +00:00
|
|
|
thread.join().map_err(Error::ThreadCleanup)?
|
2019-09-03 09:00:15 +00:00
|
|
|
}
|
2019-10-11 12:47:57 +00:00
|
|
|
*state = new_state;
|
2019-10-01 08:14:08 +00:00
|
|
|
|
2021-02-17 11:24:36 +00:00
|
|
|
event!("vm", "shutdown");
|
|
|
|
|
2019-09-24 14:14:04 +00:00
|
|
|
Ok(())
|
2019-03-18 20:59:50 +00:00
|
|
|
}
|
|
|
|
|
2020-04-03 09:27:20 +00:00
|
|
|
/// Hotplug-resizes the VM: vCPUs, RAM and/or the balloon device.
///
/// Each dimension is optional; `None` leaves it untouched. On success the
/// live `VmConfig` is updated so a subsequent reboot starts with the new
/// values.
///
/// # Errors
/// Propagates `Error::CpuManager`, `Error::MemoryManager` or
/// `Error::DeviceManager` from the respective subsystem.
pub fn resize(
    &mut self,
    desired_vcpus: Option<u8>,
    desired_memory: Option<u64>,
    desired_balloon: Option<u64>,
) -> Result<()> {
    event!("vm", "resizing");

    if let Some(desired_vcpus) = desired_vcpus {
        // CpuManager::resize returns true when the vCPU topology actually
        // changed; only then does the guest need an ACPI notification.
        if self
            .cpu_manager
            .lock()
            .unwrap()
            .resize(desired_vcpus)
            .map_err(Error::CpuManager)?
        {
            self.device_manager
                .lock()
                .unwrap()
                .notify_hotplug(AcpiNotificationFlags::CPU_DEVICES_CHANGED)
                .map_err(Error::DeviceManager)?;
        }
        // Persist so a reboot comes back with the resized vCPU count.
        self.config.lock().unwrap().cpus.boot_vcpus = desired_vcpus;
    }

    if let Some(desired_memory) = desired_memory {
        // `new_region` is Some only when the MemoryManager had to create a
        // fresh RAM region to satisfy the request.
        let new_region = self
            .memory_manager
            .lock()
            .unwrap()
            .resize(desired_memory)
            .map_err(Error::MemoryManager)?;

        // NOTE: the config mutex guard is kept alive (temporary lifetime
        // extension) for the rest of this scope while device_manager is
        // locked below — keep this ordering in mind when refactoring.
        let mut memory_config = &mut self.config.lock().unwrap().memory;

        if let Some(new_region) = &new_region {
            self.device_manager
                .lock()
                .unwrap()
                .update_memory(new_region)
                .map_err(Error::DeviceManager)?;

            match memory_config.hotplug_method {
                HotplugMethod::Acpi => {
                    // ACPI hotplug: tell the guest its memory devices changed.
                    self.device_manager
                        .lock()
                        .unwrap()
                        .notify_hotplug(AcpiNotificationFlags::MEMORY_DEVICES_CHANGED)
                        .map_err(Error::DeviceManager)?;
                }
                // virtio-mem handles resizing through the device itself.
                HotplugMethod::VirtioMem => {}
            }
        }

        // We update the VM config regardless of the actual guest resize
        // operation result (happened or not), so that if the VM reboots
        // it will be running with the last configure memory size.
        match memory_config.hotplug_method {
            HotplugMethod::Acpi => memory_config.size = desired_memory,
            HotplugMethod::VirtioMem => {
                if desired_memory > memory_config.size {
                    memory_config.hotplugged_size = Some(desired_memory - memory_config.size);
                } else {
                    memory_config.hotplugged_size = None;
                }
            }
        }
    }

    if let Some(desired_balloon) = desired_balloon {
        self.device_manager
            .lock()
            .unwrap()
            .resize_balloon(desired_balloon)
            .map_err(Error::DeviceManager)?;

        // Update the configuration value for the balloon size to ensure
        // a reboot would use the right value.
        if let Some(balloon_config) = &mut self.config.lock().unwrap().balloon {
            balloon_config.size = desired_balloon;
        }
    }

    event!("vm", "resized");

    Ok(())
}
|
|
|
|
|
2020-09-10 15:34:15 +00:00
|
|
|
/// Resizes a single memory zone identified by `id` to `desired_memory`.
///
/// Only growing (or keeping) the zone relative to its boot size is
/// allowed; shrinking below the boot size is rejected.
///
/// # Errors
/// Returns `Error::ResizeZone` when the zone does not exist or when
/// `desired_memory` is below the zone's boot size, and
/// `Error::MemoryManager` when the underlying resize fails.
pub fn resize_zone(&mut self, id: String, desired_memory: u64) -> Result<()> {
    // The config mutex guard stays alive for the whole function
    // (temporary lifetime extension) — memory_manager is locked while
    // it is held.
    let memory_config = &mut self.config.lock().unwrap().memory;

    if let Some(zones) = &mut memory_config.zones {
        for zone in zones.iter_mut() {
            if zone.id == id {
                if desired_memory >= zone.size {
                    // The hotplugged amount is the delta above boot size.
                    let hotplugged_size = desired_memory - zone.size;
                    self.memory_manager
                        .lock()
                        .unwrap()
                        .resize_zone(&id, desired_memory - zone.size)
                        .map_err(Error::MemoryManager)?;
                    // We update the memory zone config regardless of the
                    // actual 'resize-zone' operation result (happened or
                    // not), so that if the VM reboots it will be running
                    // with the last configured memory zone size.
                    zone.hotplugged_size = Some(hotplugged_size);

                    return Ok(());
                } else {
                    error!(
                        "Invalid to ask less ({}) than boot RAM ({}) for \
                        this memory zone",
                        desired_memory, zone.size,
                    );
                    return Err(Error::ResizeZone);
                }
            }
        }
    }

    // No zone with a matching id was found.
    error!("Could not find the memory zone {} for the resize", id);
    Err(Error::ResizeZone)
}
|
|
|
|
|
2021-05-04 15:59:47 +00:00
|
|
|
/// Appends `device` to an optional device list, materializing the list
/// (with `device` as its only element) when it is still `None`.
fn add_to_config<T>(devices: &mut Option<Vec<T>>, device: T) {
    // `get_or_insert_with` collapses the manual Some/None branching:
    // it reuses the existing Vec or inserts an empty one, then we push.
    devices.get_or_insert_with(Vec::new).push(device);
}
|
|
|
|
|
2021-10-28 10:03:51 +00:00
|
|
|
pub fn add_device(&mut self, mut device_cfg: DeviceConfig) -> Result<PciDeviceInfo> {
|
2021-05-04 16:13:10 +00:00
|
|
|
{
|
|
|
|
// Validate on a clone of the config
|
|
|
|
let mut config = self.config.lock().unwrap().clone();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.devices, device_cfg.clone());
|
2021-05-04 16:13:10 +00:00
|
|
|
config.validate().map_err(Error::ConfigValidation)?;
|
|
|
|
}
|
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
let pci_device_info = self
|
|
|
|
.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-10-28 10:03:51 +00:00
|
|
|
.add_device(&mut device_cfg)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
2020-02-28 14:46:50 +00:00
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
// Update VmConfig by adding the new device. This is important to
|
|
|
|
// ensure the device would be created in case of a reboot.
|
|
|
|
{
|
|
|
|
let mut config = self.config.lock().unwrap();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.devices, device_cfg);
|
2020-02-28 11:29:43 +00:00
|
|
|
}
|
2020-06-11 15:27:46 +00:00
|
|
|
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-01-12 15:10:05 +00:00
|
|
|
.notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
|
|
|
|
|
|
|
Ok(pci_device_info)
|
2020-02-27 13:00:46 +00:00
|
|
|
}
|
|
|
|
|
2021-07-30 14:48:58 +00:00
|
|
|
pub fn add_user_device(&mut self, mut device_cfg: UserDeviceConfig) -> Result<PciDeviceInfo> {
|
|
|
|
{
|
|
|
|
// Validate on a clone of the config
|
|
|
|
let mut config = self.config.lock().unwrap().clone();
|
|
|
|
Self::add_to_config(&mut config.user_devices, device_cfg.clone());
|
|
|
|
config.validate().map_err(Error::ConfigValidation)?;
|
|
|
|
}
|
|
|
|
|
|
|
|
let pci_device_info = self
|
|
|
|
.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.add_user_device(&mut device_cfg)
|
|
|
|
.map_err(Error::DeviceManager)?;
|
|
|
|
|
|
|
|
// Update VmConfig by adding the new device. This is important to
|
|
|
|
// ensure the device would be created in case of a reboot.
|
|
|
|
{
|
|
|
|
let mut config = self.config.lock().unwrap();
|
|
|
|
Self::add_to_config(&mut config.user_devices, device_cfg);
|
|
|
|
}
|
|
|
|
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
|
|
|
|
.map_err(Error::DeviceManager)?;
|
|
|
|
|
|
|
|
Ok(pci_device_info)
|
|
|
|
}
|
|
|
|
|
2021-10-28 10:03:51 +00:00
|
|
|
pub fn remove_device(&mut self, id: String) -> Result<()> {
|
2020-10-15 15:34:35 +00:00
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-10-28 10:03:51 +00:00
|
|
|
.remove_device(id.clone())
|
2020-10-15 15:34:35 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
2020-04-15 17:18:53 +00:00
|
|
|
|
2020-10-15 15:34:35 +00:00
|
|
|
// Update VmConfig by removing the device. This is important to
|
|
|
|
// ensure the device would not be created in case of a reboot.
|
|
|
|
let mut config = self.config.lock().unwrap();
|
2020-04-15 17:18:53 +00:00
|
|
|
|
2020-10-15 15:34:35 +00:00
|
|
|
// Remove if VFIO device
|
|
|
|
if let Some(devices) = config.devices.as_mut() {
|
2021-10-28 10:03:51 +00:00
|
|
|
devices.retain(|dev| dev.id.as_ref() != Some(&id));
|
2020-10-15 15:34:35 +00:00
|
|
|
}
|
2020-04-15 17:18:53 +00:00
|
|
|
|
2021-10-27 21:22:55 +00:00
|
|
|
// Remove if VFIO user device
|
|
|
|
if let Some(user_devices) = config.user_devices.as_mut() {
|
2021-10-28 10:03:51 +00:00
|
|
|
user_devices.retain(|dev| dev.id.as_ref() != Some(&id));
|
2021-10-27 21:22:55 +00:00
|
|
|
}
|
|
|
|
|
2020-10-15 15:34:35 +00:00
|
|
|
// Remove if disk device
|
|
|
|
if let Some(disks) = config.disks.as_mut() {
|
2021-10-28 10:03:51 +00:00
|
|
|
disks.retain(|dev| dev.id.as_ref() != Some(&id));
|
2020-10-15 15:34:35 +00:00
|
|
|
}
|
2020-04-15 17:18:53 +00:00
|
|
|
|
2020-10-15 15:34:35 +00:00
|
|
|
// Remove if net device
|
|
|
|
if let Some(net) = config.net.as_mut() {
|
2021-10-28 10:03:51 +00:00
|
|
|
net.retain(|dev| dev.id.as_ref() != Some(&id));
|
2020-10-15 15:34:35 +00:00
|
|
|
}
|
2020-04-28 15:04:32 +00:00
|
|
|
|
2020-10-15 15:34:35 +00:00
|
|
|
// Remove if pmem device
|
|
|
|
if let Some(pmem) = config.pmem.as_mut() {
|
2021-10-28 10:03:51 +00:00
|
|
|
pmem.retain(|dev| dev.id.as_ref() != Some(&id));
|
2020-10-15 15:34:35 +00:00
|
|
|
}
|
2020-03-09 13:26:03 +00:00
|
|
|
|
2020-10-15 15:34:35 +00:00
|
|
|
// Remove if vsock device
|
|
|
|
if let Some(vsock) = config.vsock.as_ref() {
|
2021-10-28 10:03:51 +00:00
|
|
|
if vsock.id.as_ref() == Some(&id) {
|
2020-10-15 15:34:35 +00:00
|
|
|
config.vsock = None;
|
2020-03-09 10:49:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-15 15:34:35 +00:00
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-01-12 15:10:05 +00:00
|
|
|
.notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
|
2020-10-15 15:34:35 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
|
|
|
Ok(())
|
2020-06-11 15:27:46 +00:00
|
|
|
}
|
2020-03-23 16:20:57 +00:00
|
|
|
|
2021-10-28 10:03:51 +00:00
|
|
|
pub fn add_disk(&mut self, mut disk_cfg: DiskConfig) -> Result<PciDeviceInfo> {
|
2021-05-04 16:13:10 +00:00
|
|
|
{
|
|
|
|
// Validate on a clone of the config
|
|
|
|
let mut config = self.config.lock().unwrap().clone();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.disks, disk_cfg.clone());
|
2021-05-04 16:13:10 +00:00
|
|
|
config.validate().map_err(Error::ConfigValidation)?;
|
|
|
|
}
|
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
let pci_device_info = self
|
|
|
|
.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-10-28 10:03:51 +00:00
|
|
|
.add_disk(&mut disk_cfg)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
2020-03-23 16:20:57 +00:00
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
// Update VmConfig by adding the new device. This is important to
|
|
|
|
// ensure the device would be created in case of a reboot.
|
|
|
|
{
|
|
|
|
let mut config = self.config.lock().unwrap();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.disks, disk_cfg);
|
2020-04-14 09:21:24 +00:00
|
|
|
}
|
2020-06-11 15:27:46 +00:00
|
|
|
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-01-12 15:10:05 +00:00
|
|
|
.notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
|
|
|
|
|
|
|
Ok(pci_device_info)
|
2020-04-14 09:21:24 +00:00
|
|
|
}
|
|
|
|
|
2021-10-28 10:03:51 +00:00
|
|
|
pub fn add_fs(&mut self, mut fs_cfg: FsConfig) -> Result<PciDeviceInfo> {
|
2021-05-04 16:13:10 +00:00
|
|
|
{
|
|
|
|
// Validate on a clone of the config
|
|
|
|
let mut config = self.config.lock().unwrap().clone();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.fs, fs_cfg.clone());
|
2021-05-04 16:13:10 +00:00
|
|
|
config.validate().map_err(Error::ConfigValidation)?;
|
|
|
|
}
|
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
let pci_device_info = self
|
|
|
|
.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-10-28 10:03:51 +00:00
|
|
|
.add_fs(&mut fs_cfg)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
2020-04-14 09:21:24 +00:00
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
// Update VmConfig by adding the new device. This is important to
|
|
|
|
// ensure the device would be created in case of a reboot.
|
|
|
|
{
|
|
|
|
let mut config = self.config.lock().unwrap();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.fs, fs_cfg);
|
2020-03-23 16:20:57 +00:00
|
|
|
}
|
2020-06-11 15:27:46 +00:00
|
|
|
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-01-12 15:10:05 +00:00
|
|
|
.notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
|
|
|
|
|
|
|
Ok(pci_device_info)
|
2020-03-23 16:20:57 +00:00
|
|
|
}
|
|
|
|
|
2021-10-28 10:03:51 +00:00
|
|
|
pub fn add_pmem(&mut self, mut pmem_cfg: PmemConfig) -> Result<PciDeviceInfo> {
|
2021-05-04 16:13:10 +00:00
|
|
|
{
|
|
|
|
// Validate on a clone of the config
|
|
|
|
let mut config = self.config.lock().unwrap().clone();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.pmem, pmem_cfg.clone());
|
2021-05-04 16:13:10 +00:00
|
|
|
config.validate().map_err(Error::ConfigValidation)?;
|
|
|
|
}
|
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
let pci_device_info = self
|
|
|
|
.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-10-28 10:03:51 +00:00
|
|
|
.add_pmem(&mut pmem_cfg)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
2020-03-23 16:20:57 +00:00
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
// Update VmConfig by adding the new device. This is important to
|
|
|
|
// ensure the device would be created in case of a reboot.
|
|
|
|
{
|
|
|
|
let mut config = self.config.lock().unwrap();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.pmem, pmem_cfg);
|
2020-03-23 16:20:57 +00:00
|
|
|
}
|
2020-06-11 15:27:46 +00:00
|
|
|
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-01-12 15:10:05 +00:00
|
|
|
.notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
|
|
|
|
|
|
|
Ok(pci_device_info)
|
2020-03-23 16:20:57 +00:00
|
|
|
}
|
|
|
|
|
2021-10-28 10:03:51 +00:00
|
|
|
pub fn add_net(&mut self, mut net_cfg: NetConfig) -> Result<PciDeviceInfo> {
|
2021-05-04 16:13:10 +00:00
|
|
|
{
|
|
|
|
// Validate on a clone of the config
|
|
|
|
let mut config = self.config.lock().unwrap().clone();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.net, net_cfg.clone());
|
2021-05-04 16:13:10 +00:00
|
|
|
config.validate().map_err(Error::ConfigValidation)?;
|
|
|
|
}
|
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
let pci_device_info = self
|
|
|
|
.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-10-28 10:03:51 +00:00
|
|
|
.add_net(&mut net_cfg)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
2020-03-23 16:20:57 +00:00
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
// Update VmConfig by adding the new device. This is important to
|
|
|
|
// ensure the device would be created in case of a reboot.
|
|
|
|
{
|
|
|
|
let mut config = self.config.lock().unwrap();
|
2021-10-28 10:03:51 +00:00
|
|
|
Self::add_to_config(&mut config.net, net_cfg);
|
2020-03-23 16:20:57 +00:00
|
|
|
}
|
2020-06-11 15:27:46 +00:00
|
|
|
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-01-12 15:10:05 +00:00
|
|
|
.notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
|
|
|
|
|
|
|
Ok(pci_device_info)
|
2020-03-23 16:20:57 +00:00
|
|
|
}
|
|
|
|
|
2021-10-28 10:03:51 +00:00
|
|
|
pub fn add_vsock(&mut self, mut vsock_cfg: VsockConfig) -> Result<PciDeviceInfo> {
|
2020-06-11 15:27:46 +00:00
|
|
|
if self.config.lock().unwrap().vsock.is_some() {
|
|
|
|
return Err(Error::TooManyVsockDevices);
|
|
|
|
}
|
2020-04-28 15:02:46 +00:00
|
|
|
|
2021-05-04 16:13:10 +00:00
|
|
|
{
|
|
|
|
// Validate on a clone of the config
|
|
|
|
let mut config = self.config.lock().unwrap().clone();
|
2021-10-28 10:03:51 +00:00
|
|
|
config.vsock = Some(vsock_cfg.clone());
|
2021-05-04 16:13:10 +00:00
|
|
|
config.validate().map_err(Error::ConfigValidation)?;
|
|
|
|
}
|
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
let pci_device_info = self
|
|
|
|
.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-10-28 10:03:51 +00:00
|
|
|
.add_vsock(&mut vsock_cfg)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
2020-04-28 15:02:46 +00:00
|
|
|
|
2020-06-11 15:27:46 +00:00
|
|
|
// Update VmConfig by adding the new device. This is important to
|
|
|
|
// ensure the device would be created in case of a reboot.
|
|
|
|
{
|
|
|
|
let mut config = self.config.lock().unwrap();
|
2021-10-28 10:03:51 +00:00
|
|
|
config.vsock = Some(vsock_cfg);
|
2020-04-28 15:02:46 +00:00
|
|
|
}
|
2020-06-11 15:27:46 +00:00
|
|
|
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-01-12 15:10:05 +00:00
|
|
|
.notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
|
2020-06-11 15:27:46 +00:00
|
|
|
.map_err(Error::DeviceManager)?;
|
|
|
|
|
|
|
|
Ok(pci_device_info)
|
2020-04-28 15:02:46 +00:00
|
|
|
}
|
|
|
|
|
2020-06-24 10:20:13 +00:00
|
|
|
/// Returns the per-device activity counters collected by the
/// DeviceManager, keyed by device id then counter name.
pub fn counters(&self) -> Result<HashMap<String, HashMap<&'static str, Wrapping<u64>>>> {
    let device_counters = self.device_manager.lock().unwrap().counters();
    Ok(device_counters)
}
|
|
|
|
|
2020-09-30 14:58:06 +00:00
|
|
|
/// Body of the dedicated signal-handling thread.
///
/// Unblocks the handled signals on this thread first, so the kernel
/// actually delivers them here (and not to some other thread with a
/// stricter seccomp policy), then loops forever over incoming signals:
/// - SIGWINCH: propagate the new terminal size to the virtio console.
/// - SIGTERM/SIGINT: restore the terminal (if on a TTY) and request VMM
///   exit through `exit_evt`, falling back to a hard process exit if the
///   eventfd write fails.
fn os_signal_handler(
    mut signals: Signals,
    console_input_clone: Arc<Console>,
    on_tty: bool,
    exit_evt: &EventFd,
) {
    // Make sure the handled signals are deliverable on this very thread.
    for sig in HANDLED_SIGNALS {
        unblock_signal(sig).unwrap();
    }

    // Blocking iterator: yields each delivered signal, never ends.
    for signal in signals.forever() {
        match signal {
            SIGWINCH => {
                // Terminal was resized: update the console device config.
                console_input_clone.update_console_size();
            }
            SIGTERM | SIGINT => {
                if on_tty {
                    // Leave raw mode so the user's shell works again.
                    io::stdin()
                        .lock()
                        .set_canon_mode()
                        .expect("failed to restore terminal mode");
                }
                // Signal the main loop to exit; if even that fails,
                // terminate the process directly.
                if exit_evt.write(1).is_err() {
                    std::process::exit(1);
                }
            }
            _ => (),
        }
    }
}
|
|
|
|
|
2021-02-08 17:27:47 +00:00
|
|
|
#[cfg(feature = "tdx")]
/// Performs the initial TDX configuration of the hypervisor VM,
/// passing the common CPUID set and the maximum vCPU count.
/// Must run before the vCPUs are created.
fn init_tdx(&mut self) -> Result<()> {
    let cpuid = self.cpu_manager.lock().unwrap().common_cpuid();
    let max_vcpus = self.cpu_manager.lock().unwrap().max_vcpus() as u32;
    self.vm
        .tdx_init(&cpuid, max_vcpus)
        .map_err(Error::InitializeTdxVm)?;
    Ok(())
}
|
|
|
|
|
|
|
|
#[cfg(feature = "tdx")]
/// Opens the TDVF firmware file configured for this VM and parses its
/// section table.
///
/// # Errors
/// `Error::LoadTdvf` when the firmware file cannot be opened,
/// `Error::ParseTdvf` when the section table cannot be parsed.
fn extract_tdvf_sections(&mut self) -> Result<Vec<TdvfSection>> {
    use arch::x86_64::tdx::*;
    // The TDVF file contains a table of section as well as code
    let mut firmware_file =
        File::open(&self.config.lock().unwrap().tdx.as_ref().unwrap().firmware)
            .map_err(Error::LoadTdvf)?;

    // Parse and return the section descriptors; RAM backing for the
    // sections is allocated later in populate_tdx_sections().
    parse_tdvf_sections(&mut firmware_file).map_err(Error::ParseTdvf)
}
|
|
|
|
|
|
|
|
#[cfg(feature = "tdx")]
/// Lays out the TDVF sections and VMM data regions in guest memory and
/// builds the TD HOB (Hand-Off Block) describing the memory map.
///
/// Steps:
/// 1. Allocate RAM for every TDVF section not already inside guest RAM.
/// 2. Copy BFV/CFV section payloads from the firmware file into memory,
///    and record the TdHob section address.
/// 3. Emit HOB memory resources: boot RAM interleaved with the non-BFV/
///    CFV sections (sections sorted by address, consumed lowest-first).
/// 4. Emit HOB MMIO resources for the 32-bit device hole and the 64-bit
///    device area.
/// 5. Append the VMM data regions and finalize the HOB.
///
/// Returns the guest address of the HOB, or `None` when the firmware
/// contains no TdHob section.
fn populate_tdx_sections(
    &mut self,
    sections: &[TdvfSection],
    vmm_data_regions: &[TdVmmDataRegion],
) -> Result<Option<u64>> {
    use arch::x86_64::tdx::*;
    // Get the memory end *before* we start adding TDVF ram regions
    let boot_guest_memory = self
        .memory_manager
        .lock()
        .as_ref()
        .unwrap()
        .boot_guest_memory();
    for section in sections {
        // No need to allocate if the section falls within guest RAM ranges
        if boot_guest_memory.address_in_range(GuestAddress(section.address)) {
            info!(
                "Not allocating TDVF Section: {:x?} since it is already part of guest RAM",
                section
            );
            continue;
        }

        info!("Allocating TDVF Section: {:x?}", section);
        self.memory_manager
            .lock()
            .unwrap()
            .add_ram_region(GuestAddress(section.address), section.size as usize)
            .map_err(Error::AllocatingTdvfMemory)?;
    }

    // The TDVF file contains a table of section as well as code
    let mut firmware_file =
        File::open(&self.config.lock().unwrap().tdx.as_ref().unwrap().firmware)
            .map_err(Error::LoadTdvf)?;

    // The guest memory at this point now has all the required regions so it
    // is safe to copy from the TDVF file into it.
    let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
    let mem = guest_memory.memory();
    let mut hob_offset = None;
    for section in sections {
        info!("Populating TDVF Section: {:x?}", section);
        match section.r#type {
            TdvfSectionType::Bfv | TdvfSectionType::Cfv => {
                // Firmware volumes: copy the payload bytes from the file.
                info!("Copying section to guest memory");
                firmware_file
                    .seek(SeekFrom::Start(section.data_offset as u64))
                    .map_err(Error::LoadTdvf)?;
                mem.read_from(
                    GuestAddress(section.address),
                    &mut firmware_file,
                    section.data_size as usize,
                )
                .unwrap();
            }
            TdvfSectionType::TdHob => {
                // Remember where the HOB has to be generated.
                hob_offset = Some(section.address);
            }
            _ => {}
        }
    }

    // Generate HOB
    let mut hob = TdHob::start(hob_offset.unwrap());

    // Keep only the sections that must appear as memory resources
    // (BFV/CFV were already copied and are not listed).
    let mut sorted_sections = sections.to_vec();
    sorted_sections.retain(|section| {
        !matches!(section.r#type, TdvfSectionType::Bfv | TdvfSectionType::Cfv)
    });

    // Add VMM specific data memory region to TdvfSections as TdHob type
    // to ensure the firmware won't ignore/reject the ranges.
    for region in vmm_data_regions {
        sorted_sections.push(TdvfSection {
            data_offset: 0,
            data_size: 0,
            address: region.start_address,
            size: region.length,
            r#type: TdvfSectionType::TdHob,
            attributes: 0,
        });
    }

    // Sort descending so pop() yields the lowest-addressed section first.
    sorted_sections.sort_by_key(|section| section.address);
    sorted_sections.reverse();
    let mut current_section = sorted_sections.pop();

    // RAM regions interleaved with TDVF sections
    let mut next_start_addr = 0;
    for region in boot_guest_memory.iter() {
        let region_start = region.start_addr().0;
        let region_end = region.last_addr().0;
        if region_start > next_start_addr {
            next_start_addr = region_start;
        }

        loop {
            // Either emit the pending section (non-RAM) when it starts at
            // or before the cursor, or emit RAM up to the next section
            // boundary / end of region.
            let (start, size, ram) = if let Some(section) = &current_section {
                if section.address <= next_start_addr {
                    (section.address, section.size, false)
                } else {
                    let last_addr = std::cmp::min(section.address - 1, region_end);
                    (next_start_addr, last_addr - next_start_addr + 1, true)
                }
            } else {
                (next_start_addr, region_end - next_start_addr + 1, true)
            };

            hob.add_memory_resource(&mem, start, size, ram)
                .map_err(Error::PopulateHob)?;

            if !ram {
                // Section consumed; move on to the next one.
                current_section = sorted_sections.pop();
            }

            next_start_addr = start + size;

            if next_start_addr > region_end {
                break;
            }
        }
    }

    // MMIO regions
    hob.add_mmio_resource(
        &mem,
        arch::layout::MEM_32BIT_DEVICES_START.raw_value(),
        arch::layout::APIC_START.raw_value()
            - arch::layout::MEM_32BIT_DEVICES_START.raw_value(),
    )
    .map_err(Error::PopulateHob)?;
    let start_of_device_area = self
        .memory_manager
        .lock()
        .unwrap()
        .start_of_device_area()
        .raw_value();
    let end_of_device_area = self
        .memory_manager
        .lock()
        .unwrap()
        .end_of_device_area()
        .raw_value();
    hob.add_mmio_resource(
        &mem,
        start_of_device_area,
        end_of_device_area - start_of_device_area,
    )
    .map_err(Error::PopulateHob)?;

    // Add VMM specific data to the TdHob. The content of the data is
    // written as part of the HOB, which will be retrieved from the
    // firmware, and processed accordingly to the type.
    for region in vmm_data_regions {
        hob.add_td_vmm_data(&mem, *region)
            .map_err(Error::PopulateHob)?;
    }

    hob.finish(&mem).map_err(Error::PopulateHob)?;

    Ok(hob_offset)
}
|
|
|
|
|
|
|
|
#[cfg(feature = "tdx")]
/// Registers the TDVF sections and the VMM data regions with the
/// hypervisor as TDX memory regions.
///
/// Sections whose `attributes == 1` (TDVF_SECTION_ATTRIBUTES_EXTENDMR)
/// are measured into the TD; VMM data regions are registered unmeasured.
fn init_tdx_memory(
    &mut self,
    sections: &[TdvfSection],
    regions: &[TdVmmDataRegion],
) -> Result<()> {
    let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
    let mem = guest_memory.memory();

    for section in sections {
        self.vm
            .tdx_init_memory_region(
                mem.get_host_address(GuestAddress(section.address)).unwrap() as u64,
                section.address,
                section.size,
                /* TDVF_SECTION_ATTRIBUTES_EXTENDMR */
                section.attributes == 1,
            )
            .map_err(Error::InitializeTdxMemoryRegion)?;
    }

    // The same way we let the hypervisor know about the TDVF sections, we
    // must declare the VMM specific regions shared with the guest so that
    // they won't be discarded.
    for region in regions {
        self.vm
            .tdx_init_memory_region(
                mem.get_host_address(GuestAddress(region.start_address))
                    .unwrap() as u64,
                region.start_address,
                region.length,
                false,
            )
            .map_err(Error::InitializeTdxMemoryRegion)?;
    }

    Ok(())
}
|
|
|
|
|
2021-09-08 14:17:17 +00:00
|
|
|
/// Spawns the dedicated "signal_handler" thread that runs
/// [`Vm::os_signal_handler`].
///
/// The thread applies the SignalHandler seccomp filter before handling
/// any signal; if the filter cannot be applied, or the handler panics,
/// it requests VMM exit through `exit_evt` instead of crashing silently.
/// Failure to register the signal iterator is logged but not fatal.
fn setup_signal_handler(&mut self) -> Result<()> {
    let console = self.device_manager.lock().unwrap().console().clone();
    let signals = Signals::new(&HANDLED_SIGNALS);
    match signals {
        Ok(signals) => {
            // Keep the handle so the iterator can be closed on shutdown.
            self.signals = Some(signals.handle());
            let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
            let on_tty = self.on_tty;
            let signal_handler_seccomp_filter =
                get_seccomp_filter(&self.seccomp_action, Thread::SignalHandler)
                    .map_err(Error::CreateSeccompFilter)?;
            self.threads.push(
                thread::Builder::new()
                    .name("signal_handler".to_string())
                    .spawn(move || {
                        // Sandbox this thread before touching any signal.
                        if !signal_handler_seccomp_filter.is_empty() {
                            if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
                                .map_err(Error::ApplySeccompFilter)
                            {
                                error!("Error applying seccomp filter: {:?}", e);
                                exit_evt.write(1).ok();
                                return;
                            }
                        }
                        // Convert a panic in the handler into a VMM exit
                        // request rather than an aborted thread.
                        std::panic::catch_unwind(AssertUnwindSafe(|| {
                            Vm::os_signal_handler(signals, console, on_tty, &exit_evt);
                        }))
                        .map_err(|_| {
                            error!("signal_handler thead panicked");
                            exit_evt.write(1).ok()
                        })
                        .ok();
                    })
                    .map_err(Error::SignalHandlerSpawn)?,
            );
        }
        Err(e) => error!("Signal not found {}", e),
    }
    Ok(())
}
|
2021-09-08 13:27:50 +00:00
|
|
|
|
2021-09-08 14:17:17 +00:00
|
|
|
fn setup_tty(&self) -> Result<()> {
|
2021-09-08 14:12:36 +00:00
|
|
|
if self.on_tty {
|
|
|
|
io::stdin()
|
|
|
|
.lock()
|
|
|
|
.set_raw_mode()
|
|
|
|
.map_err(Error::SetTerminalRaw)?;
|
2021-09-08 13:27:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2019-09-30 09:53:49 +00:00
|
|
|
/// Boot the VM: transition it from `Created` (or `Paused`) to `Running`.
///
/// A `Paused` VM is already fully built, so booting it is just a resume.
/// Otherwise the sequence is:
/// 1. load the kernel, if one is configured;
/// 2. perform the initial TDX configuration (must precede vCPU creation);
/// 3. create the boot vCPUs;
/// 4. build the ACPI tables and/or TDX sections, per enabled features;
/// 5. configure the system from the kernel entry point;
/// 6. start the vCPUs and install the signal handler and raw tty mode.
pub fn boot(&mut self) -> Result<()> {
    info!("Booting VM");
    event!("vm", "booting");
    let current_state = self.get_state()?;
    if current_state == VmState::Paused {
        return self.resume().map_err(Error::Resume);
    }

    let new_state = VmState::Running;
    // Fail early if Running is not reachable from the current state.
    current_state.valid_transition(new_state)?;

    // Load kernel if configured
    let entry_point = if self.kernel.as_ref().is_some() {
        Some(self.load_kernel()?)
    } else {
        None
    };

    // The initial TDX configuration must be done before the vCPUs are
    // created
    #[cfg(feature = "tdx")]
    if self.config.lock().unwrap().tdx.is_some() {
        self.init_tdx()?;
    }

    // Create and configure vcpus
    self.cpu_manager
        .lock()
        .unwrap()
        .create_boot_vcpus(entry_point)
        .map_err(Error::CpuManager)?;

    #[cfg(feature = "tdx")]
    let sections = self.extract_tdvf_sections()?;

    #[cfg(feature = "acpi")]
    let rsdp_addr = {
        let mem = self.memory_manager.lock().unwrap().guest_memory().memory();

        let rsdp_addr = crate::acpi::create_acpi_tables(
            &mem,
            &self.device_manager,
            &self.cpu_manager,
            &self.memory_manager,
            &self.numa_nodes,
        );
        info!("Created ACPI tables: rsdp_addr = 0x{:x}", rsdp_addr.0);

        rsdp_addr
    };

    // Without ACPI there is nothing to share with a TDX guest, so the
    // VMM data region list stays empty.
    #[cfg(all(feature = "tdx", not(feature = "acpi")))]
    let vmm_data_regions: Vec<TdVmmDataRegion> = Vec::new();

    // Create a VMM specific data region to share the ACPI tables with
    // the guest. Reserving 64kiB to ensure the ACPI tables will fit.
    #[cfg(all(feature = "tdx", feature = "acpi"))]
    let vmm_data_regions = vec![TdVmmDataRegion {
        start_address: rsdp_addr.0,
        length: 0x10000,
        region_type: TdVmmDataRegionType::AcpiTables,
    }];

    // Configuring the TDX regions requires that the vCPUs are created.
    #[cfg(feature = "tdx")]
    let hob_address = if self.config.lock().unwrap().tdx.is_some() {
        // TDX sections are written to memory.
        self.populate_tdx_sections(&sections, &vmm_data_regions)?
    } else {
        None
    };

    // Configure shared state based on loaded kernel
    // (skipped entirely when no kernel entry point exists).
    entry_point
        .map(|_| {
            self.configure_system(
                #[cfg(feature = "acpi")]
                rsdp_addr,
            )
        })
        .transpose()?;

    #[cfg(feature = "tdx")]
    if let Some(hob_address) = hob_address {
        // With the HOB address extracted the vCPUs can have
        // their TDX state configured.
        self.cpu_manager
            .lock()
            .unwrap()
            .initialize_tdx(hob_address)
            .map_err(Error::CpuManager)?;
        // Let the hypervisor know which memory ranges are shared with the
        // guest. This prevents the guest from ignoring/discarding memory
        // regions provided by the host.
        self.init_tdx_memory(&sections, &vmm_data_regions)?;
        // With TDX memory and CPU state configured TDX setup is complete
        self.vm.tdx_finalize().map_err(Error::FinalizeTdx)?;
    }

    self.cpu_manager
        .lock()
        .unwrap()
        .start_boot_vcpus()
        .map_err(Error::CpuManager)?;

    // Signal handling and tty raw mode are installed last, once the
    // guest is actually running.
    self.setup_signal_handler()?;
    self.setup_tty()?;

    let mut state = self.state.try_write().map_err(|_| Error::PoisonedState)?;
    *state = new_state;
    event!("vm", "booted");
    Ok(())
}
|
2019-02-28 14:26:30 +00:00
|
|
|
|
2019-09-25 09:26:11 +00:00
|
|
|
/// Gets a thread-safe reference counted pointer to the VM configuration.
|
2019-12-05 14:50:38 +00:00
|
|
|
pub fn get_config(&self) -> Arc<Mutex<VmConfig>> {
|
2019-09-25 09:26:11 +00:00
|
|
|
Arc::clone(&self.config)
|
|
|
|
}
|
2019-10-01 08:14:08 +00:00
|
|
|
|
|
|
|
/// Get the VM state. Returns an error if the state is poisoned.
|
|
|
|
pub fn get_state(&self) -> Result<VmState> {
|
|
|
|
self.state
|
|
|
|
.try_read()
|
|
|
|
.map_err(|_| Error::PoisonedState)
|
2019-10-11 12:47:57 +00:00
|
|
|
.map(|state| *state)
|
2019-10-01 08:14:08 +00:00
|
|
|
}
|
2020-09-04 11:48:08 +00:00
|
|
|
|
2021-05-20 11:07:41 +00:00
|
|
|
/// Load saved clock from snapshot
///
/// Stores the clock value found in the snapshot into `self.saved_clock`
/// and returns a copy of it.
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
pub fn load_clock_from_snapshot(
    &mut self,
    snapshot: &Snapshot,
) -> Result<Option<hypervisor::ClockData>> {
    let snapshot_clock = get_vm_snapshot(snapshot).map_err(Error::Restore)?.clock;
    self.saved_clock = snapshot_clock;
    Ok(self.saved_clock)
}
|
|
|
|
|
2020-09-04 11:48:08 +00:00
|
|
|
#[cfg(target_arch = "aarch64")]
/// Add the vGIC section to the VM snapshot.
///
/// Fetches the saved vCPU states, programs the redistributor typers
/// from them so the GIC state reflects all vCPUs, then snapshots the
/// GIC. Only the KVM GICv3-ITS implementation can be snapshotted; any
/// other concrete GIC type is an error.
fn add_vgic_snapshot_section(
    &self,
    vm_snapshot: &mut Snapshot,
) -> std::result::Result<(), MigratableError> {
    let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
    // Grab our own handle on the GIC device out of the device manager.
    let gic_device = Arc::clone(
        self.device_manager
            .lock()
            .unwrap()
            .get_interrupt_controller()
            .unwrap()
            .lock()
            .unwrap()
            .get_gic_device()
            .unwrap(),
    );

    // Set the GICR_TYPER registers from the restored vCPU states before
    // snapshotting, so per-vCPU redistributor state is captured.
    gic_device
        .lock()
        .unwrap()
        .set_gicr_typers(&saved_vcpu_states);

    vm_snapshot.add_snapshot(
        // Downcast to the concrete KvmGicV3Its to snapshot it; failure
        // means an unexpected GIC flavor is in use.
        if let Some(gicv3_its) = gic_device
            .lock()
            .unwrap()
            .as_any_concrete_mut()
            .downcast_mut::<KvmGicV3Its>()
        {
            gicv3_its.snapshot()?
        } else {
            return Err(MigratableError::Snapshot(anyhow!(
                "GicDevice downcast to KvmGicV3Its failed when snapshotting VM!"
            )));
        },
    );

    Ok(())
}
|
|
|
|
|
|
|
|
#[cfg(target_arch = "aarch64")]
/// Restore the vGIC from the VM snapshot and enable the interrupt controller routing.
///
/// The GIC is re-created here (it is not rebuilt when the device
/// manager is restored), its redistributor typers are programmed from
/// the saved vCPU states, the GICv3-ITS snapshot section is replayed
/// into it, and finally interrupt routing is enabled.
fn restore_vgic_and_enable_interrupt(
    &self,
    vm_snapshot: &Snapshot,
) -> std::result::Result<(), MigratableError> {
    let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
    // The number of vCPUs is the same as the number of saved vCPU states.
    let vcpu_numbers = saved_vcpu_states.len();

    // Creating a GIC device here, as the GIC will not be created when
    // restoring the device manager. Note that currently only the bare GICv3
    // without ITS is supported.
    let mut gic_device = create_gic(&self.vm, vcpu_numbers.try_into().unwrap())
        .map_err(|e| MigratableError::Restore(anyhow!("Could not create GIC: {:#?}", e)))?;

    // Here we prepare the GICR_TYPER registers from the restored vCPU states.
    gic_device.set_gicr_typers(&saved_vcpu_states);

    let gic_device = Arc::new(Mutex::new(gic_device));
    // Update the GIC entity in device manager
    self.device_manager
        .lock()
        .unwrap()
        .get_interrupt_controller()
        .unwrap()
        .lock()
        .unwrap()
        .set_gic_device(Arc::clone(&gic_device));

    // Restore GIC states.
    if let Some(gicv3_its_snapshot) = vm_snapshot.snapshots.get(GIC_V3_ITS_SNAPSHOT_ID) {
        // Replay the snapshot into the concrete KvmGicV3Its device.
        if let Some(gicv3_its) = gic_device
            .lock()
            .unwrap()
            .as_any_concrete_mut()
            .downcast_mut::<KvmGicV3Its>()
        {
            gicv3_its.restore(*gicv3_its_snapshot.clone())?;
        } else {
            return Err(MigratableError::Restore(anyhow!(
                "GicDevice downcast to KvmGicV3Its failed when restoring VM!"
            )));
        };
    } else {
        return Err(MigratableError::Restore(anyhow!(
            "Missing GicV3Its snapshot"
        )));
    }

    // Activate gic device
    self.device_manager
        .lock()
        .unwrap()
        .get_interrupt_controller()
        .unwrap()
        .lock()
        .unwrap()
        .enable()
        .map_err(|e| {
            MigratableError::Restore(anyhow!(
                "Could not enable interrupt controller routing: {:#?}",
                e
            ))
        })?;

    Ok(())
}
|
2020-08-25 06:44:21 +00:00
|
|
|
|
|
|
|
/// Gets the actual size of the balloon.
|
2020-10-14 10:04:42 +00:00
|
|
|
pub fn balloon_size(&self) -> u64 {
|
|
|
|
self.device_manager.lock().unwrap().balloon_size()
|
2020-08-25 06:44:21 +00:00
|
|
|
}
|
2020-11-04 15:00:23 +00:00
|
|
|
|
|
|
|
pub fn receive_memory_regions<F>(
|
|
|
|
&mut self,
|
|
|
|
ranges: &MemoryRangeTable,
|
|
|
|
fd: &mut F,
|
|
|
|
) -> std::result::Result<(), MigratableError>
|
|
|
|
where
|
|
|
|
F: Read,
|
|
|
|
{
|
|
|
|
let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
|
|
|
|
let mem = guest_memory.memory();
|
|
|
|
|
|
|
|
for range in ranges.regions() {
|
2021-09-24 13:29:13 +00:00
|
|
|
let mut offset: u64 = 0;
|
|
|
|
// Here we are manually handling the retry in case we can't the
|
|
|
|
// whole region at once because we can't use the implementation
|
|
|
|
// from vm-memory::GuestMemory of read_exact_from() as it is not
|
|
|
|
// following the correct behavior. For more info about this issue
|
|
|
|
// see: https://github.com/rust-vmm/vm-memory/issues/174
|
|
|
|
loop {
|
|
|
|
let bytes_read = mem
|
|
|
|
.read_from(
|
|
|
|
GuestAddress(range.gpa + offset),
|
|
|
|
fd,
|
|
|
|
(range.length - offset) as usize,
|
|
|
|
)
|
|
|
|
.map_err(|e| {
|
|
|
|
MigratableError::MigrateReceive(anyhow!(
|
|
|
|
"Error receiving memory from socket: {}",
|
|
|
|
e
|
|
|
|
))
|
|
|
|
})?;
|
|
|
|
offset += bytes_read as u64;
|
|
|
|
|
|
|
|
if offset == range.length {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2020-11-04 15:00:23 +00:00
|
|
|
}
|
2021-09-24 13:29:13 +00:00
|
|
|
|
2020-11-04 15:00:23 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn send_memory_regions<F>(
|
|
|
|
&mut self,
|
|
|
|
ranges: &MemoryRangeTable,
|
|
|
|
fd: &mut F,
|
|
|
|
) -> std::result::Result<(), MigratableError>
|
|
|
|
where
|
|
|
|
F: Write,
|
|
|
|
{
|
|
|
|
let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
|
|
|
|
let mem = guest_memory.memory();
|
|
|
|
|
|
|
|
for range in ranges.regions() {
|
2021-09-24 13:29:13 +00:00
|
|
|
let mut offset: u64 = 0;
|
|
|
|
// Here we are manually handling the retry in case we can't the
|
|
|
|
// whole region at once because we can't use the implementation
|
|
|
|
// from vm-memory::GuestMemory of write_all_to() as it is not
|
|
|
|
// following the correct behavior. For more info about this issue
|
|
|
|
// see: https://github.com/rust-vmm/vm-memory/issues/174
|
|
|
|
loop {
|
|
|
|
let bytes_written = mem
|
|
|
|
.write_to(
|
|
|
|
GuestAddress(range.gpa + offset),
|
|
|
|
fd,
|
|
|
|
(range.length - offset) as usize,
|
|
|
|
)
|
|
|
|
.map_err(|e| {
|
|
|
|
MigratableError::MigrateSend(anyhow!(
|
|
|
|
"Error transferring memory to socket: {}",
|
|
|
|
e
|
|
|
|
))
|
|
|
|
})?;
|
|
|
|
offset += bytes_written as u64;
|
|
|
|
|
|
|
|
if offset == range.length {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2020-11-04 15:00:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the memory manager's memory range table (without dirty-page
/// filtering — the `false` argument).
pub fn memory_range_table(&self) -> std::result::Result<MemoryRangeTable, MigratableError> {
    let memory_manager = self.memory_manager.lock().unwrap();
    memory_manager.memory_range_table(false)
}
|
2020-11-13 15:42:50 +00:00
|
|
|
|
2020-11-30 09:06:13 +00:00
|
|
|
/// Returns a shared handle on the device manager's device tree.
pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
    let device_manager = self.device_manager.lock().unwrap();
    device_manager.device_tree()
}
|
2020-11-09 13:29:05 +00:00
|
|
|
|
|
|
|
pub fn activate_virtio_devices(&self) -> Result<()> {
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.activate_virtio_devices()
|
|
|
|
.map_err(Error::ActivateVirtioDevices)
|
|
|
|
}
|
2021-01-13 10:04:33 +00:00
|
|
|
|
2021-03-07 13:39:11 +00:00
|
|
|
#[cfg(target_arch = "x86_64")]
/// Signal a power-button press to the guest.
///
/// With the "acpi" feature the press is delivered through the device
/// manager; without it there is no notification channel, so
/// `Error::PowerButtonNotSupported` is returned.
pub fn power_button(&self) -> Result<()> {
    // When "acpi" is enabled this early return is the whole body; the
    // fallback line below is compiled out.
    #[cfg(feature = "acpi")]
    return self
        .device_manager
        .lock()
        .unwrap()
        .notify_power_button()
        .map_err(Error::PowerButton);
    #[cfg(not(feature = "acpi"))]
    Err(Error::PowerButtonNotSupported)
}
|
2021-03-07 13:39:11 +00:00
|
|
|
|
|
|
|
#[cfg(target_arch = "aarch64")]
/// Signal a power-button press to the guest via the device manager.
pub fn power_button(&self) -> Result<()> {
    let device_manager = self.device_manager.lock().unwrap();
    device_manager
        .notify_power_button()
        .map_err(Error::PowerButton)
}
|
2021-10-05 15:53:08 +00:00
|
|
|
|
|
|
|
/// Returns the memory manager's snapshot metadata.
pub fn memory_manager_data(&self) -> MemoryManagerSnapshotData {
    let memory_manager = self.memory_manager.lock().unwrap();
    memory_manager.snapshot_data()
}
|
2019-02-28 13:16:58 +00:00
|
|
|
}
|
|
|
|
|
2019-11-22 13:54:52 +00:00
|
|
|
impl Pausable for Vm {
    /// Pause the VM: validate the state transition, capture the guest
    /// clock (KVM/x86_64), then pause vCPUs followed by devices.
    fn pause(&mut self) -> std::result::Result<(), MigratableError> {
        event!("vm", "pausing");
        let mut state = self
            .state
            .try_write()
            .map_err(|e| MigratableError::Pause(anyhow!("Could not get VM state: {}", e)))?;
        let new_state = VmState::Paused;

        state
            .valid_transition(new_state)
            .map_err(|e| MigratableError::Pause(anyhow!("Invalid transition: {:?}", e)))?;

        // Save the guest clock BEFORE pausing the vCPUs so the value
        // restored on resume matches the moment execution stopped.
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        {
            let mut clock = self
                .vm
                .get_clock()
                .map_err(|e| MigratableError::Pause(anyhow!("Could not get VM clock: {}", e)))?;
            // Reset clock flags.
            clock.flags = 0;
            self.saved_clock = Some(clock);
        }
        self.cpu_manager.lock().unwrap().pause()?;
        self.device_manager.lock().unwrap().pause()?;

        *state = new_state;

        event!("vm", "paused");
        Ok(())
    }

    /// Resume the VM: validate the transition, resume vCPUs, restore
    /// the saved guest clock (KVM/x86_64), then resume devices.
    fn resume(&mut self) -> std::result::Result<(), MigratableError> {
        event!("vm", "resuming");
        let mut state = self
            .state
            .try_write()
            .map_err(|e| MigratableError::Resume(anyhow!("Could not get VM state: {}", e)))?;
        let new_state = VmState::Running;

        state
            .valid_transition(new_state)
            .map_err(|e| MigratableError::Resume(anyhow!("Invalid transition: {:?}", e)))?;

        self.cpu_manager.lock().unwrap().resume()?;
        // Push the clock captured in pause() back to the hypervisor, if
        // one was saved.
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        {
            if let Some(clock) = &self.saved_clock {
                self.vm.set_clock(clock).map_err(|e| {
                    MigratableError::Resume(anyhow!("Could not set VM clock: {}", e))
                })?;
            }
        }
        self.device_manager.lock().unwrap().resume()?;

        // And we're back to the Running state.
        *state = new_state;
        event!("vm", "resumed");
        Ok(())
    }
}
|
|
|
|
|
2019-05-12 11:53:47 +00:00
|
|
|
/// Serializable top-level VM state stored in the snapshot's data
/// section.
#[derive(Serialize, Deserialize)]
pub struct VmSnapshot {
    /// Full VM configuration the snapshot was taken with.
    pub config: Arc<Mutex<VmConfig>>,
    /// Guest clock captured while pausing (KVM/x86_64 only).
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    pub clock: Option<hypervisor::ClockData>,
    /// Hypervisor-level VM state.
    pub state: Option<hypervisor::VmState>,
    /// CPUID common to the vCPUs (KVM/x86_64 only).
    // NOTE(review): presumably compared against the destination host's
    // cpuid on restore/migration — confirm against the restore path.
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    pub common_cpuid: hypervisor::CpuId,
}

/// Snapshot identifier of the VM component.
pub const VM_SNAPSHOT_ID: &str = "vm";
|
2019-05-12 11:53:47 +00:00
|
|
|
impl Snapshottable for Vm {
|
|
|
|
fn id(&self) -> String {
|
|
|
|
VM_SNAPSHOT_ID.to_string()
|
|
|
|
}
|
|
|
|
|
2020-08-21 12:31:58 +00:00
|
|
|
fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
|
2021-02-17 11:24:36 +00:00
|
|
|
event!("vm", "snapshotting");
|
|
|
|
|
2021-03-03 15:38:53 +00:00
|
|
|
#[cfg(feature = "tdx")]
|
|
|
|
{
|
|
|
|
if self.config.lock().unwrap().tdx.is_some() {
|
|
|
|
return Err(MigratableError::Snapshot(anyhow!(
|
|
|
|
"Snapshot not possible with TDX VM"
|
|
|
|
)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-12 11:53:47 +00:00
|
|
|
let current_state = self.get_state().unwrap();
|
|
|
|
if current_state != VmState::Paused {
|
|
|
|
return Err(MigratableError::Snapshot(anyhow!(
|
|
|
|
"Trying to snapshot while VM is running"
|
|
|
|
)));
|
|
|
|
}
|
|
|
|
|
2021-07-20 22:18:33 +00:00
|
|
|
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
|
|
|
|
let common_cpuid = {
|
|
|
|
#[cfg(feature = "tdx")]
|
|
|
|
let tdx_enabled = self.config.lock().unwrap().tdx.is_some();
|
2021-10-01 09:54:27 +00:00
|
|
|
let phys_bits = physical_bits(self.config.lock().unwrap().cpus.max_phys_bits);
|
2021-07-20 22:18:33 +00:00
|
|
|
arch::generate_common_cpuid(
|
|
|
|
self.hypervisor.clone(),
|
|
|
|
None,
|
|
|
|
None,
|
|
|
|
phys_bits,
|
|
|
|
self.config.lock().unwrap().cpus.kvm_hyperv,
|
|
|
|
#[cfg(feature = "tdx")]
|
|
|
|
tdx_enabled,
|
|
|
|
)
|
|
|
|
.map_err(|e| {
|
|
|
|
MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
|
|
|
|
})?
|
|
|
|
};
|
|
|
|
|
2019-05-12 11:53:47 +00:00
|
|
|
let mut vm_snapshot = Snapshot::new(VM_SNAPSHOT_ID);
|
2020-08-21 20:06:56 +00:00
|
|
|
let vm_state = self
|
|
|
|
.vm
|
|
|
|
.state()
|
|
|
|
.map_err(|e| MigratableError::Snapshot(e.into()))?;
|
2019-05-12 11:53:47 +00:00
|
|
|
let vm_snapshot_data = serde_json::to_vec(&VmSnapshot {
|
|
|
|
config: self.get_config(),
|
2020-12-04 23:35:29 +00:00
|
|
|
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
|
2020-06-23 09:39:39 +00:00
|
|
|
clock: self.saved_clock,
|
2020-08-21 20:06:56 +00:00
|
|
|
state: Some(vm_state),
|
2021-07-20 22:18:33 +00:00
|
|
|
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
|
|
|
|
common_cpuid,
|
2019-05-12 11:53:47 +00:00
|
|
|
})
|
|
|
|
.map_err(|e| MigratableError::Snapshot(e.into()))?;
|
|
|
|
|
|
|
|
vm_snapshot.add_snapshot(self.cpu_manager.lock().unwrap().snapshot()?);
|
|
|
|
vm_snapshot.add_snapshot(self.memory_manager.lock().unwrap().snapshot()?);
|
2020-09-04 11:48:08 +00:00
|
|
|
|
|
|
|
#[cfg(target_arch = "aarch64")]
|
|
|
|
self.add_vgic_snapshot_section(&mut vm_snapshot)
|
|
|
|
.map_err(|e| MigratableError::Snapshot(e.into()))?;
|
|
|
|
|
2019-05-12 11:53:47 +00:00
|
|
|
vm_snapshot.add_snapshot(self.device_manager.lock().unwrap().snapshot()?);
|
|
|
|
vm_snapshot.add_data_section(SnapshotDataSection {
|
|
|
|
id: format!("{}-section", VM_SNAPSHOT_ID),
|
|
|
|
snapshot: vm_snapshot_data,
|
|
|
|
});
|
|
|
|
|
2021-02-17 11:24:36 +00:00
|
|
|
event!("vm", "snapshotted");
|
2019-05-12 11:53:47 +00:00
|
|
|
Ok(vm_snapshot)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
|
2021-02-17 11:24:36 +00:00
|
|
|
event!("vm", "restoring");
|
|
|
|
|
2019-05-12 11:53:47 +00:00
|
|
|
let current_state = self
|
|
|
|
.get_state()
|
|
|
|
.map_err(|e| MigratableError::Restore(anyhow!("Could not get VM state: {:#?}", e)))?;
|
2020-06-22 13:35:27 +00:00
|
|
|
let new_state = VmState::Paused;
|
2019-05-12 11:53:47 +00:00
|
|
|
current_state.valid_transition(new_state).map_err(|e| {
|
|
|
|
MigratableError::Restore(anyhow!("Could not restore VM state: {:#?}", e))
|
|
|
|
})?;
|
|
|
|
|
|
|
|
if let Some(memory_manager_snapshot) = snapshot.snapshots.get(MEMORY_MANAGER_SNAPSHOT_ID) {
|
|
|
|
self.memory_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.restore(*memory_manager_snapshot.clone())?;
|
|
|
|
} else {
|
|
|
|
return Err(MigratableError::Restore(anyhow!(
|
|
|
|
"Missing memory manager snapshot"
|
|
|
|
)));
|
|
|
|
}
|
|
|
|
|
2020-09-04 10:56:30 +00:00
|
|
|
if let Some(cpu_manager_snapshot) = snapshot.snapshots.get(CPU_MANAGER_SNAPSHOT_ID) {
|
|
|
|
self.cpu_manager
|
2019-05-12 11:53:47 +00:00
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2020-09-04 10:56:30 +00:00
|
|
|
.restore(*cpu_manager_snapshot.clone())?;
|
2019-05-12 11:53:47 +00:00
|
|
|
} else {
|
|
|
|
return Err(MigratableError::Restore(anyhow!(
|
2020-09-04 10:56:30 +00:00
|
|
|
"Missing CPU manager snapshot"
|
2019-05-12 11:53:47 +00:00
|
|
|
)));
|
|
|
|
}
|
|
|
|
|
2020-09-04 10:56:30 +00:00
|
|
|
if let Some(device_manager_snapshot) = snapshot.snapshots.get(DEVICE_MANAGER_SNAPSHOT_ID) {
|
|
|
|
self.device_manager
|
2019-05-12 11:53:47 +00:00
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2020-09-04 10:56:30 +00:00
|
|
|
.restore(*device_manager_snapshot.clone())?;
|
2019-05-12 11:53:47 +00:00
|
|
|
} else {
|
|
|
|
return Err(MigratableError::Restore(anyhow!(
|
2020-09-04 10:56:30 +00:00
|
|
|
"Missing device manager snapshot"
|
2019-05-12 11:53:47 +00:00
|
|
|
)));
|
|
|
|
}
|
|
|
|
|
2020-09-04 11:48:08 +00:00
|
|
|
#[cfg(target_arch = "aarch64")]
|
|
|
|
self.restore_vgic_and_enable_interrupt(&snapshot)?;
|
|
|
|
|
2021-06-06 08:13:59 +00:00
|
|
|
if let Some(device_manager_snapshot) = snapshot.snapshots.get(DEVICE_MANAGER_SNAPSHOT_ID) {
|
|
|
|
self.device_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.restore_devices(*device_manager_snapshot.clone())?;
|
|
|
|
} else {
|
|
|
|
return Err(MigratableError::Restore(anyhow!(
|
|
|
|
"Missing device manager snapshot"
|
|
|
|
)));
|
|
|
|
}
|
|
|
|
|
2020-09-04 10:56:30 +00:00
|
|
|
// Now we can start all vCPUs from here.
|
|
|
|
self.cpu_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.start_restored_vcpus()
|
|
|
|
.map_err(|e| {
|
|
|
|
MigratableError::Restore(anyhow!("Cannot start restored vCPUs: {:#?}", e))
|
|
|
|
})?;
|
|
|
|
|
2021-09-08 14:17:17 +00:00
|
|
|
self.setup_signal_handler().map_err(|e| {
|
|
|
|
MigratableError::Restore(anyhow!("Could not setup signal handler: {:#?}", e))
|
2021-09-08 13:27:50 +00:00
|
|
|
})?;
|
2021-09-08 14:17:17 +00:00
|
|
|
self.setup_tty()
|
|
|
|
.map_err(|e| MigratableError::Restore(anyhow!("Could not setup tty: {:#?}", e)))?;
|
2019-05-12 11:53:47 +00:00
|
|
|
|
|
|
|
let mut state = self
|
|
|
|
.state
|
|
|
|
.try_write()
|
|
|
|
.map_err(|e| MigratableError::Restore(anyhow!("Could not set VM state: {:#?}", e)))?;
|
|
|
|
*state = new_state;
|
2021-02-17 11:24:36 +00:00
|
|
|
|
|
|
|
event!("vm", "restored");
|
2019-05-12 11:53:47 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-25 00:09:54 +00:00
|
|
|
impl Transportable for Vm {
|
|
|
|
fn send(
|
|
|
|
&self,
|
|
|
|
snapshot: &Snapshot,
|
|
|
|
destination_url: &str,
|
|
|
|
) -> std::result::Result<(), MigratableError> {
|
2021-03-12 10:46:36 +00:00
|
|
|
let mut vm_snapshot_path = url_to_path(destination_url)?;
|
|
|
|
vm_snapshot_path.push(VM_SNAPSHOT_FILE);
|
|
|
|
|
|
|
|
// Create the snapshot file
|
|
|
|
let mut vm_snapshot_file = OpenOptions::new()
|
|
|
|
.read(true)
|
|
|
|
.write(true)
|
|
|
|
.create_new(true)
|
|
|
|
.open(vm_snapshot_path)
|
|
|
|
.map_err(|e| MigratableError::MigrateSend(e.into()))?;
|
|
|
|
|
|
|
|
// Serialize and write the snapshot
|
|
|
|
let vm_snapshot =
|
|
|
|
serde_json::to_vec(snapshot).map_err(|e| MigratableError::MigrateSend(e.into()))?;
|
|
|
|
|
|
|
|
vm_snapshot_file
|
|
|
|
.write(&vm_snapshot)
|
|
|
|
.map_err(|e| MigratableError::MigrateSend(e.into()))?;
|
|
|
|
|
|
|
|
// Tell the memory manager to also send/write its own snapshot.
|
|
|
|
if let Some(memory_manager_snapshot) = snapshot.snapshots.get(MEMORY_MANAGER_SNAPSHOT_ID) {
|
|
|
|
self.memory_manager
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.send(&*memory_manager_snapshot.clone(), destination_url)?;
|
|
|
|
} else {
|
|
|
|
return Err(MigratableError::Restore(anyhow!(
|
|
|
|
"Missing memory manager snapshot"
|
|
|
|
)));
|
2020-02-25 00:09:54 +00:00
|
|
|
}
|
2021-03-12 10:46:36 +00:00
|
|
|
|
2020-02-25 00:09:54 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
2021-08-04 14:52:31 +00:00
|
|
|
|
|
|
|
impl Migratable for Vm {
|
|
|
|
fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
|
2021-08-04 15:03:20 +00:00
|
|
|
self.memory_manager.lock().unwrap().start_dirty_log()?;
|
|
|
|
self.device_manager.lock().unwrap().start_dirty_log()
|
2021-08-04 14:52:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
|
2021-08-04 15:03:20 +00:00
|
|
|
self.memory_manager.lock().unwrap().stop_dirty_log()?;
|
|
|
|
self.device_manager.lock().unwrap().stop_dirty_log()
|
2021-08-04 14:52:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
|
2021-08-04 15:03:20 +00:00
|
|
|
Ok(MemoryRangeTable::new_from_tables(vec![
|
|
|
|
self.memory_manager.lock().unwrap().dirty_log()?,
|
|
|
|
self.device_manager.lock().unwrap().dirty_log()?,
|
|
|
|
]))
|
2021-08-04 14:52:31 +00:00
|
|
|
}
|
2021-08-09 08:46:41 +00:00
|
|
|
|
|
|
|
fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
|
|
|
|
self.memory_manager.lock().unwrap().complete_migration()?;
|
|
|
|
self.device_manager.lock().unwrap().complete_migration()
|
|
|
|
}
|
2021-08-04 14:52:31 +00:00
|
|
|
}
|
2019-11-22 13:54:52 +00:00
|
|
|
|
2020-12-04 23:35:29 +00:00
|
|
|
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
#[cfg(test)]
mod tests {
    use super::*;

    // Asserts the full transition matrix out of `state`: which of the
    // four VmState targets are legal successors.
    fn test_vm_state_transitions(state: VmState) {
        match state {
            VmState::Created => {
                // Check the transitions from Created
                assert!(state.valid_transition(VmState::Created).is_err());
                assert!(state.valid_transition(VmState::Running).is_ok());
                assert!(state.valid_transition(VmState::Shutdown).is_err());
                assert!(state.valid_transition(VmState::Paused).is_ok());
            }
            VmState::Running => {
                // Check the transitions from Running
                assert!(state.valid_transition(VmState::Created).is_err());
                assert!(state.valid_transition(VmState::Running).is_err());
                assert!(state.valid_transition(VmState::Shutdown).is_ok());
                assert!(state.valid_transition(VmState::Paused).is_ok());
            }
            VmState::Shutdown => {
                // Check the transitions from Shutdown
                assert!(state.valid_transition(VmState::Created).is_err());
                assert!(state.valid_transition(VmState::Running).is_ok());
                assert!(state.valid_transition(VmState::Shutdown).is_err());
                assert!(state.valid_transition(VmState::Paused).is_err());
            }
            VmState::Paused => {
                // Check the transitions from Paused
                assert!(state.valid_transition(VmState::Created).is_err());
                assert!(state.valid_transition(VmState::Running).is_ok());
                assert!(state.valid_transition(VmState::Shutdown).is_ok());
                assert!(state.valid_transition(VmState::Paused).is_err());
            }
        }
    }

    #[test]
    fn test_vm_created_transitions() {
        test_vm_state_transitions(VmState::Created);
    }

    #[test]
    fn test_vm_running_transitions() {
        test_vm_state_transitions(VmState::Running);
    }

    #[test]
    fn test_vm_shutdown_transitions() {
        test_vm_state_transitions(VmState::Shutdown);
    }

    #[test]
    fn test_vm_paused_transitions() {
        test_vm_state_transitions(VmState::Paused);
    }
}
|
|
|
|
|
2020-07-08 12:48:46 +00:00
|
|
|
#[cfg(target_arch = "aarch64")]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::GuestMemoryMmap;
    use arch::aarch64::fdt::create_fdt;
    use arch::aarch64::gic::kvm::create_gic;
    use arch::aarch64::layout;
    use arch::{DeviceType, MmioDeviceInfo};
    use vm_memory::GuestAddress;

    // Spacing between the fake MMIO devices used below.
    const LEN: u64 = 4096;

    // Builds a minimal guest memory layout plus serial/virtio/rtc MMIO
    // device entries and checks that FDT creation succeeds.
    #[test]
    fn test_create_fdt_with_devices() {
        // One RAM region, just large enough to hold the FDT.
        let regions = vec![(
            GuestAddress(layout::RAM_64BIT_START),
            (layout::FDT_MAX_SIZE + 0x1000) as usize,
        )];
        let mem = GuestMemoryMmap::from_ranges(&regions).expect("Cannot initialize memory");

        let dev_info: HashMap<(DeviceType, std::string::String), MmioDeviceInfo> = [
            (
                (DeviceType::Serial, DeviceType::Serial.to_string()),
                MmioDeviceInfo {
                    addr: 0x00,
                    irq: 33,
                },
            ),
            (
                (DeviceType::Virtio(1), "virtio".to_string()),
                MmioDeviceInfo { addr: LEN, irq: 34 },
            ),
            (
                (DeviceType::Rtc, "rtc".to_string()),
                MmioDeviceInfo {
                    addr: 2 * LEN,
                    irq: 35,
                },
            ),
        ]
        .iter()
        .cloned()
        .collect();

        // A real (single-vCPU) VM and GIC are needed to generate the
        // interrupt controller nodes of the FDT.
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let gic = create_gic(&vm, 1).unwrap();
        assert!(create_fdt(
            &mem,
            "console=tty0",
            vec![0],
            Some((0, 0, 0)),
            &dev_info,
            &*gic,
            &None,
            &(0x1_0000_0000, 0x1_0000),
            &BTreeMap::new(),
            None,
        )
        .is_ok())
    }
}
|
|
|
|
|
2021-01-11 23:19:49 +00:00
|
|
|
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
|
2020-06-12 09:53:11 +00:00
|
|
|
#[test]
|
2019-02-28 13:16:58 +00:00
|
|
|
pub fn test_vm() {
|
2020-07-03 14:27:53 +00:00
|
|
|
use hypervisor::VmExit;
|
2021-09-27 11:43:04 +00:00
|
|
|
use vm_memory::{Address, GuestMemory, GuestMemoryRegion};
|
2019-02-28 13:16:58 +00:00
|
|
|
// This example based on https://lwn.net/Articles/658511/
|
|
|
|
let code = [
|
|
|
|
0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
|
|
|
|
0x00, 0xd8, /* add %bl, %al */
|
|
|
|
0x04, b'0', /* add $'0', %al */
|
|
|
|
0xee, /* out %al, (%dx) */
|
|
|
|
0xb0, b'\n', /* mov $'\n', %al */
|
|
|
|
0xee, /* out %al, (%dx) */
|
|
|
|
0xf4, /* hlt */
|
|
|
|
];
|
|
|
|
|
|
|
|
let mem_size = 0x1000;
|
|
|
|
let load_addr = GuestAddress(0x1000);
|
2020-02-06 08:00:41 +00:00
|
|
|
let mem = GuestMemoryMmap::from_ranges(&[(load_addr, mem_size)]).unwrap();
|
2019-02-28 13:16:58 +00:00
|
|
|
|
2020-07-14 11:42:21 +00:00
|
|
|
let hv = hypervisor::new().unwrap();
|
2020-07-03 09:10:50 +00:00
|
|
|
let vm = hv.create_vm().expect("new VM creation failed");
|
2019-02-28 13:16:58 +00:00
|
|
|
|
2021-05-24 19:23:25 +00:00
|
|
|
for (index, region) in mem.iter().enumerate() {
|
2020-07-04 11:38:17 +00:00
|
|
|
let mem_region = vm.make_user_memory_region(
|
|
|
|
index as u32,
|
|
|
|
region.start_addr().raw_value(),
|
|
|
|
region.len() as u64,
|
|
|
|
region.as_ptr() as u64,
|
|
|
|
false,
|
2020-11-11 18:16:39 +00:00
|
|
|
false,
|
2020-07-04 11:38:17 +00:00
|
|
|
);
|
2019-02-28 13:16:58 +00:00
|
|
|
|
2021-07-03 13:58:39 +00:00
|
|
|
vm.create_user_memory_region(mem_region)
|
2021-05-24 19:23:25 +00:00
|
|
|
.expect("Cannot configure guest memory");
|
|
|
|
}
|
2019-02-28 13:16:58 +00:00
|
|
|
mem.write_slice(&code, load_addr)
|
|
|
|
.expect("Writing code to memory failed");
|
|
|
|
|
2020-11-18 16:37:52 +00:00
|
|
|
let vcpu = vm.create_vcpu(0, None).expect("new Vcpu failed");
|
2019-02-28 13:16:58 +00:00
|
|
|
|
2020-07-03 09:10:50 +00:00
|
|
|
let mut vcpu_sregs = vcpu.get_sregs().expect("get sregs failed");
|
2019-02-28 13:16:58 +00:00
|
|
|
vcpu_sregs.cs.base = 0;
|
|
|
|
vcpu_sregs.cs.selector = 0;
|
2020-07-03 09:10:50 +00:00
|
|
|
vcpu.set_sregs(&vcpu_sregs).expect("set sregs failed");
|
2019-02-28 13:16:58 +00:00
|
|
|
|
2020-07-03 09:10:50 +00:00
|
|
|
let mut vcpu_regs = vcpu.get_regs().expect("get regs failed");
|
2019-02-28 13:16:58 +00:00
|
|
|
vcpu_regs.rip = 0x1000;
|
|
|
|
vcpu_regs.rax = 2;
|
|
|
|
vcpu_regs.rbx = 3;
|
|
|
|
vcpu_regs.rflags = 2;
|
2020-07-03 09:10:50 +00:00
|
|
|
vcpu.set_regs(&vcpu_regs).expect("set regs failed");
|
2019-02-28 13:16:58 +00:00
|
|
|
|
|
|
|
loop {
|
2020-07-03 09:10:50 +00:00
|
|
|
match vcpu.run().expect("run failed") {
|
2020-07-03 14:27:53 +00:00
|
|
|
VmExit::IoOut(addr, data) => {
|
2019-02-28 13:16:58 +00:00
|
|
|
println!(
|
|
|
|
"IO out -- addr: {:#x} data [{:?}]",
|
|
|
|
addr,
|
2021-06-23 20:42:17 +00:00
|
|
|
str::from_utf8(data).unwrap()
|
2019-02-28 13:16:58 +00:00
|
|
|
);
|
|
|
|
}
|
2020-07-03 14:27:53 +00:00
|
|
|
VmExit::Reset => {
|
2019-02-28 13:16:58 +00:00
|
|
|
println!("HLT");
|
2020-06-11 16:42:29 +00:00
|
|
|
break;
|
2019-02-28 13:16:58 +00:00
|
|
|
}
|
2020-07-03 14:27:53 +00:00
|
|
|
r => panic!("unexpected exit reason: {:?}", r),
|
2019-02-28 13:16:58 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|