vmm: migration: Create destination VM right before restoring it

This is preliminary work to ensure a migrated VM is created right before
it is restored. This will be useful when moving to a design where the VM
is both created and restored simultaneously from the Snapshot.

In detail, this means the MemoryManager is the object that must be
created upon receiving the config from the source VM, so that the
memory content can later be received and written into the GuestMemory.
Only after these steps have happened is the snapshot received from the
source VM, and the actual Vm object can then be created from both the
snapshot and the previously created MemoryManager.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Author: Sebastien Boeuf
Date:   2022-10-18 14:35:38 +02:00
Parent: 7b31871a36
Commit: c52ccf3992

3 changed files with 130 additions and 143 deletions
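
To make the new ordering concrete, below is a minimal, self-contained
Rust sketch of the protocol the destination now follows. The names used
here (Command, Destination, handle) are illustrative stand-ins rather
than the crate's actual types: the config message is what creates the
MemoryManager, memory content is only accepted once that manager
exists, and the Vm itself is only built when the state (snapshot)
message arrives.

    // Toy model of the ordering enforced on the destination side.
    // Command and Destination are illustrative, not the crate's real types.
    #[derive(Debug)]
    enum Command {
        Config, // VmConfig + MemoryManagerSnapshotData -> create MemoryManager
        Memory, // MemoryRangeTable + raw pages -> fill the GuestMemory
        State,  // serialized Snapshot -> create and restore the Vm
    }

    #[derive(Default)]
    struct Destination {
        memory_manager_ready: bool,
        vm_created: bool,
    }

    impl Destination {
        fn handle(&mut self, cmd: Command) -> Result<(), String> {
            match cmd {
                Command::Config => {
                    // The MemoryManager is created as soon as the config
                    // arrives, so received memory has somewhere to land.
                    self.memory_manager_ready = true;
                    Ok(())
                }
                Command::Memory if self.memory_manager_ready => Ok(()),
                Command::State if self.memory_manager_ready => {
                    // Only now is the Vm built, from the received snapshot
                    // plus the MemoryManager created earlier.
                    self.vm_created = true;
                    Ok(())
                }
                other => Err(format!("{other:?} received before Config")),
            }
        }
    }

    fn main() {
        let mut dst = Destination::default();
        for cmd in [Command::Config, Command::Memory, Command::State] {
            dst.handle(cmd).unwrap();
        }
        assert!(dst.vm_created);
    }

In the actual code these three steps map onto vm_receive_config(),
vm_receive_memory() and vm_receive_state() in the first changed file
below.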

@@ -20,12 +20,15 @@ use crate::config::{
};
#[cfg(feature = "guest_debug")]
use crate::coredump::GuestDebuggable;
use crate::memory_manager::MemoryManager;
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
use crate::migration::get_vm_snapshot;
use crate::migration::{recv_vm_config, recv_vm_state};
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::vm::{Error as VmError, Vm, VmState};
use anyhow::anyhow;
#[cfg(target_arch = "x86_64")]
use arch::layout::{KVM_IDENTITY_MAP_START, KVM_TSS_START};
use libc::{EFD_NONBLOCK, SIGINT, SIGTERM};
use memory_manager::MemoryManagerSnapshotData;
use pci::PciBdf;
@@ -44,9 +47,11 @@ use std::panic::AssertUnwindSafe;
use std::path::PathBuf;
use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
use std::sync::{Arc, Mutex};
use std::time::Instant;
use std::{result, thread};
use thiserror::Error;
use tracer::trace_scoped;
use vm::physical_bits;
use vm_memory::bitmap::AtomicBitmap;
use vm_migration::{protocol::*, Migratable};
use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
@@ -1119,7 +1124,7 @@ impl Vmm {
req: &Request,
socket: &mut T,
existing_memory_files: Option<HashMap<u32, File>>,
) -> std::result::Result<Vm, MigratableError>
) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
where
T: Read + Write,
{
@@ -1141,6 +1146,65 @@ impl Vmm {
&vm_migration_config.common_cpuid,
)?;
let config = vm_migration_config.vm_config.clone();
self.vm_config = Some(vm_migration_config.vm_config);
self.hypervisor.check_required_extensions().unwrap();
let vm = self.hypervisor.create_vm().unwrap();
#[cfg(target_arch = "x86_64")]
{
vm.set_identity_map_address(KVM_IDENTITY_MAP_START.0)
.unwrap();
vm.set_tss_address(KVM_TSS_START.0 as usize).unwrap();
vm.enable_split_irq().unwrap();
}
let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
let memory_manager = MemoryManager::new(
vm.clone(),
&config.lock().unwrap().memory.clone(),
None,
phys_bits,
#[cfg(feature = "tdx")]
false,
Some(&vm_migration_config.memory_manager_data),
existing_memory_files,
#[cfg(target_arch = "x86_64")]
None,
)
.map_err(|e| {
MigratableError::MigrateReceive(anyhow!(
"Error creating MemoryManager from snapshot: {:?}",
e
))
})?;
Response::ok().write_to(socket)?;
Ok(memory_manager)
}
fn vm_receive_state<T>(
&mut self,
req: &Request,
socket: &mut T,
mm: Arc<Mutex<MemoryManager>>,
) -> std::result::Result<(), MigratableError>
where
T: Read + Write,
{
// Read in state data
let mut data: Vec<u8> = Vec::new();
data.resize_with(req.length() as usize, Default::default);
socket
.read_exact(&mut data)
.map_err(MigratableError::MigrateSocket)?;
let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
})?;
let exit_evt = self.exit_evt.try_clone().map_err(|e| {
MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
})?;
@@ -1155,9 +1219,12 @@ impl Vmm {
MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
})?;
self.vm_config = Some(vm_migration_config.vm_config);
let vm = Vm::new_from_migration(
let timestamp = Instant::now();
let hypervisor_vm = mm.lock().unwrap().vm.clone();
let mut vm = Vm::new_from_memory_manager(
self.vm_config.clone().unwrap(),
mm,
hypervisor_vm,
exit_evt,
reset_evt,
#[cfg(feature = "guest_debug")]
@@ -1165,37 +1232,13 @@ impl Vmm {
&self.seccomp_action,
self.hypervisor.clone(),
activate_evt,
&vm_migration_config.memory_manager_data,
existing_memory_files,
true,
timestamp,
)
.map_err(|e| {
MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
})?;
Response::ok().write_to(socket)?;
Ok(vm)
}
fn vm_receive_state<T>(
&mut self,
req: &Request,
socket: &mut T,
mut vm: Vm,
) -> std::result::Result<(), MigratableError>
where
T: Read + Write,
{
// Read in state data
let mut data: Vec<u8> = Vec::new();
data.resize_with(req.length() as usize, Default::default);
socket
.read_exact(&mut data)
.map_err(MigratableError::MigrateSocket)?;
let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
})?;
// Create VM
vm.restore(snapshot).map_err(|e| {
Response::error().write_to(socket).ok();
@@ -1212,7 +1255,7 @@ impl Vmm {
&mut self,
req: &Request,
socket: &mut T,
vm: &mut Vm,
memory_manager: &mut MemoryManager,
) -> std::result::Result<(), MigratableError>
where
T: Read + Write,
@@ -1221,10 +1264,12 @@ impl Vmm {
let table = MemoryRangeTable::read_from(socket, req.length())?;
// And then read the memory itself
vm.receive_memory_regions(&table, socket).map_err(|e| {
Response::error().write_to(socket).ok();
e
})?;
memory_manager
.receive_memory_regions(&table, socket)
.map_err(|e| {
Response::error().write_to(socket).ok();
e
})?;
Response::ok().write_to(socket)?;
Ok(())
}
@@ -1258,7 +1303,7 @@ impl Vmm {
})?;
let mut started = false;
let mut vm: Option<Vm> = None;
let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
let mut existing_memory_files = None;
loop {
let req = Request::read_from(&mut socket)?;
@@ -1278,7 +1323,7 @@ impl Vmm {
Response::error().write_to(&mut socket)?;
continue;
}
vm = Some(self.vm_receive_config(
memory_manager = Some(self.vm_receive_config(
&req,
&mut socket,
existing_memory_files.take(),
@@ -1292,8 +1337,8 @@ impl Vmm {
Response::error().write_to(&mut socket)?;
continue;
}
if let Some(vm) = vm.take() {
self.vm_receive_state(&req, &mut socket, vm)?;
if let Some(mm) = memory_manager.take() {
self.vm_receive_state(&req, &mut socket, mm)?;
} else {
warn!("Configuration not sent yet");
Response::error().write_to(&mut socket)?;
@@ -1307,8 +1352,8 @@ impl Vmm {
Response::error().write_to(&mut socket)?;
continue;
}
if let Some(ref mut vm) = vm.as_mut() {
self.vm_receive_memory(&req, &mut socket, vm)?;
if let Some(mm) = memory_manager.as_ref() {
self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
} else {
warn!("Configuration not sent yet");
Response::error().write_to(&mut socket)?;

@@ -30,7 +30,7 @@ use std::collections::HashMap;
use std::convert::TryInto;
use std::ffi;
use std::fs::{File, OpenOptions};
use std::io;
use std::io::{self, Read};
use std::ops::Deref;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::path::PathBuf;
@@ -1978,6 +1978,48 @@ impl MemoryManager {
debug!("coredump total bytes {}", total_bytes);
Ok(())
}
pub fn receive_memory_regions<F>(
&mut self,
ranges: &MemoryRangeTable,
fd: &mut F,
) -> std::result::Result<(), MigratableError>
where
F: Read,
{
let guest_memory = self.guest_memory();
let mem = guest_memory.memory();
for range in ranges.regions() {
let mut offset: u64 = 0;
// Here we are manually handling the retry in case we can't read the
// whole region at once, because we can't use the read_exact_from()
// implementation from vm-memory's GuestMemory as it is not following
// the correct behavior. For more info about this issue see:
// https://github.com/rust-vmm/vm-memory/issues/174
loop {
let bytes_read = mem
.read_from(
GuestAddress(range.gpa + offset),
fd,
(range.length - offset) as usize,
)
.map_err(|e| {
MigratableError::MigrateReceive(anyhow!(
"Error receiving memory from socket: {}",
e
))
})?;
offset += bytes_read as u64;
if offset == range.length {
break;
}
}
}
Ok(())
}
}
struct MemoryNotify {

@@ -72,8 +72,7 @@ use std::collections::BTreeMap;
use std::collections::HashMap;
use std::convert::TryInto;
use std::fs::{File, OpenOptions};
use std::io::{self, Read, Write};
use std::io::{Seek, SeekFrom};
use std::io::{self, Seek, SeekFrom, Write};
#[cfg(feature = "tdx")]
use std::mem;
#[cfg(feature = "guest_debug")]
@@ -486,7 +485,7 @@ impl Vm {
pub const HANDLED_SIGNALS: [i32; 1] = [SIGWINCH];
#[allow(clippy::too_many_arguments)]
fn new_from_memory_manager(
pub fn new_from_memory_manager(
config: Arc<Mutex<VmConfig>>,
memory_manager: Arc<Mutex<MemoryManager>>,
vm: Arc<dyn hypervisor::Vm>,
@@ -857,63 +856,6 @@ impl Vm {
)
}
#[allow(clippy::too_many_arguments)]
pub fn new_from_migration(
config: Arc<Mutex<VmConfig>>,
exit_evt: EventFd,
reset_evt: EventFd,
#[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
seccomp_action: &SeccompAction,
hypervisor: Arc<dyn hypervisor::Hypervisor>,
activate_evt: EventFd,
memory_manager_data: &MemoryManagerSnapshotData,
existing_memory_files: Option<HashMap<u32, File>>,
) -> Result<Self> {
let timestamp = Instant::now();
hypervisor.check_required_extensions().unwrap();
let vm = hypervisor.create_vm().unwrap();
#[cfg(target_arch = "x86_64")]
{
vm.set_identity_map_address(KVM_IDENTITY_MAP_START.0)
.unwrap();
vm.set_tss_address(KVM_TSS_START.0 as usize).unwrap();
vm.enable_split_irq().unwrap();
}
let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
let memory_manager = MemoryManager::new(
vm.clone(),
&config.lock().unwrap().memory.clone(),
None,
phys_bits,
#[cfg(feature = "tdx")]
false,
Some(memory_manager_data),
existing_memory_files,
#[cfg(target_arch = "x86_64")]
None,
)
.map_err(Error::MemoryManager)?;
Vm::new_from_memory_manager(
config,
memory_manager,
vm,
exit_evt,
reset_evt,
#[cfg(feature = "guest_debug")]
vm_debug_evt,
seccomp_action,
hypervisor,
activate_evt,
true,
timestamp,
)
}
fn load_initramfs(&mut self, guest_mem: &GuestMemoryMmap) -> Result<arch::InitramfsConfig> {
let mut initramfs = self.initramfs.as_ref().unwrap();
let size: usize = initramfs
@@ -2352,48 +2294,6 @@ impl Vm {
self.device_manager.lock().unwrap().balloon_size()
}
pub fn receive_memory_regions<F>(
&mut self,
ranges: &MemoryRangeTable,
fd: &mut F,
) -> std::result::Result<(), MigratableError>
where
F: Read,
{
let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
let mem = guest_memory.memory();
for range in ranges.regions() {
let mut offset: u64 = 0;
// Here we are manually handling the retry in case we can't read the
// whole region at once, because we can't use the read_exact_from()
// implementation from vm-memory's GuestMemory as it is not following
// the correct behavior. For more info about this issue see:
// https://github.com/rust-vmm/vm-memory/issues/174
loop {
let bytes_read = mem
.read_from(
GuestAddress(range.gpa + offset),
fd,
(range.length - offset) as usize,
)
.map_err(|e| {
MigratableError::MigrateReceive(anyhow!(
"Error receiving memory from socket: {}",
e
))
})?;
offset += bytes_read as u64;
if offset == range.length {
break;
}
}
}
Ok(())
}
pub fn send_memory_fds(
&mut self,
socket: &mut UnixStream,