vmm: migration: Create destination VM right before restoring it

This is preliminary work to ensure a migrated VM is created right before
it is restored. This will be useful when moving to a design where the VM
is both created and restored simultaneously from the Snapshot.

In detail, this means the MemoryManager is the object that must be
created upon receiving the config from the source VM, so that memory
content can later be received and filled into the GuestMemory.
Only after these steps have happened is the snapshot received from the
source VM, and only then can the actual Vm object be created from both
the snapshot and the previously created MemoryManager.
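
A rough sketch of the resulting destination-side ordering, expressed
with the handlers touched by this patch (the surrounding command loop,
request/response framing and error handling are elided):

    // 1. Config command: create the hypervisor VM and the MemoryManager only.
    let mm = self.vm_receive_config(&req, &mut socket, existing_memory_files.take())?;

    // 2. Memory command(s): stream guest RAM into the MemoryManager's GuestMemory.
    self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;

    // 3. State command: build the Vm from the MemoryManager, then restore the snapshot.
    self.vm_receive_state(&req, &mut socket, mm)?;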

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Sebastien Boeuf 2022-10-18 14:35:38 +02:00
parent 7b31871a36
commit c52ccf3992
3 changed files with 130 additions and 143 deletions

@@ -20,12 +20,15 @@ use crate::config::{
 };
 #[cfg(feature = "guest_debug")]
 use crate::coredump::GuestDebuggable;
+use crate::memory_manager::MemoryManager;
 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
 use crate::migration::get_vm_snapshot;
 use crate::migration::{recv_vm_config, recv_vm_state};
 use crate::seccomp_filters::{get_seccomp_filter, Thread};
 use crate::vm::{Error as VmError, Vm, VmState};
 use anyhow::anyhow;
+#[cfg(target_arch = "x86_64")]
+use arch::layout::{KVM_IDENTITY_MAP_START, KVM_TSS_START};
 use libc::{EFD_NONBLOCK, SIGINT, SIGTERM};
 use memory_manager::MemoryManagerSnapshotData;
 use pci::PciBdf;
@@ -44,9 +47,11 @@ use std::panic::AssertUnwindSafe;
 use std::path::PathBuf;
 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
 use std::sync::{Arc, Mutex};
+use std::time::Instant;
 use std::{result, thread};
 use thiserror::Error;
 use tracer::trace_scoped;
+use vm::physical_bits;
 use vm_memory::bitmap::AtomicBitmap;
 use vm_migration::{protocol::*, Migratable};
 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
@@ -1119,7 +1124,7 @@ impl Vmm {
         req: &Request,
         socket: &mut T,
         existing_memory_files: Option<HashMap<u32, File>>,
-    ) -> std::result::Result<Vm, MigratableError>
+    ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
     where
         T: Read + Write,
     {
@@ -1141,6 +1146,65 @@ impl Vmm {
             &vm_migration_config.common_cpuid,
         )?;
+        let config = vm_migration_config.vm_config.clone();
+        self.vm_config = Some(vm_migration_config.vm_config);
+
+        self.hypervisor.check_required_extensions().unwrap();
+        let vm = self.hypervisor.create_vm().unwrap();
+
+        #[cfg(target_arch = "x86_64")]
+        {
+            vm.set_identity_map_address(KVM_IDENTITY_MAP_START.0)
+                .unwrap();
+            vm.set_tss_address(KVM_TSS_START.0 as usize).unwrap();
+            vm.enable_split_irq().unwrap();
+        }
+
+        let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
+
+        let memory_manager = MemoryManager::new(
+            vm.clone(),
+            &config.lock().unwrap().memory.clone(),
+            None,
+            phys_bits,
+            #[cfg(feature = "tdx")]
+            false,
+            Some(&vm_migration_config.memory_manager_data),
+            existing_memory_files,
+            #[cfg(target_arch = "x86_64")]
+            None,
+        )
+        .map_err(|e| {
+            MigratableError::MigrateReceive(anyhow!(
+                "Error creating MemoryManager from snapshot: {:?}",
+                e
+            ))
+        })?;
+
+        Response::ok().write_to(socket)?;
+
+        Ok(memory_manager)
+    }
+
+    fn vm_receive_state<T>(
+        &mut self,
+        req: &Request,
+        socket: &mut T,
+        mm: Arc<Mutex<MemoryManager>>,
+    ) -> std::result::Result<(), MigratableError>
+    where
+        T: Read + Write,
+    {
+        // Read in state data
+        let mut data: Vec<u8> = Vec::new();
+        data.resize_with(req.length() as usize, Default::default);
+        socket
+            .read_exact(&mut data)
+            .map_err(MigratableError::MigrateSocket)?;
+        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
+            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
+        })?;
+
         let exit_evt = self.exit_evt.try_clone().map_err(|e| {
             MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
         })?;
@@ -1155,9 +1219,12 @@ impl Vmm {
             MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
         })?;

-        self.vm_config = Some(vm_migration_config.vm_config);
-        let vm = Vm::new_from_migration(
+        let timestamp = Instant::now();
+        let hypervisor_vm = mm.lock().unwrap().vm.clone();
+        let mut vm = Vm::new_from_memory_manager(
             self.vm_config.clone().unwrap(),
+            mm,
+            hypervisor_vm,
             exit_evt,
             reset_evt,
             #[cfg(feature = "guest_debug")]
@@ -1165,37 +1232,13 @@ impl Vmm {
             &self.seccomp_action,
             self.hypervisor.clone(),
             activate_evt,
-            &vm_migration_config.memory_manager_data,
-            existing_memory_files,
+            true,
+            timestamp,
         )
         .map_err(|e| {
             MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
         })?;

-        Response::ok().write_to(socket)?;
-
-        Ok(vm)
-    }
-
-    fn vm_receive_state<T>(
-        &mut self,
-        req: &Request,
-        socket: &mut T,
-        mut vm: Vm,
-    ) -> std::result::Result<(), MigratableError>
-    where
-        T: Read + Write,
-    {
-        // Read in state data
-        let mut data: Vec<u8> = Vec::new();
-        data.resize_with(req.length() as usize, Default::default);
-        socket
-            .read_exact(&mut data)
-            .map_err(MigratableError::MigrateSocket)?;
-        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
-            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
-        })?;
-
         // Create VM
         vm.restore(snapshot).map_err(|e| {
             Response::error().write_to(socket).ok();
@@ -1212,7 +1255,7 @@ impl Vmm {
         &mut self,
         req: &Request,
         socket: &mut T,
-        vm: &mut Vm,
+        memory_manager: &mut MemoryManager,
     ) -> std::result::Result<(), MigratableError>
     where
         T: Read + Write,
@@ -1221,10 +1264,12 @@ impl Vmm {
         let table = MemoryRangeTable::read_from(socket, req.length())?;

         // And then read the memory itself
-        vm.receive_memory_regions(&table, socket).map_err(|e| {
-            Response::error().write_to(socket).ok();
-            e
-        })?;
+        memory_manager
+            .receive_memory_regions(&table, socket)
+            .map_err(|e| {
+                Response::error().write_to(socket).ok();
+                e
+            })?;

         Response::ok().write_to(socket)?;
         Ok(())
     }
@@ -1258,7 +1303,7 @@ impl Vmm {
         })?;

         let mut started = false;
-        let mut vm: Option<Vm> = None;
+        let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
         let mut existing_memory_files = None;
         loop {
             let req = Request::read_from(&mut socket)?;
@@ -1278,7 +1323,7 @@ impl Vmm {
                         Response::error().write_to(&mut socket)?;
                         continue;
                     }
-                    vm = Some(self.vm_receive_config(
+                    memory_manager = Some(self.vm_receive_config(
                         &req,
                         &mut socket,
                         existing_memory_files.take(),
@@ -1292,8 +1337,8 @@ impl Vmm {
                         Response::error().write_to(&mut socket)?;
                         continue;
                     }
-                    if let Some(vm) = vm.take() {
-                        self.vm_receive_state(&req, &mut socket, vm)?;
+                    if let Some(mm) = memory_manager.take() {
+                        self.vm_receive_state(&req, &mut socket, mm)?;
                     } else {
                         warn!("Configuration not sent yet");
                         Response::error().write_to(&mut socket)?;
@@ -1307,8 +1352,8 @@ impl Vmm {
                         Response::error().write_to(&mut socket)?;
                         continue;
                     }
-                    if let Some(ref mut vm) = vm.as_mut() {
-                        self.vm_receive_memory(&req, &mut socket, vm)?;
+                    if let Some(mm) = memory_manager.as_ref() {
+                        self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
                     } else {
                         warn!("Configuration not sent yet");
                         Response::error().write_to(&mut socket)?;

@@ -30,7 +30,7 @@ use std::collections::HashMap;
 use std::convert::TryInto;
 use std::ffi;
 use std::fs::{File, OpenOptions};
-use std::io;
+use std::io::{self, Read};
 use std::ops::Deref;
 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
 use std::path::PathBuf;
@@ -1978,6 +1978,48 @@ impl MemoryManager {
         debug!("coredump total bytes {}", total_bytes);
         Ok(())
     }
+
+    pub fn receive_memory_regions<F>(
+        &mut self,
+        ranges: &MemoryRangeTable,
+        fd: &mut F,
+    ) -> std::result::Result<(), MigratableError>
+    where
+        F: Read,
+    {
+        let guest_memory = self.guest_memory();
+        let mem = guest_memory.memory();
+
+        for range in ranges.regions() {
+            let mut offset: u64 = 0;
+            // Here we are manually handling the retry in case we can't the
+            // whole region at once because we can't use the implementation
+            // from vm-memory::GuestMemory of read_exact_from() as it is not
+            // following the correct behavior. For more info about this issue
+            // see: https://github.com/rust-vmm/vm-memory/issues/174
+            loop {
+                let bytes_read = mem
+                    .read_from(
+                        GuestAddress(range.gpa + offset),
+                        fd,
+                        (range.length - offset) as usize,
+                    )
+                    .map_err(|e| {
+                        MigratableError::MigrateReceive(anyhow!(
+                            "Error receiving memory from socket: {}",
+                            e
+                        ))
+                    })?;
+                offset += bytes_read as u64;
+
+                if offset == range.length {
+                    break;
+                }
+            }
+        }
+
+        Ok(())
+    }
 }

 struct MemoryNotify {

@@ -72,8 +72,7 @@ use std::collections::BTreeMap;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::{File, OpenOptions};
-use std::io::{self, Read, Write};
-use std::io::{Seek, SeekFrom};
+use std::io::{self, Seek, SeekFrom, Write};
 #[cfg(feature = "tdx")]
 use std::mem;
 #[cfg(feature = "guest_debug")]
@@ -486,7 +485,7 @@ impl Vm {
     pub const HANDLED_SIGNALS: [i32; 1] = [SIGWINCH];

     #[allow(clippy::too_many_arguments)]
-    fn new_from_memory_manager(
+    pub fn new_from_memory_manager(
         config: Arc<Mutex<VmConfig>>,
         memory_manager: Arc<Mutex<MemoryManager>>,
         vm: Arc<dyn hypervisor::Vm>,
@@ -857,63 +856,6 @@ impl Vm {
         )
     }

-    #[allow(clippy::too_many_arguments)]
-    pub fn new_from_migration(
-        config: Arc<Mutex<VmConfig>>,
-        exit_evt: EventFd,
-        reset_evt: EventFd,
-        #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
-        seccomp_action: &SeccompAction,
-        hypervisor: Arc<dyn hypervisor::Hypervisor>,
-        activate_evt: EventFd,
-        memory_manager_data: &MemoryManagerSnapshotData,
-        existing_memory_files: Option<HashMap<u32, File>>,
-    ) -> Result<Self> {
-        let timestamp = Instant::now();
-        hypervisor.check_required_extensions().unwrap();
-        let vm = hypervisor.create_vm().unwrap();
-
-        #[cfg(target_arch = "x86_64")]
-        {
-            vm.set_identity_map_address(KVM_IDENTITY_MAP_START.0)
-                .unwrap();
-            vm.set_tss_address(KVM_TSS_START.0 as usize).unwrap();
-            vm.enable_split_irq().unwrap();
-        }
-
-        let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
-
-        let memory_manager = MemoryManager::new(
-            vm.clone(),
-            &config.lock().unwrap().memory.clone(),
-            None,
-            phys_bits,
-            #[cfg(feature = "tdx")]
-            false,
-            Some(memory_manager_data),
-            existing_memory_files,
-            #[cfg(target_arch = "x86_64")]
-            None,
-        )
-        .map_err(Error::MemoryManager)?;
-
-        Vm::new_from_memory_manager(
-            config,
-            memory_manager,
-            vm,
-            exit_evt,
-            reset_evt,
-            #[cfg(feature = "guest_debug")]
-            vm_debug_evt,
-            seccomp_action,
-            hypervisor,
-            activate_evt,
-            true,
-            timestamp,
-        )
-    }
-
     fn load_initramfs(&mut self, guest_mem: &GuestMemoryMmap) -> Result<arch::InitramfsConfig> {
         let mut initramfs = self.initramfs.as_ref().unwrap();
         let size: usize = initramfs
@@ -2352,48 +2294,6 @@ impl Vm {
         self.device_manager.lock().unwrap().balloon_size()
     }

-    pub fn receive_memory_regions<F>(
-        &mut self,
-        ranges: &MemoryRangeTable,
-        fd: &mut F,
-    ) -> std::result::Result<(), MigratableError>
-    where
-        F: Read,
-    {
-        let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
-        let mem = guest_memory.memory();
-
-        for range in ranges.regions() {
-            let mut offset: u64 = 0;
-            // Here we are manually handling the retry in case we can't the
-            // whole region at once because we can't use the implementation
-            // from vm-memory::GuestMemory of read_exact_from() as it is not
-            // following the correct behavior. For more info about this issue
-            // see: https://github.com/rust-vmm/vm-memory/issues/174
-            loop {
-                let bytes_read = mem
-                    .read_from(
-                        GuestAddress(range.gpa + offset),
-                        fd,
-                        (range.length - offset) as usize,
-                    )
-                    .map_err(|e| {
-                        MigratableError::MigrateReceive(anyhow!(
-                            "Error receiving memory from socket: {}",
-                            e
-                        ))
-                    })?;
-                offset += bytes_read as u64;
-
-                if offset == range.length {
-                    break;
-                }
-            }
-        }
-
-        Ok(())
-    }
-
     pub fn send_memory_fds(
         &mut self,
         socket: &mut UnixStream,