From c52ccf3992be7d9d7dbea2aad9f774551de88334 Mon Sep 17 00:00:00 2001
From: Sebastien Boeuf <sebastien.boeuf@intel.com>
Date: Tue, 18 Oct 2022 14:35:38 +0200
Subject: [PATCH] vmm: migration: Create destination VM right before restoring
 it

This is preliminary work to ensure a migrated VM is created right
before it is restored. It will be useful when moving to a design where
the VM is both created and restored simultaneously from the Snapshot.

In detail, this means the MemoryManager is the object that must be
created upon receiving the config from the source VM, so that the
memory content can later be received and filled into the GuestMemory.
Only after these steps have happened is the snapshot received from the
source VM, at which point the actual Vm object can be created from both
the snapshot and the previously created MemoryManager.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
---
 vmm/src/lib.rs            | 125 ++++++++++++++++++++++++++------------
 vmm/src/memory_manager.rs |  44 +++++++++++++-
 vmm/src/vm.rs             | 104 +------------------------------
 3 files changed, 130 insertions(+), 143 deletions(-)
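Note: the destination-side ordering described above can be summarized with
the following sketch. This is illustrative only: every type below is a
simplified stand-in for the real vmm type of the same name (MemoryManager,
Snapshot, Vm), not the actual cloud-hypervisor API.

    use std::sync::{Arc, Mutex};

    struct MemoryManager; // stand-in for vmm::memory_manager::MemoryManager
    struct Snapshot;      // stand-in for vm_migration::Snapshot
    struct Vm {           // stand-in for vmm::vm::Vm
        restored: bool,
    }

    // 1. The config arrives first: the hypervisor VM and the MemoryManager
    //    are created so that guest memory can be filled in later.
    fn receive_config() -> Arc<Mutex<MemoryManager>> {
        Arc::new(Mutex::new(MemoryManager))
    }

    // 2. Memory ranges are written straight into the MemoryManager's
    //    GuestMemory; no Vm object exists at this point.
    fn receive_memory(_mm: &Arc<Mutex<MemoryManager>>) {}

    // 3. Only now is the Vm built from the already-populated MemoryManager
    //    and restored from the received snapshot.
    fn receive_state(_mm: Arc<Mutex<MemoryManager>>, _snapshot: Snapshot) -> Vm {
        Vm { restored: true }
    }

    fn main() {
        let mm = receive_config();
        receive_memory(&mm);
        let vm = receive_state(mm, Snapshot);
        assert!(vm.restored);
    }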
diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs
index 76c62fc48..e65da51c5 100644
--- a/vmm/src/lib.rs
+++ b/vmm/src/lib.rs
@@ -20,12 +20,15 @@ use crate::config::{
 };
 #[cfg(feature = "guest_debug")]
 use crate::coredump::GuestDebuggable;
+use crate::memory_manager::MemoryManager;
 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
 use crate::migration::get_vm_snapshot;
 use crate::migration::{recv_vm_config, recv_vm_state};
 use crate::seccomp_filters::{get_seccomp_filter, Thread};
 use crate::vm::{Error as VmError, Vm, VmState};
 use anyhow::anyhow;
+#[cfg(target_arch = "x86_64")]
+use arch::layout::{KVM_IDENTITY_MAP_START, KVM_TSS_START};
 use libc::{EFD_NONBLOCK, SIGINT, SIGTERM};
 use memory_manager::MemoryManagerSnapshotData;
 use pci::PciBdf;
@@ -44,9 +47,11 @@ use std::panic::AssertUnwindSafe;
 use std::path::PathBuf;
 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
 use std::sync::{Arc, Mutex};
+use std::time::Instant;
 use std::{result, thread};
 use thiserror::Error;
 use tracer::trace_scoped;
+use vm::physical_bits;
 use vm_memory::bitmap::AtomicBitmap;
 use vm_migration::{protocol::*, Migratable};
 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
@@ -1119,7 +1124,7 @@ impl Vmm {
         req: &Request,
         socket: &mut T,
         existing_memory_files: Option<HashMap<u32, File>>,
-    ) -> std::result::Result<Vm, MigratableError>
+    ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
     where
         T: Read + Write,
     {
@@ -1141,6 +1146,65 @@ impl Vmm {
             &vm_migration_config.common_cpuid,
         )?;
 
+        let config = vm_migration_config.vm_config.clone();
+        self.vm_config = Some(vm_migration_config.vm_config);
+
+        self.hypervisor.check_required_extensions().unwrap();
+        let vm = self.hypervisor.create_vm().unwrap();
+
+        #[cfg(target_arch = "x86_64")]
+        {
+            vm.set_identity_map_address(KVM_IDENTITY_MAP_START.0)
+                .unwrap();
+            vm.set_tss_address(KVM_TSS_START.0 as usize).unwrap();
+            vm.enable_split_irq().unwrap();
+        }
+
+        let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
+
+        let memory_manager = MemoryManager::new(
+            vm.clone(),
+            &config.lock().unwrap().memory.clone(),
+            None,
+            phys_bits,
+            #[cfg(feature = "tdx")]
+            false,
+            Some(&vm_migration_config.memory_manager_data),
+            existing_memory_files,
+            #[cfg(target_arch = "x86_64")]
+            None,
+        )
+        .map_err(|e| {
+            MigratableError::MigrateReceive(anyhow!(
+                "Error creating MemoryManager from snapshot: {:?}",
+                e
+            ))
+        })?;
+
+        Response::ok().write_to(socket)?;
+
+        Ok(memory_manager)
+    }
+
+    fn vm_receive_state<T>(
+        &mut self,
+        req: &Request,
+        socket: &mut T,
+        mm: Arc<Mutex<MemoryManager>>,
+    ) -> std::result::Result<(), MigratableError>
+    where
+        T: Read + Write,
+    {
+        // Read in state data
+        let mut data: Vec<u8> = Vec::new();
+        data.resize_with(req.length() as usize, Default::default);
+        socket
+            .read_exact(&mut data)
+            .map_err(MigratableError::MigrateSocket)?;
+        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
+            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
+        })?;
+
         let exit_evt = self.exit_evt.try_clone().map_err(|e| {
             MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
         })?;
@@ -1155,9 +1219,12 @@ impl Vmm {
             MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
         })?;
 
-        self.vm_config = Some(vm_migration_config.vm_config);
-        let vm = Vm::new_from_migration(
+        let timestamp = Instant::now();
+        let hypervisor_vm = mm.lock().unwrap().vm.clone();
+        let mut vm = Vm::new_from_memory_manager(
             self.vm_config.clone().unwrap(),
+            mm,
+            hypervisor_vm,
             exit_evt,
             reset_evt,
             #[cfg(feature = "guest_debug")]
@@ -1165,37 +1232,13 @@ impl Vmm {
             &self.seccomp_action,
             self.hypervisor.clone(),
             activate_evt,
-            &vm_migration_config.memory_manager_data,
-            existing_memory_files,
+            true,
+            timestamp,
         )
         .map_err(|e| {
             MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
         })?;
 
-        Response::ok().write_to(socket)?;
-
-        Ok(vm)
-    }
-
-    fn vm_receive_state<T>(
-        &mut self,
-        req: &Request,
-        socket: &mut T,
-        mut vm: Vm,
-    ) -> std::result::Result<(), MigratableError>
-    where
-        T: Read + Write,
-    {
-        // Read in state data
-        let mut data: Vec<u8> = Vec::new();
-        data.resize_with(req.length() as usize, Default::default);
-        socket
-            .read_exact(&mut data)
-            .map_err(MigratableError::MigrateSocket)?;
-        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
-            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
-        })?;
-
         // Create VM
         vm.restore(snapshot).map_err(|e| {
            Response::error().write_to(socket).ok();
@@ -1212,7 +1255,7 @@ impl Vmm {
         &mut self,
         req: &Request,
         socket: &mut T,
-        vm: &mut Vm,
+        memory_manager: &mut MemoryManager,
     ) -> std::result::Result<(), MigratableError>
     where
         T: Read + Write,
@@ -1221,10 +1264,12 @@ impl Vmm {
         // Read table
         let table = MemoryRangeTable::read_from(socket, req.length())?;
 
         // And then read the memory itself
-        vm.receive_memory_regions(&table, socket).map_err(|e| {
-            Response::error().write_to(socket).ok();
-            e
-        })?;
+        memory_manager
+            .receive_memory_regions(&table, socket)
+            .map_err(|e| {
+                Response::error().write_to(socket).ok();
+                e
+            })?;
         Response::ok().write_to(socket)?;
         Ok(())
     }
@@ -1258,7 +1303,7 @@ impl Vmm {
         })?;
 
         let mut started = false;
-        let mut vm: Option<Vm> = None;
+        let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
         let mut existing_memory_files = None;
         loop {
             let req = Request::read_from(&mut socket)?;
@@ -1278,7 +1323,7 @@ impl Vmm {
                         Response::error().write_to(&mut socket)?;
                         continue;
                     }
-                    vm = Some(self.vm_receive_config(
+                    memory_manager = Some(self.vm_receive_config(
                         &req,
                         &mut socket,
                         existing_memory_files.take(),
@@ -1292,8 +1337,8 @@ impl Vmm {
                         Response::error().write_to(&mut socket)?;
                         continue;
                     }
-                    if let Some(vm) = vm.take() {
-                        self.vm_receive_state(&req, &mut socket, vm)?;
+                    if let Some(mm) = memory_manager.take() {
+                        self.vm_receive_state(&req, &mut socket, mm)?;
                     } else {
                         warn!("Configuration not sent yet");
                         Response::error().write_to(&mut socket)?;
@@ -1307,8 +1352,8 @@ impl Vmm {
                         Response::error().write_to(&mut socket)?;
                         continue;
                     }
-                    if let Some(ref mut vm) = vm.as_mut() {
-                        self.vm_receive_memory(&req, &mut socket, vm)?;
+                    if let Some(mm) = memory_manager.as_ref() {
+                        self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
                     } else {
                         warn!("Configuration not sent yet");
                         Response::error().write_to(&mut socket)?;
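Note: the receive loop above now tracks an Option<Arc<Mutex<MemoryManager>>>
where it used to track an Option<Vm>, and rejects memory or state requests
that arrive before the config. A minimal sketch of that state handling, again
with stand-in types rather than the real Request/Response protocol types:

    use std::sync::{Arc, Mutex};

    struct MemoryManager; // stand-in for the real MemoryManager

    enum Command {
        Config,
        Memory,
        State,
    }

    fn handle(
        req: Command,
        memory_manager: &mut Option<Arc<Mutex<MemoryManager>>>,
    ) -> Result<(), String> {
        match req {
            // vm_receive_config() now hands back only the MemoryManager.
            Command::Config => {
                *memory_manager = Some(Arc::new(Mutex::new(MemoryManager)));
                Ok(())
            }
            // vm_receive_memory() borrows the MemoryManager; no Vm is
            // needed to fill guest memory.
            Command::Memory => memory_manager
                .as_ref()
                .map(|_mm| ())
                .ok_or_else(|| "Configuration not sent yet".to_string()),
            // vm_receive_state() consumes the MemoryManager to build the Vm.
            Command::State => memory_manager
                .take()
                .map(|_mm| ())
                .ok_or_else(|| "Configuration not sent yet".to_string()),
        }
    }

    fn main() {
        let mut mm = None;
        // Memory before config is rejected, mirroring the real loop's
        // "Configuration not sent yet" error response.
        assert!(handle(Command::Memory, &mut mm).is_err());
        handle(Command::Config, &mut mm).unwrap();
        handle(Command::Memory, &mut mm).unwrap();
        handle(Command::State, &mut mm).unwrap();
    }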
diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs
index aa94550bc..c1a794329 100644
--- a/vmm/src/memory_manager.rs
+++ b/vmm/src/memory_manager.rs
@@ -30,7 +30,7 @@ use std::collections::HashMap;
 use std::convert::TryInto;
 use std::ffi;
 use std::fs::{File, OpenOptions};
-use std::io;
+use std::io::{self, Read};
 use std::ops::Deref;
 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
 use std::path::PathBuf;
@@ -1978,6 +1978,48 @@ impl MemoryManager {
         debug!("coredump total bytes {}", total_bytes);
         Ok(())
     }
+
+    pub fn receive_memory_regions<F>(
+        &mut self,
+        ranges: &MemoryRangeTable,
+        fd: &mut F,
+    ) -> std::result::Result<(), MigratableError>
+    where
+        F: Read,
+    {
+        let guest_memory = self.guest_memory();
+        let mem = guest_memory.memory();
+
+        for range in ranges.regions() {
+            let mut offset: u64 = 0;
+            // Here we are manually handling the retry in case we can't read
+            // the whole region at once, as we can't use the read_exact_from()
+            // implementation from vm-memory::GuestMemory since it does not
+            // follow the correct behavior. For more info about this issue
+            // see: https://github.com/rust-vmm/vm-memory/issues/174
+            loop {
+                let bytes_read = mem
+                    .read_from(
+                        GuestAddress(range.gpa + offset),
+                        fd,
+                        (range.length - offset) as usize,
+                    )
+                    .map_err(|e| {
+                        MigratableError::MigrateReceive(anyhow!(
+                            "Error receiving memory from socket: {}",
+                            e
+                        ))
+                    })?;
+                offset += bytes_read as u64;
+
+                if offset == range.length {
+                    break;
+                }
+            }
+        }
+
+        Ok(())
+    }
 }
 
 struct MemoryNotify {
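Note: the comment in receive_memory_regions() above is about short reads:
read_from() may return fewer bytes than requested, so the code loops until
the whole range has been filled. The same retry pattern, reduced to a
standalone function over std::io::Read; unlike the vmm code, this sketch
reads into a plain buffer and adds an EOF guard, which the vmm code does not
need over its blocking socket:

    use std::io::Read;

    // Read until `buf` is full, retrying on short reads, just as the moved
    // receive_memory_regions() loops on mem.read_from() per memory range.
    fn read_full<F: Read>(fd: &mut F, buf: &mut [u8]) -> std::io::Result<()> {
        let mut offset = 0usize;
        while offset < buf.len() {
            let bytes_read = fd.read(&mut buf[offset..])?;
            if bytes_read == 0 {
                // EOF before the range was fully received.
                return Err(std::io::Error::new(
                    std::io::ErrorKind::UnexpectedEof,
                    "stream closed before the range was fully received",
                ));
            }
            offset += bytes_read;
        }
        Ok(())
    }

    fn main() -> std::io::Result<()> {
        let mut src: &[u8] = &[1, 2, 3, 4];
        let mut buf = [0u8; 4];
        read_full(&mut src, &mut buf)?;
        assert_eq!(buf, [1, 2, 3, 4]);
        Ok(())
    }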
diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs
index de5c6c240..218f55f40 100644
--- a/vmm/src/vm.rs
+++ b/vmm/src/vm.rs
@@ -72,8 +72,7 @@ use std::collections::BTreeMap;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::{File, OpenOptions};
-use std::io::{self, Read, Write};
-use std::io::{Seek, SeekFrom};
+use std::io::{self, Seek, SeekFrom, Write};
 #[cfg(feature = "tdx")]
 use std::mem;
 #[cfg(feature = "guest_debug")]
@@ -486,7 +485,7 @@ impl Vm {
     pub const HANDLED_SIGNALS: [i32; 1] = [SIGWINCH];
 
     #[allow(clippy::too_many_arguments)]
-    fn new_from_memory_manager(
+    pub fn new_from_memory_manager(
         config: Arc<Mutex<VmConfig>>,
         memory_manager: Arc<Mutex<MemoryManager>>,
         vm: Arc<dyn hypervisor::Vm>,
@@ -857,63 +856,6 @@ impl Vm {
         )
     }
 
-    #[allow(clippy::too_many_arguments)]
-    pub fn new_from_migration(
-        config: Arc<Mutex<VmConfig>>,
-        exit_evt: EventFd,
-        reset_evt: EventFd,
-        #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
-        seccomp_action: &SeccompAction,
-        hypervisor: Arc<dyn hypervisor::Hypervisor>,
-        activate_evt: EventFd,
-        memory_manager_data: &MemoryManagerSnapshotData,
-        existing_memory_files: Option<HashMap<u32, File>>,
-    ) -> Result<Self> {
-        let timestamp = Instant::now();
-
-        hypervisor.check_required_extensions().unwrap();
-        let vm = hypervisor.create_vm().unwrap();
-
-        #[cfg(target_arch = "x86_64")]
-        {
-            vm.set_identity_map_address(KVM_IDENTITY_MAP_START.0)
-                .unwrap();
-            vm.set_tss_address(KVM_TSS_START.0 as usize).unwrap();
-            vm.enable_split_irq().unwrap();
-        }
-
-        let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
-
-        let memory_manager = MemoryManager::new(
-            vm.clone(),
-            &config.lock().unwrap().memory.clone(),
-            None,
-            phys_bits,
-            #[cfg(feature = "tdx")]
-            false,
-            Some(memory_manager_data),
-            existing_memory_files,
-            #[cfg(target_arch = "x86_64")]
-            None,
-        )
-        .map_err(Error::MemoryManager)?;
-
-        Vm::new_from_memory_manager(
-            config,
-            memory_manager,
-            vm,
-            exit_evt,
-            reset_evt,
-            #[cfg(feature = "guest_debug")]
-            vm_debug_evt,
-            seccomp_action,
-            hypervisor,
-            activate_evt,
-            true,
-            timestamp,
-        )
-    }
-
     fn load_initramfs(&mut self, guest_mem: &GuestMemoryMmap) -> Result<u64> {
         let mut initramfs = self.initramfs.as_ref().unwrap();
         let size: usize = initramfs
@@ -2352,48 +2294,6 @@ impl Vm {
         self.device_manager.lock().unwrap().balloon_size()
     }
 
-    pub fn receive_memory_regions<F>(
-        &mut self,
-        ranges: &MemoryRangeTable,
-        fd: &mut F,
-    ) -> std::result::Result<(), MigratableError>
-    where
-        F: Read,
-    {
-        let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
-        let mem = guest_memory.memory();
-
-        for range in ranges.regions() {
-            let mut offset: u64 = 0;
-            // Here we are manually handling the retry in case we can't the
-            // whole region at once because we can't use the implementation
-            // from vm-memory::GuestMemory of read_exact_from() as it is not
-            // following the correct behavior. For more info about this issue
-            // see: https://github.com/rust-vmm/vm-memory/issues/174
-            loop {
-                let bytes_read = mem
-                    .read_from(
-                        GuestAddress(range.gpa + offset),
-                        fd,
-                        (range.length - offset) as usize,
-                    )
-                    .map_err(|e| {
-                        MigratableError::MigrateReceive(anyhow!(
-                            "Error receiving memory from socket: {}",
-                            e
-                        ))
-                    })?;
-                offset += bytes_read as u64;
-
-                if offset == range.length {
-                    break;
-                }
-            }
-        }
-
-        Ok(())
-    }
-
     pub fn send_memory_fds(
         &mut self,
         socket: &mut UnixStream,
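Note: with new_from_migration() removed, the restore path reuses the same
new_from_memory_manager() constructor as the regular boot path, pulling the
hypervisor VM handle out of the MemoryManager it was created with. A
schematic of the new call shape; the types below are stand-ins, and the real
signature also threads the config, EventFds, seccomp action, hypervisor and
debug handles:

    use std::sync::{Arc, Mutex};

    struct HypervisorVm; // stand-in for Arc<dyn hypervisor::Vm>
    struct MemoryManager {
        vm: Arc<HypervisorVm>,
    }
    struct Vm;

    impl Vm {
        // Stand-in for Vm::new_from_memory_manager(); `snapshot` mirrors the
        // `true` passed on the restore path.
        fn new_from_memory_manager(
            _memory_manager: Arc<Mutex<MemoryManager>>,
            _vm: Arc<HypervisorVm>,
            _snapshot: bool,
        ) -> Vm {
            Vm
        }
    }

    fn main() {
        let mm = Arc::new(Mutex::new(MemoryManager {
            vm: Arc::new(HypervisorVm),
        }));
        // Mirrors `let hypervisor_vm = mm.lock().unwrap().vm.clone();` in
        // vm_receive_state(): the hypervisor VM handle comes out of the
        // MemoryManager instead of being created inside the Vm constructor.
        let hypervisor_vm = mm.lock().unwrap().vm.clone();
        let _vm = Vm::new_from_memory_manager(mm, hypervisor_vm, true);
    }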