From 59031531b6bbeb3d1c4e641a2620ea83161ed96e Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Mon, 27 Sep 2021 15:31:28 +0200 Subject: [PATCH] vmm: Simplify the way memory is snapshot and restored By using a single file for storing the memory ranges, we simplify the way snapshot/restore works by avoiding multiples files, but the main and more important point is that we have now a way to save only the ranges that matter. In particular, the ranges related to virtio-mem regions are not always fully hotplugged, meaning we don't want to save the entire region. That's where the usage of memory ranges is interesting as it lets us optimize the snapshot/restore process when one or multiple virtio-mem regions are involved. Signed-off-by: Sebastien Boeuf --- vmm/src/memory_manager.rs | 208 ++++++++++++++++++++------------------ vmm/src/vm.rs | 5 +- 2 files changed, 111 insertions(+), 102 deletions(-) diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index cafdac26c..54ecf3d94 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -50,6 +50,8 @@ pub const MEMORY_MANAGER_ACPI_SIZE: usize = 0x18; const DEFAULT_MEMORY_ZONE: &str = "mem0"; +const SNAPSHOT_FILENAME: &str = "memory-ranges"; + #[cfg(target_arch = "x86_64")] const X86_64_IRQ_BASE: u32 = 5; @@ -145,7 +147,7 @@ pub struct MemoryManager { #[cfg(target_arch = "x86_64")] sgx_epc_region: Option, user_provided_zones: bool, - snapshot_memory_regions: Vec, + snapshot_memory_ranges: MemoryRangeTable, memory_zones: MemoryZones, log_dirty: bool, // Enable dirty logging for created RAM regions @@ -496,23 +498,42 @@ impl MemoryManager { Ok((mem_regions, memory_zones)) } - fn fill_saved_regions(&mut self, saved_regions: Vec) -> Result<(), Error> { - for region in saved_regions { - if let Some(content) = region.content { - // Open (read only) the snapshot file for the given region. - let mut memory_region_file = OpenOptions::new() - .read(true) - .open(content) - .map_err(Error::SnapshotOpen)?; + fn fill_saved_regions( + &mut self, + file_path: PathBuf, + saved_regions: MemoryRangeTable, + ) -> Result<(), Error> { + if saved_regions.is_empty() { + return Ok(()); + } - self.guest_memory - .memory() - .read_exact_from( - GuestAddress(region.start_addr), - &mut memory_region_file, - region.size as usize, + // Open (read only) the snapshot file. + let mut memory_file = OpenOptions::new() + .read(true) + .open(file_path) + .map_err(Error::SnapshotOpen)?; + + let guest_memory = self.guest_memory.memory(); + for range in saved_regions.regions() { + let mut offset: u64 = 0; + // Here we are manually handling the retry in case we can't write + // the whole region at once because we can't use the implementation + // from vm-memory::GuestMemory of read_exact_from() as it is not + // following the correct behavior. For more info about this issue + // see: https://github.com/rust-vmm/vm-memory/issues/174 + loop { + let bytes_read = guest_memory + .read_from( + GuestAddress(range.gpa + offset), + &mut memory_file, + (range.length - offset) as usize, ) .map_err(Error::SnapshotCopy)?; + offset += bytes_read as u64; + + if offset == range.length { + break; + } } } @@ -792,7 +813,7 @@ impl MemoryManager { #[cfg(target_arch = "x86_64")] sgx_epc_region: None, user_provided_zones, - snapshot_memory_regions: Vec::new(), + snapshot_memory_ranges: MemoryRangeTable::default(), memory_zones, guest_ram_mappings: Vec::new(), #[cfg(feature = "acpi")] @@ -869,30 +890,16 @@ impl MemoryManager { )?; if let Some(source_url) = source_url { - let vm_snapshot_path = url_to_path(source_url).map_err(Error::Restore)?; + let mut memory_file_path = url_to_path(source_url).map_err(Error::Restore)?; + memory_file_path.push(String::from(SNAPSHOT_FILENAME)); let mem_snapshot: MemoryManagerSnapshotData = snapshot .to_versioned_state(MEMORY_MANAGER_SNAPSHOT_ID) .map_err(Error::Restore)?; - // Here we turn the content file name into a content file path as - // this will be needed to copy the content of the saved memory - // region into the newly created memory region. - // We simply ignore the content files that are None, as they - // represent regions that have been directly saved by the user, with - // no need for saving into a dedicated external file. For these - // files, the VmConfig already contains the information on where to - // find them. - let mut saved_regions = mem_snapshot.memory_regions; - for region in saved_regions.iter_mut() { - if let Some(content) = &mut region.content { - let mut memory_region_path = vm_snapshot_path.clone(); - memory_region_path.push(content.clone()); - *content = memory_region_path.to_str().unwrap().to_owned(); - } - } - - mm.lock().unwrap().fill_saved_regions(saved_regions)?; + mm.lock() + .unwrap() + .fill_saved_regions(memory_file_path, mem_snapshot.memory_ranges)?; } Ok(mm) @@ -1509,19 +1516,42 @@ impl MemoryManager { &self.memory_zones } - pub fn memory_range_table(&self) -> std::result::Result { + pub fn memory_range_table( + &self, + snapshot: bool, + ) -> std::result::Result { let mut table = MemoryRangeTable::default(); for memory_zone in self.memory_zones.values() { + if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() { + table.extend(virtio_mem_zone.plugged_ranges()); + } + for region in memory_zone.regions() { + if snapshot { + if let Some(file_offset) = region.file_offset() { + if (region.flags() & libc::MAP_SHARED == libc::MAP_SHARED) + && Self::is_hardlink(file_offset.file()) + { + // In this very specific case, we know the memory + // region is backed by a file on the host filesystem + // that can be accessed by the user, and additionally + // the mapping is shared, which means that modifications + // to the content are written to the actual file. + // When meeting these conditions, we can skip the + // copy of the memory content for this specific region, + // as we can assume the user will have it saved through + // the backing file already. + continue; + } + } + } + table.push(MemoryRange { gpa: region.start_addr().raw_value(), length: region.len() as u64, }); } - if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() { - table.extend(virtio_mem_zone.plugged_ranges()); - } } Ok(table) @@ -1906,16 +1936,9 @@ impl Aml for MemoryManager { impl Pausable for MemoryManager {} -#[derive(Clone, Versionize)] -pub struct MemoryRegion { - content: Option, - start_addr: u64, - size: u64, -} - #[derive(Versionize)] pub struct MemoryManagerSnapshotData { - memory_regions: Vec, + memory_ranges: MemoryRangeTable, } impl VersionMapped for MemoryManagerSnapshotData {} @@ -1927,53 +1950,22 @@ impl Snapshottable for MemoryManager { fn snapshot(&mut self) -> result::Result { let mut memory_manager_snapshot = Snapshot::new(MEMORY_MANAGER_SNAPSHOT_ID); - let guest_memory = self.guest_memory.memory(); - let mut memory_regions: Vec = Vec::new(); + let memory_ranges = self.memory_range_table(true)?; - for (index, region) in guest_memory.iter().enumerate() { - if region.len() == 0 { - return Err(MigratableError::Snapshot(anyhow!("Zero length region"))); - } - - let mut content = Some(PathBuf::from(format!("memory-region-{}", index))); - if let Some(file_offset) = region.file_offset() { - if (region.flags() & libc::MAP_SHARED == libc::MAP_SHARED) - && Self::is_hardlink(file_offset.file()) - { - // In this very specific case, we know the memory region - // is backed by a file on the host filesystem that can be - // accessed by the user, and additionally the mapping is - // shared, which means that modifications to the content - // are written to the actual file. - // When meeting these conditions, we can skip the copy of - // the memory content for this specific region, as we can - // assume the user will have it saved through the backing - // file already. - content = None; - } - } - - memory_regions.push(MemoryRegion { - content: content.map(|p| p.to_str().unwrap().to_owned()), - start_addr: region.start_addr().0, - size: region.len(), - }); - } - - // Store locally this list of regions as it will be used through the + // Store locally this list of ranges as it will be used through the // Transportable::send() implementation. The point is to avoid the // duplication of code regarding the creation of the path for each // region. The 'snapshot' step creates the list of memory regions, // including information about the need to copy a memory region or // not. This saves the 'send' step having to go through the same // process, and instead it can directly proceed with storing the - // memory region content for the regions requiring it. - self.snapshot_memory_regions = memory_regions.clone(); + // memory range content for the ranges requiring it. + self.snapshot_memory_ranges = memory_ranges.clone(); memory_manager_snapshot.add_data_section(SnapshotDataSection::new_from_versioned_state( MEMORY_MANAGER_SNAPSHOT_ID, - &MemoryManagerSnapshotData { memory_regions }, + &MemoryManagerSnapshotData { memory_ranges }, )?); Ok(memory_manager_snapshot) @@ -1986,29 +1978,43 @@ impl Transportable for MemoryManager { _snapshot: &Snapshot, destination_url: &str, ) -> result::Result<(), MigratableError> { - let vm_memory_snapshot_path = url_to_path(destination_url)?; + if self.snapshot_memory_ranges.is_empty() { + return Ok(()); + } + + let mut memory_file_path = url_to_path(destination_url)?; + memory_file_path.push(String::from(SNAPSHOT_FILENAME)); + + // Create the snapshot file for the entire memory + let mut memory_file = OpenOptions::new() + .read(true) + .write(true) + .create_new(true) + .open(memory_file_path) + .map_err(|e| MigratableError::MigrateSend(e.into()))?; let guest_memory = self.guest_memory.memory(); - for region in self.snapshot_memory_regions.iter() { - if let Some(content) = ®ion.content { - let mut memory_region_path = vm_memory_snapshot_path.clone(); - memory_region_path.push(content); - // Create the snapshot file for the region - let mut memory_region_file = OpenOptions::new() - .read(true) - .write(true) - .create_new(true) - .open(memory_region_path) - .map_err(|e| MigratableError::MigrateSend(e.into()))?; - - guest_memory - .write_all_to( - GuestAddress(region.start_addr), - &mut memory_region_file, - region.size as usize, + for range in self.snapshot_memory_ranges.regions() { + let mut offset: u64 = 0; + // Here we are manually handling the retry in case we can't read + // the whole region at once because we can't use the implementation + // from vm-memory::GuestMemory of write_all_to() as it is not + // following the correct behavior. For more info about this issue + // see: https://github.com/rust-vmm/vm-memory/issues/174 + loop { + let bytes_written = guest_memory + .write_to( + GuestAddress(range.gpa + offset), + &mut memory_file, + (range.length - offset) as usize, ) .map_err(|e| MigratableError::MigrateSend(e.into()))?; + offset += bytes_written as u64; + + if offset == range.length { + break; + } } } Ok(()) diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 360cde873..9036c53ea 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -2188,7 +2188,10 @@ impl Vm { } pub fn memory_range_table(&self) -> std::result::Result { - self.memory_manager.lock().unwrap().memory_range_table() + self.memory_manager + .lock() + .unwrap() + .memory_range_table(false) } pub fn device_tree(&self) -> Arc> {