vmm: Introduce MemoryManager

The memory manager is responsible for setting up the guest memory and in
the long term will also handle addition of guest memory.

In this commit, move the code for creating the backing memory and populating
the allocator into the new implementation, making as few changes to other
code as possible.

Follow on commits will further reduce some of the duplicated code.

Signed-off-by: Rob Bradford <robert.bradford@intel.com>
This commit is contained in:
Rob Bradford 2019-12-19 15:47:36 +00:00
parent bcfe546ea2
commit 260cebb8cf
4 changed files with 259 additions and 199 deletions

View File

@ -416,7 +416,7 @@ pub struct DeviceManager {
impl DeviceManager {
pub fn new(
vm_info: &VmInfo,
allocator: SystemAllocator,
allocator: Arc<Mutex<SystemAllocator>>,
mut mem_slots: u32,
_exit_evt: &EventFd,
reset_evt: &EventFd,
@ -435,7 +435,7 @@ impl DeviceManager {
let mut virt_iommu: Option<(u32, Vec<u32>)> = None;
let address_manager = Arc::new(AddressManager {
allocator: Arc::new(Mutex::new(allocator)),
allocator,
io_bus: Arc::new(io_bus),
mmio_bus: Arc::new(mmio_bus),
#[cfg(feature = "pci_support")]

View File

@ -30,6 +30,7 @@ pub mod api;
pub mod config;
pub mod cpu;
pub mod device_manager;
pub mod memory_manager;
pub mod vm;
#[cfg(feature = "acpi")]

216
vmm/src/memory_manager.rs Normal file
View File

@ -0,0 +1,216 @@
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use arch::RegionType;
use std::fs::{File, OpenOptions};
use std::io;
use std::os::unix::io::FromRawFd;
use std::path::PathBuf;
use std::sync::{Arc, Mutex, RwLock};
use vm_allocator::SystemAllocator;
use vm_memory::guest_memory::FileOffset;
use vm_memory::{
Address, Error as MmapError, GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion,
GuestUsize,
};
use kvm_bindings::kvm_userspace_memory_region;
use kvm_ioctls::*;
/// Manages the guest RAM: owns the mmap-backed guest memory and records
/// the guest-physical address range left over for device allocations.
pub struct MemoryManager {
    // Guest memory wrapped in RwLock so regions can be added at runtime
    // while readers share access from any thread.
    guest_memory: Arc<RwLock<GuestMemoryMmap>>,
    // Number of RAM-typed regions backing the guest memory.
    ram_regions: u32,
    // First guest-physical address usable for device (MMIO) mappings.
    start_of_device_area: GuestAddress,
    // Last addressable guest-physical address, derived from the host
    // CPU's physical address width.
    end_of_device_area: GuestAddress,
}
/// Errors that can occur while creating and registering guest memory.
#[derive(Debug)]
pub enum Error {
    /// Failed to create shared file.
    SharedFileCreate(io::Error),
    /// Failed to set shared file length.
    SharedFileSetLen(io::Error),
    /// Mmap backed guest memory error
    GuestMemory(MmapError),
    /// Failed to allocate a memory range.
    MemoryRangeAllocation,
}
/// Return the number of physical address bits supported by the host CPU.
///
/// Reads CPUID leaf 0x8000_0008 when the processor exposes it, falling
/// back to 36 bits otherwise. On AMD processors with SME (Secure Memory
/// Encryption) enabled, the address bits consumed by memory encryption
/// are subtracted, per the AMD64 Architecture Programmer's Manual
/// Volume 2, Section 7.10.1.
pub fn get_host_cpu_phys_bits() -> u8 {
    use core::arch::x86_64;

    // SAFETY: __cpuid is always safe to execute on an x86_64 host.
    unsafe {
        let max_extended = x86_64::__cpuid(0x8000_0000);

        // The vendor string "AuthenticAMD" is reported in ebx/edx/ecx.
        let is_amd = max_extended.ebx == 0x6874_7541
            && max_extended.ecx == 0x444d_4163
            && max_extended.edx == 0x6974_6e65;

        // When AMD SME is enabled (leaf 0x8000_001f, eax bit 0), some
        // physical address bits become reserved; ebx[11:6] holds how many.
        let reduced = if is_amd
            && max_extended.eax >= 0x8000_001f
            && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
        {
            (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
        } else {
            0
        };

        if max_extended.eax < 0x8000_0008 {
            // Leaf 0x8000_0008 unavailable: assume the architectural
            // minimum of 36 physical address bits.
            36
        } else {
            let addr_sizes = x86_64::__cpuid(0x8000_0008);
            ((addr_sizes.eax & 0xff) - reduced) as u8
        }
    }
}
impl MemoryManager {
pub fn new(
allocator: Arc<Mutex<SystemAllocator>>,
fd: Arc<VmFd>,
boot_ram: u64,
backing_file: &Option<PathBuf>,
mergeable: bool,
) -> Result<Arc<Mutex<MemoryManager>>, Error> {
// Init guest memory
let arch_mem_regions = arch::arch_memory_regions(boot_ram);
let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
.iter()
.filter(|r| r.2 == RegionType::Ram)
.map(|r| (r.0, r.1))
.collect();
let guest_memory = match backing_file {
Some(ref file) => {
let mut mem_regions = Vec::<(GuestAddress, usize, Option<FileOffset>)>::new();
for region in ram_regions.iter() {
if file.is_file() {
let file = OpenOptions::new()
.read(true)
.write(true)
.open(file)
.map_err(Error::SharedFileCreate)?;
file.set_len(region.1 as u64)
.map_err(Error::SharedFileSetLen)?;
mem_regions.push((region.0, region.1, Some(FileOffset::new(file, 0))));
} else if file.is_dir() {
let fs_str = format!("{}{}", file.display(), "/tmpfile_XXXXXX");
let fs = std::ffi::CString::new(fs_str).unwrap();
let mut path = fs.as_bytes_with_nul().to_owned();
let path_ptr = path.as_mut_ptr() as *mut _;
let fd = unsafe { libc::mkstemp(path_ptr) };
unsafe { libc::unlink(path_ptr) };
let f = unsafe { File::from_raw_fd(fd) };
f.set_len(region.1 as u64)
.map_err(Error::SharedFileSetLen)?;
mem_regions.push((region.0, region.1, Some(FileOffset::new(f, 0))));
}
}
GuestMemoryMmap::with_files(&mem_regions).map_err(Error::GuestMemory)?
}
None => GuestMemoryMmap::new(&ram_regions).map_err(Error::GuestMemory)?,
};
guest_memory
.with_regions(|index, region| {
let mem_region = kvm_userspace_memory_region {
slot: index as u32,
guest_phys_addr: region.start_addr().raw_value(),
memory_size: region.len() as u64,
userspace_addr: region.as_ptr() as u64,
flags: 0,
};
// Safe because the guest regions are guaranteed not to overlap.
unsafe {
fd.set_user_memory_region(mem_region)
.map_err(|e| io::Error::from_raw_os_error(e.errno()))
}?;
// Mark the pages as mergeable if explicitly asked for.
if mergeable {
// Safe because the address and size are valid since the
// mmap succeeded.
let ret = unsafe {
libc::madvise(
region.as_ptr() as *mut libc::c_void,
region.len() as libc::size_t,
libc::MADV_MERGEABLE,
)
};
if ret != 0 {
let err = io::Error::last_os_error();
// Safe to unwrap because the error is constructed with
// last_os_error(), which ensures the output will be Some().
let errno = err.raw_os_error().unwrap();
if errno == libc::EINVAL {
warn!("kernel not configured with CONFIG_KSM");
} else {
warn!("madvise error: {}", err);
}
warn!("failed to mark pages as mergeable");
}
}
Ok(())
})
.map_err(|_: io::Error| Error::GuestMemory(MmapError::NoMemoryRegion))?;
// Allocate RAM and Reserved address ranges.
for region in arch_mem_regions.iter() {
allocator
.lock()
.unwrap()
.allocate_mmio_addresses(Some(region.0), region.1 as GuestUsize, None)
.ok_or(Error::MemoryRangeAllocation)?;
}
let end_of_device_area = GuestAddress((1 << get_host_cpu_phys_bits()) - 1);
let mem_end = guest_memory.end_addr();
let start_of_device_area = if mem_end < arch::layout::MEM_32BIT_RESERVED_START {
arch::layout::RAM_64BIT_START
} else {
mem_end.unchecked_add(1)
};
// Convert the guest memory into an Arc. The point being able to use it
// anywhere in the code, no matter which thread might use it.
// Add the RwLock aspect to guest memory as we might want to perform
// additions to the memory during runtime.
let guest_memory = Arc::new(RwLock::new(guest_memory));
Ok(Arc::new(Mutex::new(MemoryManager {
guest_memory,
ram_regions: ram_regions.len() as u32,
start_of_device_area,
end_of_device_area,
})))
}
pub fn guest_memory(&self) -> Arc<RwLock<GuestMemoryMmap>> {
self.guest_memory.clone()
}
pub fn ram_regions(&self) -> u32 {
self.ram_regions
}
pub fn start_of_device_area(&self) -> GuestAddress {
self.start_of_device_area
}
pub fn end_of_device_area(&self) -> GuestAddress {
self.end_of_device_area
}
}

View File

@ -27,8 +27,9 @@ extern crate vm_virtio;
use crate::config::VmConfig;
use crate::cpu;
use crate::device_manager::{get_win_size, Console, DeviceManager, DeviceManagerError};
use crate::memory_manager::{get_host_cpu_phys_bits, Error as MemoryManagerError, MemoryManager};
use anyhow::anyhow;
use arch::RegionType;
use arch::layout;
use devices::{ioapic, HotPlugNotificationType};
use kvm_bindings::{kvm_enable_cap, kvm_userspace_memory_region, KVM_CAP_SPLIT_IRQCHIP};
use kvm_ioctls::*;
@ -37,19 +38,16 @@ use linux_loader::cmdline::Cmdline;
use linux_loader::loader::KernelLoader;
use signal_hook::{iterator::Signals, SIGINT, SIGTERM, SIGWINCH};
use std::ffi::CString;
use std::fs::{File, OpenOptions};
use std::fs::File;
use std::io;
use std::ops::Deref;
use std::os::unix::io::FromRawFd;
use std::sync::{Arc, Mutex, RwLock};
use std::{result, str, thread};
use vm_allocator::{GsiApic, SystemAllocator};
use vm_device::{Migratable, MigratableError, Pausable, Snapshotable};
use vm_memory::guest_memory::FileOffset;
use vm_memory::{
Address, Bytes, Error as MmapError, GuestAddress, GuestMemory, GuestMemoryMmap,
GuestMemoryRegion, GuestUsize,
Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion, GuestUsize,
};
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::terminal::Terminal;
@ -78,9 +76,6 @@ pub enum Error {
/// Cannot open the kernel image
KernelFile(io::Error),
/// Mmap backed guest memory error
GuestMemory(MmapError),
/// Cannot load the kernel in memory
KernelLoad(linux_loader::loader::Error),
@ -110,15 +105,6 @@ pub enum Error {
/// Memory is overflow
MemOverflow,
/// Failed to create shared file.
SharedFileCreate(io::Error),
/// Failed to set shared file length.
SharedFileSetLen(io::Error),
/// Failed to allocate a memory range.
MemoryRangeAllocation,
/// Failed to allocate the IOAPIC memory range.
IoapicRangeAllocation,
@ -166,6 +152,9 @@ pub enum Error {
/// Cannot resume VM
Resume(MigratableError),
/// Memory manager error
MemoryManager(MemoryManagerError),
}
pub type Result<T> = result::Result<T, Error>;
@ -221,7 +210,6 @@ impl VmState {
pub struct Vm {
kernel: File,
memory: Arc<RwLock<GuestMemoryMmap>>,
threads: Vec<thread::JoinHandle<()>>,
devices: DeviceManager,
config: Arc<Mutex<VmConfig>>,
@ -229,34 +217,7 @@ pub struct Vm {
signals: Option<Signals>,
state: RwLock<VmState>,
cpu_manager: Arc<Mutex<cpu::CpuManager>>,
}
fn get_host_cpu_phys_bits() -> u8 {
use core::arch::x86_64;
unsafe {
let leaf = x86_64::__cpuid(0x8000_0000);
// Detect and handle AMD SME (Secure Memory Encryption) properly.
// Some physical address bits may become reserved when the feature is enabled.
// See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1
let reduced = if leaf.eax >= 0x8000_001f
&& leaf.ebx == 0x6874_7541 // Vendor ID: AuthenticAMD
&& leaf.ecx == 0x444d_4163
&& leaf.edx == 0x6974_6e65
&& x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
{
(x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
} else {
0
};
if leaf.eax >= 0x8000_0008 {
let leaf = x86_64::__cpuid(0x8000_0008);
((leaf.eax & 0xff) - reduced) as u8
} else {
36
}
}
memory_manager: Arc<Mutex<MemoryManager>>,
}
impl Vm {
@ -285,111 +246,6 @@ impl Vm {
let fd = kvm.create_vm().map_err(Error::VmCreate)?;
let fd = Arc::new(fd);
// Init guest memory
let arch_mem_regions = arch::arch_memory_regions(config.lock().unwrap().memory.size);
let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
.iter()
.filter(|r| r.2 == RegionType::Ram)
.map(|r| (r.0, r.1))
.collect();
let sub_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
.iter()
.filter(|r| r.2 == RegionType::SubRegion)
.map(|r| (r.0, r.1))
.collect();
// Check the number of reserved regions, and only take the first one
// that's acrtually a 32-bit hole.
let mut mem_hole = (GuestAddress(0), 0);
for region in sub_regions.iter() {
if region.0.unchecked_add(region.1 as u64).raw_value() <= 0x1_0000_0000 {
mem_hole = (region.0, region.1);
break;
}
}
let guest_memory = match config.lock().unwrap().memory.file {
Some(ref file) => {
let mut mem_regions = Vec::<(GuestAddress, usize, Option<FileOffset>)>::new();
for region in ram_regions.iter() {
if file.is_file() {
let file = OpenOptions::new()
.read(true)
.write(true)
.open(file)
.map_err(Error::SharedFileCreate)?;
file.set_len(region.1 as u64)
.map_err(Error::SharedFileSetLen)?;
mem_regions.push((region.0, region.1, Some(FileOffset::new(file, 0))));
} else if file.is_dir() {
let fs_str = format!("{}{}", file.display(), "/tmpfile_XXXXXX");
let fs = std::ffi::CString::new(fs_str).unwrap();
let mut path = fs.as_bytes_with_nul().to_owned();
let path_ptr = path.as_mut_ptr() as *mut _;
let fd = unsafe { libc::mkstemp(path_ptr) };
unsafe { libc::unlink(path_ptr) };
let f = unsafe { File::from_raw_fd(fd) };
f.set_len(region.1 as u64)
.map_err(Error::SharedFileSetLen)?;
mem_regions.push((region.0, region.1, Some(FileOffset::new(f, 0))));
}
}
GuestMemoryMmap::with_files(&mem_regions).map_err(Error::GuestMemory)?
}
None => GuestMemoryMmap::new(&ram_regions).map_err(Error::GuestMemory)?,
};
guest_memory
.with_regions(|index, region| {
let mem_region = kvm_userspace_memory_region {
slot: index as u32,
guest_phys_addr: region.start_addr().raw_value(),
memory_size: region.len() as u64,
userspace_addr: region.as_ptr() as u64,
flags: 0,
};
// Safe because the guest regions are guaranteed not to overlap.
unsafe {
fd.set_user_memory_region(mem_region)
.map_err(|e| io::Error::from_raw_os_error(e.errno()))
}?;
// Mark the pages as mergeable if explicitly asked for.
if config.lock().unwrap().memory.mergeable {
// Safe because the address and size are valid since the
// mmap succeeded.
let ret = unsafe {
libc::madvise(
region.as_ptr() as *mut libc::c_void,
region.len() as libc::size_t,
libc::MADV_MERGEABLE,
)
};
if ret != 0 {
let err = io::Error::last_os_error();
// Safe to unwrap because the error is constructed with
// last_os_error(), which ensures the output will be Some().
let errno = err.raw_os_error().unwrap();
if errno == libc::EINVAL {
warn!("kernel not configured with CONFIG_KSM");
} else {
warn!("madvise error: {}", err);
}
warn!("failed to mark pages as mergeable");
}
}
Ok(())
})
.map_err(|_: io::Error| Error::GuestMemory(MmapError::NoMemoryRegion))?;
// Set TSS
fd.set_tss_address(arch::x86_64::layout::KVM_TSS_ADDRESS.raw_value() as usize)
.map_err(Error::VmSetup)?;
@ -438,38 +294,34 @@ impl Vm {
);
// Let's allocate 64 GiB of addressable MMIO space, starting at 0.
let mut allocator = SystemAllocator::new(
GuestAddress(0),
1 << 16 as GuestUsize,
GuestAddress(0),
1 << get_host_cpu_phys_bits(),
mem_hole.0,
mem_hole.1 as GuestUsize,
vec![ioapic],
let allocator = Arc::new(Mutex::new(
SystemAllocator::new(
GuestAddress(0),
1 << 16 as GuestUsize,
GuestAddress(0),
1 << get_host_cpu_phys_bits(),
layout::MEM_32BIT_RESERVED_START,
layout::MEM_32BIT_DEVICES_SIZE,
vec![ioapic],
)
.ok_or(Error::CreateSystemAllocator)?,
));
let memory_config = config.lock().unwrap().memory.clone();
let memory_manager = MemoryManager::new(
allocator.clone(),
fd.clone(),
memory_config.size,
&memory_config.file,
memory_config.mergeable,
)
.ok_or(Error::CreateSystemAllocator)?;
// Allocate RAM and Reserved address ranges.
for region in arch_mem_regions.iter() {
allocator
.allocate_mmio_addresses(Some(region.0), region.1 as GuestUsize, None)
.ok_or(Error::MemoryRangeAllocation)?;
}
let end_of_device_area = GuestAddress((1 << get_host_cpu_phys_bits()) - 1);
let mem_end = guest_memory.end_addr();
let start_of_device_area = if mem_end < arch::layout::MEM_32BIT_RESERVED_START {
arch::layout::RAM_64BIT_START
} else {
mem_end.unchecked_add(1)
};
// Convert the guest memory into an Arc. The point being able to use it
// anywhere in the code, no matter which thread might use it.
// Add the RwLock aspect to guest memory as we might want to perform
// additions to the memory during runtime.
let guest_memory = Arc::new(RwLock::new(guest_memory));
.map_err(Error::MemoryManager)?;
let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area();
let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area();
let ram_regions = memory_manager.lock().unwrap().ram_regions();
let guest_memory = memory_manager.lock().unwrap().guest_memory();
let vm_info = VmInfo {
memory: &guest_memory,
vm_fd: &fd,
@ -478,14 +330,9 @@ impl Vm {
end_of_device_area,
};
let device_manager = DeviceManager::new(
&vm_info,
allocator,
ram_regions.len() as u32,
&exit_evt,
&reset_evt,
)
.map_err(Error::DeviceManager)?;
let device_manager =
DeviceManager::new(&vm_info, allocator, ram_regions, &exit_evt, &reset_evt)
.map_err(Error::DeviceManager)?;
let on_tty = unsafe { libc::isatty(libc::STDIN_FILENO as i32) } != 0;
@ -504,7 +351,6 @@ impl Vm {
Ok(Vm {
kernel,
memory: guest_memory,
devices: device_manager,
config,
on_tty,
@ -512,6 +358,7 @@ impl Vm {
signals: None,
state: RwLock::new(VmState::Created),
cpu_manager,
memory_manager,
})
}
@ -525,7 +372,8 @@ impl Vm {
}
let cmdline_cstring = CString::new(cmdline).map_err(|_| Error::CmdLine)?;
let mem = self.memory.read().unwrap();
let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
let mem = guest_memory.read().unwrap();
let entry_addr = match linux_loader::loader::Elf::load(
mem.deref(),
None,
@ -720,11 +568,6 @@ impl Vm {
Ok(())
}
/// Gets an Arc to the guest memory owned by this VM.
pub fn get_memory(&self) -> Arc<RwLock<GuestMemoryMmap>> {
self.memory.clone()
}
pub fn handle_stdin(&self) -> Result<()> {
let mut out = [0u8; 64];
let count = io::stdin()