vm-allocator: Introduce an MMIO hole address allocator

With this new AddressAllocator as part of the SystemAllocator, the
VMM can now decide with finer granularity where to place memory.

By allocating the RAM and the hole into the MMIO address space, we
ensure that no memory will be allocated by accident where the RAM or
where the hole is.
And by creating the new MMIO hole address space, we create a subset
of the entire MMIO address space where we can place 32-bit BARs, for
example.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
Sebastien Boeuf 2019-07-17 09:54:11 -07:00
parent a761b820c7
commit f98a69f42e
5 changed files with 133 additions and 16 deletions

View File

@ -6,8 +6,8 @@ pub mod layout;
use memory_model::{GuestAddress, GuestMemory}; use memory_model::{GuestAddress, GuestMemory};
/// Stub function that needs to be implemented when aarch64 functionality is added. /// Stub function that needs to be implemented when aarch64 functionality is added.
pub fn arch_memory_regions(size: usize) -> Vec<(GuestAddress, usize)> { pub fn arch_memory_regions(size: usize) -> Vec<(GuestAddress, usize, RegionType)> {
vec![(GuestAddress(0), size)] vec![(GuestAddress(0), size, RegionType::Ram)]
} }
/// Stub function that needs to be implemented when aarch64 functionality is added. /// Stub function that needs to be implemented when aarch64 functionality is added.

View File

@ -33,6 +33,15 @@ pub enum Error {
} }
pub type Result<T> = result::Result<T, Error>; pub type Result<T> = result::Result<T, Error>;
#[derive(PartialEq)]
pub enum RegionType {
/// RAM type
Ram,
/// Reserved type. Designate a region which should not be considered as
/// RAM. Useful to specify a PCI hole for instance.
Reserved,
}
// 1MB. We don't put anything above here except the kernel itself. // 1MB. We don't put anything above here except the kernel itself.
pub const HIMEM_START: GuestAddress = GuestAddress(0x100000); pub const HIMEM_START: GuestAddress = GuestAddress(0x100000);

View File

@ -11,9 +11,9 @@ pub mod layout;
mod mptable; mod mptable;
pub mod regs; pub mod regs;
use std::mem; use crate::RegionType;
use linux_loader::loader::bootparam::{boot_params, setup_header, E820_RAM}; use linux_loader::loader::bootparam::{boot_params, setup_header, E820_RAM};
use std::mem;
use vm_memory::{ use vm_memory::{
Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap, GuestUsize, Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap, GuestUsize,
}; };
@ -52,7 +52,7 @@ const MEM_32BIT_GAP_SIZE: GuestUsize = (768 << 20);
/// These should be used to configure the GuestMemory structure for the platform. /// These should be used to configure the GuestMemory structure for the platform.
/// For x86_64 all addresses are valid from the start of the kernel except a /// For x86_64 all addresses are valid from the start of the kernel except a
/// carve out at the end of 32bit address space. /// carve out at the end of 32bit address space.
pub fn arch_memory_regions(size: GuestUsize) -> Vec<(GuestAddress, usize)> { pub fn arch_memory_regions(size: GuestUsize) -> Vec<(GuestAddress, usize, RegionType)> {
let memory_gap_start = FIRST_ADDR_PAST_32BITS let memory_gap_start = FIRST_ADDR_PAST_32BITS
.checked_sub(MEM_32BIT_GAP_SIZE as u64) .checked_sub(MEM_32BIT_GAP_SIZE as u64)
.expect("32-bit hole is too large"); .expect("32-bit hole is too large");
@ -61,17 +61,29 @@ pub fn arch_memory_regions(size: GuestUsize) -> Vec<(GuestAddress, usize)> {
// case1: guest memory fits before the gap // case1: guest memory fits before the gap
if size as u64 <= memory_gap_start.raw_value() { if size as u64 <= memory_gap_start.raw_value() {
regions.push((GuestAddress(0), size as usize)); regions.push((GuestAddress(0), size as usize, RegionType::Ram));
// case2: guest memory extends beyond the gap // case2: guest memory extends beyond the gap
} else { } else {
// push memory before the gap // push memory before the gap
regions.push((GuestAddress(0), memory_gap_start.raw_value() as usize)); regions.push((
GuestAddress(0),
memory_gap_start.raw_value() as usize,
RegionType::Ram,
));
regions.push(( regions.push((
FIRST_ADDR_PAST_32BITS, FIRST_ADDR_PAST_32BITS,
requested_memory_size.unchecked_offset_from(memory_gap_start) as usize, requested_memory_size.unchecked_offset_from(memory_gap_start) as usize,
RegionType::Ram,
)); ));
} }
// Add the 32-bit hole as a "reserved" region.
regions.push((
memory_gap_start,
MEM_32BIT_GAP_SIZE as usize,
RegionType::Reserved,
));
regions regions
} }
@ -190,7 +202,7 @@ mod tests {
#[test] #[test]
fn regions_lt_4gb() { fn regions_lt_4gb() {
let regions = arch_memory_regions(1 << 29 as GuestUsize); let regions = arch_memory_regions(1 << 29 as GuestUsize);
assert_eq!(1, regions.len()); assert_eq!(2, regions.len());
assert_eq!(GuestAddress(0), regions[0].0); assert_eq!(GuestAddress(0), regions[0].0);
assert_eq!(1usize << 29, regions[0].1); assert_eq!(1usize << 29, regions[0].1);
} }
@ -198,7 +210,7 @@ mod tests {
#[test] #[test]
fn regions_gt_4gb() { fn regions_gt_4gb() {
let regions = arch_memory_regions((1 << 32 as GuestUsize) + 0x8000); let regions = arch_memory_regions((1 << 32 as GuestUsize) + 0x8000);
assert_eq!(2, regions.len()); assert_eq!(3, regions.len());
assert_eq!(GuestAddress(0), regions[0].0); assert_eq!(GuestAddress(0), regions[0].0);
assert_eq!(GuestAddress(1 << 32), regions[1].0); assert_eq!(GuestAddress(1 << 32), regions[1].0);
} }
@ -229,19 +241,34 @@ mod tests {
// Now assigning some memory that falls before the 32bit memory hole. // Now assigning some memory that falls before the 32bit memory hole.
let mem_size = 128 << 20; let mem_size = 128 << 20;
let arch_mem_regions = arch_memory_regions(mem_size); let arch_mem_regions = arch_memory_regions(mem_size);
let gm = GuestMemoryMmap::new(&arch_mem_regions).unwrap(); let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
.iter()
.filter(|r| r.2 == RegionType::Ram)
.map(|r| (r.0, r.1))
.collect();
let gm = GuestMemoryMmap::new(&ram_regions).unwrap();
configure_system(&gm, GuestAddress(0), 0, no_vcpus, None).unwrap(); configure_system(&gm, GuestAddress(0), 0, no_vcpus, None).unwrap();
// Now assigning some memory that is equal to the start of the 32bit memory hole. // Now assigning some memory that is equal to the start of the 32bit memory hole.
let mem_size = 3328 << 20; let mem_size = 3328 << 20;
let arch_mem_regions = arch_memory_regions(mem_size); let arch_mem_regions = arch_memory_regions(mem_size);
let gm = GuestMemoryMmap::new(&arch_mem_regions).unwrap(); let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
.iter()
.filter(|r| r.2 == RegionType::Ram)
.map(|r| (r.0, r.1))
.collect();
let gm = GuestMemoryMmap::new(&ram_regions).unwrap();
configure_system(&gm, GuestAddress(0), 0, no_vcpus, None).unwrap(); configure_system(&gm, GuestAddress(0), 0, no_vcpus, None).unwrap();
// Now assigning some memory that falls after the 32bit memory hole. // Now assigning some memory that falls after the 32bit memory hole.
let mem_size = 3330 << 20; let mem_size = 3330 << 20;
let arch_mem_regions = arch_memory_regions(mem_size); let arch_mem_regions = arch_memory_regions(mem_size);
let gm = GuestMemoryMmap::new(&arch_mem_regions).unwrap(); let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
.iter()
.filter(|r| r.2 == RegionType::Ram)
.map(|r| (r.0, r.1))
.collect();
let gm = GuestMemoryMmap::new(&ram_regions).unwrap();
configure_system(&gm, GuestAddress(0), 0, no_vcpus, None).unwrap(); configure_system(&gm, GuestAddress(0), 0, no_vcpus, None).unwrap();
} }

View File

@ -31,6 +31,7 @@ fn pagesize() -> usize {
/// let mut allocator = SystemAllocator::new( /// let mut allocator = SystemAllocator::new(
/// GuestAddress(0x1000), 0x10000, /// GuestAddress(0x1000), 0x10000,
/// GuestAddress(0x10000000), 0x10000000, /// GuestAddress(0x10000000), 0x10000000,
/// GuestAddress(0x20000000), 0x100000,
/// vec![GsiApic::new(5, 19)]).unwrap(); /// vec![GsiApic::new(5, 19)]).unwrap();
/// assert_eq!(allocator.allocate_irq(), Some(5)); /// assert_eq!(allocator.allocate_irq(), Some(5));
/// assert_eq!(allocator.allocate_irq(), Some(6)); /// assert_eq!(allocator.allocate_irq(), Some(6));
@ -40,6 +41,7 @@ fn pagesize() -> usize {
pub struct SystemAllocator { pub struct SystemAllocator {
io_address_space: AddressAllocator, io_address_space: AddressAllocator,
mmio_address_space: AddressAllocator, mmio_address_space: AddressAllocator,
mmio_hole_address_space: AddressAllocator,
gsi_allocator: GsiAllocator, gsi_allocator: GsiAllocator,
} }
@ -57,11 +59,14 @@ impl SystemAllocator {
io_size: GuestUsize, io_size: GuestUsize,
mmio_base: GuestAddress, mmio_base: GuestAddress,
mmio_size: GuestUsize, mmio_size: GuestUsize,
mmio_hole_base: GuestAddress,
mmio_hole_size: GuestUsize,
apics: Vec<GsiApic>, apics: Vec<GsiApic>,
) -> Option<Self> { ) -> Option<Self> {
Some(SystemAllocator { Some(SystemAllocator {
io_address_space: AddressAllocator::new(io_base, io_size)?, io_address_space: AddressAllocator::new(io_base, io_size)?,
mmio_address_space: AddressAllocator::new(mmio_base, mmio_size)?, mmio_address_space: AddressAllocator::new(mmio_base, mmio_size)?,
mmio_hole_address_space: AddressAllocator::new(mmio_hole_base, mmio_hole_size)?,
gsi_allocator: GsiAllocator::new(apics), gsi_allocator: GsiAllocator::new(apics),
}) })
} }
@ -101,6 +106,20 @@ impl SystemAllocator {
) )
} }
/// Reserves a section of `size` bytes of MMIO address space.
pub fn allocate_mmio_hole_addresses(
&mut self,
address: Option<GuestAddress>,
size: GuestUsize,
align_size: Option<GuestUsize>,
) -> Option<GuestAddress> {
self.mmio_hole_address_space.allocate(
address,
size,
Some(align_size.unwrap_or(pagesize() as u64)),
)
}
/// Free an IO address range. /// Free an IO address range.
/// We can only free a range if it matches exactly an already allocated range. /// We can only free a range if it matches exactly an already allocated range.
pub fn free_io_addresses(&mut self, address: GuestAddress, size: GuestUsize) { pub fn free_io_addresses(&mut self, address: GuestAddress, size: GuestUsize) {

View File

@ -22,6 +22,7 @@ extern crate vm_virtio;
extern crate vmm_sys_util; extern crate vmm_sys_util;
use crate::config::{SerialOutputMode, VmConfig}; use crate::config::{SerialOutputMode, VmConfig};
use arch::RegionType;
use devices::ioapic; use devices::ioapic;
use kvm_bindings::{ use kvm_bindings::{
kvm_enable_cap, kvm_msi, kvm_pit_config, kvm_userspace_memory_region, KVM_CAP_SPLIT_IRQCHIP, kvm_enable_cap, kvm_msi, kvm_pit_config, kvm_userspace_memory_region, KVM_CAP_SPLIT_IRQCHIP,
@ -66,6 +67,10 @@ const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit.
// 64 bit direct boot entry offset for bzImage // 64 bit direct boot entry offset for bzImage
const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200; const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200;
// IOAPIC address range
const IOAPIC_RANGE_ADDR: u64 = 0xfec0_0000;
const IOAPIC_RANGE_SIZE: u64 = 0x20;
/// Errors associated with VM management /// Errors associated with VM management
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
@ -160,6 +165,12 @@ pub enum Error {
/// Failed to set shared file length. /// Failed to set shared file length.
SharedFileSetLen(io::Error), SharedFileSetLen(io::Error),
/// Failed to allocate a memory range.
MemoryRangeAllocation,
/// Failed to allocate the IOAPIC memory range.
IoapicRangeAllocation,
} }
pub type Result<T> = result::Result<T, Error>; pub type Result<T> = result::Result<T, Error>;
@ -667,8 +678,10 @@ impl DeviceManager {
for pmem_cfg in pmem_list_cfg.iter() { for pmem_cfg in pmem_list_cfg.iter() {
let size = pmem_cfg.size; let size = pmem_cfg.size;
// The memory needs to be 2MiB aligned in order to support
// hugepages.
let pmem_guest_addr = allocator let pmem_guest_addr = allocator
.allocate_mmio_addresses(None, size as GuestUsize, None) .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000))
.ok_or(DeviceManagerError::PmemRangeAllocation)?; .ok_or(DeviceManagerError::PmemRangeAllocation)?;
let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
@ -870,7 +883,7 @@ impl DeviceManager {
if let Some(ioapic) = &self.ioapic { if let Some(ioapic) = &self.ioapic {
// Insert IOAPIC // Insert IOAPIC
self.mmio_bus self.mmio_bus
.insert(ioapic.clone(), 0xfec0_0000, 0x20) .insert(ioapic.clone(), IOAPIC_RANGE_ADDR, IOAPIC_RANGE_SIZE)
.map_err(Error::BusError)?; .map_err(Error::BusError)?;
} }
@ -963,10 +976,31 @@ impl<'a> Vm<'a> {
// Init guest memory // Init guest memory
let arch_mem_regions = arch::arch_memory_regions(config.memory.size); let arch_mem_regions = arch::arch_memory_regions(config.memory.size);
let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
.iter()
.filter(|r| r.2 == RegionType::Ram)
.map(|r| (r.0, r.1))
.collect();
let reserved_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
.iter()
.filter(|r| r.2 == RegionType::Reserved)
.map(|r| (r.0, r.1))
.collect();
// Check the number of reserved regions, and only take the first one
// that's actually a 32-bit hole.
let mut mem_hole = (GuestAddress(0), 0);
for region in reserved_regions.iter() {
if region.0.unchecked_add(region.1 as u64).raw_value() <= 0x1_0000_0000 {
mem_hole = (region.0, region.1);
break;
}
}
let guest_memory = match config.memory.file { let guest_memory = match config.memory.file {
Some(file) => { Some(file) => {
let mut mem_regions = Vec::<(GuestAddress, usize, Option<FileOffset>)>::new(); let mut mem_regions = Vec::<(GuestAddress, usize, Option<FileOffset>)>::new();
for region in arch_mem_regions.iter() { for region in ram_regions.iter() {
let file = OpenOptions::new() let file = OpenOptions::new()
.read(true) .read(true)
.write(true) .write(true)
@ -982,7 +1016,7 @@ impl<'a> Vm<'a> {
GuestMemoryMmap::with_files(&mem_regions).map_err(Error::GuestMemory)? GuestMemoryMmap::with_files(&mem_regions).map_err(Error::GuestMemory)?
} }
None => GuestMemoryMmap::new(&arch_mem_regions).map_err(Error::GuestMemory)?, None => GuestMemoryMmap::new(&ram_regions).map_err(Error::GuestMemory)?,
}; };
guest_memory guest_memory
@ -1077,10 +1111,38 @@ impl<'a> Vm<'a> {
1 << 16 as GuestUsize, 1 << 16 as GuestUsize,
GuestAddress(0), GuestAddress(0),
1 << 36 as GuestUsize, 1 << 36 as GuestUsize,
mem_hole.0,
mem_hole.1 as GuestUsize,
vec![ioapic], vec![ioapic],
) )
.ok_or(Error::CreateSystemAllocator)?; .ok_or(Error::CreateSystemAllocator)?;
// Allocate RAM and Reserved address ranges.
for region in arch_mem_regions.iter() {
allocator
.allocate_mmio_addresses(Some(region.0), region.1 as GuestUsize, None)
.ok_or(Error::MemoryRangeAllocation)?;
}
// Allocate IOAPIC address in the memory hole if necessary.
if IOAPIC_RANGE_ADDR >= mem_hole.0.raw_value() && IOAPIC_RANGE_SIZE < mem_hole.1 as u64 {
allocator
.allocate_mmio_hole_addresses(
Some(GuestAddress(IOAPIC_RANGE_ADDR)),
IOAPIC_RANGE_SIZE as GuestUsize,
None,
)
.ok_or(Error::IoapicRangeAllocation)?;
} else {
allocator
.allocate_mmio_addresses(
Some(GuestAddress(IOAPIC_RANGE_ADDR)),
IOAPIC_RANGE_SIZE as GuestUsize,
None,
)
.ok_or(Error::IoapicRangeAllocation)?;
}
let device_manager = DeviceManager::new( let device_manager = DeviceManager::new(
guest_memory.clone(), guest_memory.clone(),
&mut allocator, &mut allocator,