// cloud-hypervisor/vmm/src/memory_manager.rs

// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
#[cfg(target_arch = "x86_64")]
use crate::config::SgxEpcConfig;
use crate::config::{HotplugMethod, MemoryConfig, MemoryZoneConfig};
use crate::migration::url_to_path;
use crate::MEMORY_MANAGER_SNAPSHOT_ID;
use crate::{GuestMemoryMmap, GuestRegionMmap};
#[cfg(feature = "acpi")]
use acpi_tables::{aml, aml::Aml};
use anyhow::anyhow;
#[cfg(target_arch = "x86_64")]
use arch::x86_64::{SgxEpcRegion, SgxEpcSection};
use arch::{layout, RegionType};
#[cfg(target_arch = "x86_64")]
use devices::ioapic;
#[cfg(target_arch = "x86_64")]
use libc::{MAP_NORESERVE, MAP_POPULATE, MAP_SHARED, PROT_READ, PROT_WRITE};
use std::collections::HashMap;
use std::convert::TryInto;
use std::ffi;
use std::fs::{File, OpenOptions};
use std::io;
use std::ops::Deref;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::path::PathBuf;
use std::result;
use std::sync::{Arc, Barrier, Mutex};
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
#[cfg(target_arch = "x86_64")]
use vm_allocator::GsiApic;
use vm_allocator::SystemAllocator;
use vm_device::BusDevice;
use vm_memory::guest_memory::FileOffset;
use vm_memory::{
mmap::MmapRegionError, Address, Bytes, Error as MmapError, GuestAddress, GuestAddressSpace,
GuestMemory, GuestMemoryAtomic, GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
GuestUsize, MmapRegion,
};
use vm_migration::{
protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
SnapshotDataSection, Snapshottable, Transportable, VersionMapped,
};
#[cfg(feature = "acpi")]
pub const MEMORY_MANAGER_ACPI_SIZE: usize = 0x18;
const DEFAULT_MEMORY_ZONE: &str = "mem0";
#[cfg(target_arch = "x86_64")]
const X86_64_IRQ_BASE: u32 = 5;
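// Number of ACPI memory hotplug slots ("DIMMs") exposed by the hotplug device below.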
const HOTPLUG_COUNT: usize = 8;
// Memory policy constants
const MPOL_BIND: u32 = 2;
const MPOL_MF_STRICT: u32 = 1;
const MPOL_MF_MOVE: u32 = 1 << 1;
#[derive(Default)]
struct HotPlugState {
base: u64,
length: u64,
active: bool,
inserting: bool,
removing: bool,
}
pub struct VirtioMemZone {
region: Arc<GuestRegionMmap>,
resize_handler: virtio_devices::Resize,
hotplugged_size: u64,
hugepages: bool,
}
impl VirtioMemZone {
pub fn region(&self) -> &Arc<GuestRegionMmap> {
&self.region
}
pub fn resize_handler(&self) -> &virtio_devices::Resize {
&self.resize_handler
}
pub fn hotplugged_size(&self) -> u64 {
self.hotplugged_size
}
pub fn hugepages(&self) -> bool {
self.hugepages
}
}
#[derive(Default)]
pub struct MemoryZone {
regions: Vec<Arc<GuestRegionMmap>>,
virtio_mem_zone: Option<VirtioMemZone>,
}
impl MemoryZone {
pub fn regions(&self) -> &Vec<Arc<GuestRegionMmap>> {
&self.regions
}
pub fn virtio_mem_zone(&self) -> &Option<VirtioMemZone> {
&self.virtio_mem_zone
}
}
pub type MemoryZones = HashMap<String, MemoryZone>;
struct GuestRamMapping {
slot: u32,
gpa: u64,
size: u64,
}
pub struct MemoryManager {
boot_guest_memory: GuestMemoryMmap,
guest_memory: GuestMemoryAtomic<GuestMemoryMmap>,
next_memory_slot: u32,
start_of_device_area: GuestAddress,
end_of_device_area: GuestAddress,
pub vm: Arc<dyn hypervisor::Vm>,
hotplug_slots: Vec<HotPlugState>,
selected_slot: usize,
mergeable: bool,
allocator: Arc<Mutex<SystemAllocator>>,
hotplug_method: HotplugMethod,
boot_ram: u64,
current_ram: u64,
next_hotplug_slot: usize,
snapshot: Mutex<Option<GuestMemoryLoadGuard<GuestMemoryMmap>>>,
shared: bool,
hugepages: bool,
hugepage_size: Option<u64>,
#[cfg(target_arch = "x86_64")]
sgx_epc_region: Option<SgxEpcRegion>,
user_provided_zones: bool,
snapshot_memory_regions: Vec<MemoryRegion>,
memory_zones: MemoryZones,
log_dirty: bool, // Enable dirty logging for created RAM regions
// Keep track of calls to create_userspace_mapping() for guest RAM.
// This is useful for getting the dirty pages as we need to know the
// slots that the mapping is created in.
guest_ram_mappings: Vec<GuestRamMapping>,
#[cfg(feature = "acpi")]
pub acpi_address: GuestAddress,
}
#[derive(Debug)]
pub enum Error {
/// Failed to create shared file.
SharedFileCreate(io::Error),
/// Failed to set shared file length.
SharedFileSetLen(io::Error),
/// Mmap backed guest memory error
GuestMemory(MmapError),
/// Failed to allocate a memory range.
MemoryRangeAllocation,
/// Error from region creation
GuestMemoryRegion(MmapRegionError),
/// No ACPI slot available
NoSlotAvailable,
/// Not enough space in the hotplug RAM region
InsufficientHotplugRam,
/// The requested hotplug memory addition is not a valid size
InvalidSize,
/// Failed to create the user memory region.
CreateUserMemoryRegion(hypervisor::HypervisorVmError),
/// Failed to remove the user memory region.
RemoveUserMemoryRegion(hypervisor::HypervisorVmError),
/// Failed to create EventFd.
EventFdFail(io::Error),
/// Eventfd write error
EventfdError(io::Error),
/// Failed to resize virtio-mem
VirtioMemResizeFail(virtio_devices::mem::Error),
/// Cannot restore VM
Restore(MigratableError),
/// Cannot create the system allocator
CreateSystemAllocator,
/// Invalid SGX EPC section size
#[cfg(target_arch = "x86_64")]
EpcSectionSizeInvalid,
/// Failed allocating SGX EPC region
#[cfg(target_arch = "x86_64")]
SgxEpcRangeAllocation,
/// Failed opening SGX virtual EPC device
#[cfg(target_arch = "x86_64")]
SgxVirtEpcOpen(io::Error),
/// Failed setting the SGX virtual EPC section size
#[cfg(target_arch = "x86_64")]
SgxVirtEpcFileSetLen(io::Error),
/// Failed opening SGX provisioning device
#[cfg(target_arch = "x86_64")]
SgxProvisionOpen(io::Error),
/// Failed enabling SGX provisioning
#[cfg(target_arch = "x86_64")]
SgxEnableProvisioning(hypervisor::HypervisorVmError),
/// Failed creating a new MmapRegion instance.
#[cfg(target_arch = "x86_64")]
NewMmapRegion(vm_memory::mmap::MmapRegionError),
/// No memory zones found.
MissingMemoryZones,
/// Memory configuration is not valid.
InvalidMemoryParameters,
/// Forbidden operation. Impossible to resize guest memory if it is
/// backed by user defined memory regions.
InvalidResizeWithMemoryZones,
/// It's invalid to try applying a NUMA policy to a memory zone that is
/// memory mapped with MAP_SHARED.
InvalidSharedMemoryZoneWithHostNuma,
/// Failed applying NUMA memory policy.
ApplyNumaPolicy(io::Error),
/// Memory zone identifier is not unique.
DuplicateZoneId,
/// No virtio-mem resizing handler found.
MissingVirtioMemHandler,
/// Unknown memory zone.
UnknownMemoryZone,
/// Invalid size for resizing. The size can be anything except 0.
InvalidHotplugSize,
/// Invalid hotplug method associated with memory zones resizing capability.
InvalidHotplugMethodWithMemoryZones,
/// Could not find specified memory zone identifier from hash map.
MissingZoneIdentifier,
/// Resizing the memory zone failed.
ResizeZone,
/// Guest address overflow
GuestAddressOverFlow,
/// Error opening snapshot file
SnapshotOpen(io::Error),
/// Error copying snapshot into region
SnapshotCopy(GuestMemoryError),
/// Failed to allocate MMIO address
AllocateMmioAddress,
}
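// Bit positions within the per-slot status register, followed by the byte
// offsets of the registers exposed by the memory hotplug device (see the
// BusDevice implementation below).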
const ENABLE_FLAG: usize = 0;
const INSERTING_FLAG: usize = 1;
const REMOVING_FLAG: usize = 2;
const EJECT_FLAG: usize = 3;
const BASE_OFFSET_LOW: u64 = 0;
const BASE_OFFSET_HIGH: u64 = 0x4;
const LENGTH_OFFSET_LOW: u64 = 0x8;
const LENGTH_OFFSET_HIGH: u64 = 0xC;
const STATUS_OFFSET: u64 = 0x14;
const SELECTION_OFFSET: u64 = 0;
// The MMIO address space size is reduced by 64k compared to the full
// physical address space. This is done for the following reasons:
// - Reduce the addressable space size by at least 4k to work around a Linux
// bug when the VMM allocates devices at the end of the addressable space
// - Windows requires the addressable space size to be 64k aligned
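// For example, with phys_bits = 40 this yields (1 << 40) - (1 << 16) = 0xff_ffff_0000.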
fn mmio_address_space_size(phys_bits: u8) -> u64 {
(1 << phys_bits) - (1 << 16)
}
impl BusDevice for MemoryManager {
fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
if self.selected_slot < self.hotplug_slots.len() {
let state = &self.hotplug_slots[self.selected_slot];
match offset {
BASE_OFFSET_LOW => {
data.copy_from_slice(&state.base.to_le_bytes()[..4]);
}
BASE_OFFSET_HIGH => {
data.copy_from_slice(&state.base.to_le_bytes()[4..]);
}
LENGTH_OFFSET_LOW => {
data.copy_from_slice(&state.length.to_le_bytes()[..4]);
}
LENGTH_OFFSET_HIGH => {
data.copy_from_slice(&state.length.to_le_bytes()[4..]);
}
STATUS_OFFSET => {
// The Linux kernel, quite reasonably, doesn't zero the memory it gives us.
data.fill(0);
if state.active {
data[0] |= 1 << ENABLE_FLAG;
}
if state.inserting {
data[0] |= 1 << INSERTING_FLAG;
}
if state.removing {
data[0] |= 1 << REMOVING_FLAG;
}
}
_ => {
warn!(
"Unexpected offset for accessing memory manager device: {:#}",
offset
);
}
}
} else {
warn!("Out of range memory slot: {}", self.selected_slot);
}
}
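// Writes either select the active hotplug slot (SELECTION_OFFSET) or
// acknowledge insertion/removal events for it (STATUS_OFFSET).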
fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
match offset {
SELECTION_OFFSET => {
self.selected_slot = usize::from(data[0]);
}
STATUS_OFFSET => {
if self.selected_slot < self.hotplug_slots.len() {
let state = &mut self.hotplug_slots[self.selected_slot];
// The ACPI code writes back a 1 to acknowledge the insertion
if (data[0] & (1 << INSERTING_FLAG) == 1 << INSERTING_FLAG) && state.inserting {
state.inserting = false;
}
// Ditto for removal
if (data[0] & (1 << REMOVING_FLAG) == 1 << REMOVING_FLAG) && state.removing {
state.removing = false;
}
// Trigger removal of "DIMM"
if data[0] & (1 << EJECT_FLAG) == 1 << EJECT_FLAG {
warn!("Ejection of memory not currently supported");
}
} else {
warn!("Out of range memory slot: {}", self.selected_slot);
}
}
_ => {
warn!(
"Unexpected offset for accessing memory manager device: {:#}",
offset
);
}
};
None
}
}
impl MemoryManager {
/// Creates all memory regions based on the available RAM ranges defined
/// by `ram_regions`, and based on the description of the memory zones.
/// In practice, this function can perform multiple memory mappings of the
/// same backing file if there's a hole in the address space between two
/// RAM ranges.
/// One example might be ram_regions containing 2 regions (0-3G and 4G-6G)
/// and zones containing two zones (size 1G and size 4G).
/// This function will create 3 resulting memory regions:
/// - First one mapping entirely the first memory zone on 0-1G range
/// - Second one mapping partially the second memory zone on 1G-3G range
/// - Third one mapping partially the second memory zone on 4G-6G range
fn create_memory_regions_from_zones(
ram_regions: &[(GuestAddress, usize)],
zones: &[MemoryZoneConfig],
prefault: bool,
) -> Result<(Vec<Arc<GuestRegionMmap>>, MemoryZones), Error> {
let mut zones = zones.to_owned();
let mut mem_regions = Vec::new();
let mut zone = zones.remove(0);
let mut zone_offset = 0;
let mut memory_zones = HashMap::new();
// Add zone id to the list of memory zones.
memory_zones.insert(zone.id.clone(), MemoryZone::default());
for ram_region in ram_regions.iter() {
let mut ram_region_offset = 0;
let mut exit = false;
loop {
let mut ram_region_consumed = false;
let mut pull_next_zone = false;
let ram_region_sub_size = ram_region.1 - ram_region_offset;
let zone_sub_size = zone.size as usize - zone_offset;
let file_offset = zone_offset as u64;
let region_start = ram_region
.0
.checked_add(ram_region_offset as u64)
.ok_or(Error::GuestAddressOverFlow)?;
let region_size = if zone_sub_size <= ram_region_sub_size {
if zone_sub_size == ram_region_sub_size {
ram_region_consumed = true;
}
ram_region_offset += zone_sub_size;
pull_next_zone = true;
zone_sub_size
} else {
zone_offset += ram_region_sub_size;
ram_region_consumed = true;
ram_region_sub_size
};
let region = MemoryManager::create_ram_region(
&zone.file,
file_offset,
region_start,
region_size,
prefault,
zone.shared,
zone.hugepages,
zone.hugepage_size,
zone.host_numa_node,
)?;
// Add region to the list of regions associated with the
// current memory zone.
if let Some(memory_zone) = memory_zones.get_mut(&zone.id) {
memory_zone.regions.push(region.clone());
}
mem_regions.push(region);
if pull_next_zone {
// Get the next zone and reset the offset.
zone_offset = 0;
if zones.is_empty() {
exit = true;
break;
}
zone = zones.remove(0);
// Check if the zone id already exists. In case it does, throw
// an error as we need unique identifiers. Otherwise, add
// the new zone id to the list of memory zones.
if memory_zones.contains_key(&zone.id) {
error!(
"Memory zone identifier '{}' found more than once. \
It must be unique",
zone.id,
);
return Err(Error::DuplicateZoneId);
}
memory_zones.insert(zone.id.clone(), MemoryZone::default());
}
if ram_region_consumed {
break;
}
}
if exit {
break;
}
}
Ok((mem_regions, memory_zones))
}
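// Copy the content of every saved region from its snapshot file back into guest memory.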
fn fill_saved_regions(&mut self, saved_regions: Vec<MemoryRegion>) -> Result<(), Error> {
for region in saved_regions {
if let Some(content) = region.content {
// Open (read only) the snapshot file for the given region.
let mut memory_region_file = OpenOptions::new()
.read(true)
.open(content)
.map_err(Error::SnapshotOpen)?;
self.guest_memory
.memory()
.read_exact_from(
GuestAddress(region.start_addr),
&mut memory_region_file,
region.size as usize,
)
.map_err(Error::SnapshotCopy)?;
}
}
Ok(())
}
pub fn new(
vm: Arc<dyn hypervisor::Vm>,
config: &MemoryConfig,
prefault: bool,
phys_bits: u8,
#[cfg(feature = "tdx")] tdx_enabled: bool,
) -> Result<Arc<Mutex<MemoryManager>>, Error> {
let user_provided_zones = config.size == 0;
let mut allow_mem_hotplug: bool = false;
let (ram_size, zones) = if !user_provided_zones {
if config.zones.is_some() {
error!(
"User defined memory regions can't be provided if the \
memory size is not 0"
);
return Err(Error::InvalidMemoryParameters);
}
if config.hotplug_size.is_some() {
allow_mem_hotplug = true;
}
if let Some(hotplugged_size) = config.hotplugged_size {
if let Some(hotplug_size) = config.hotplug_size {
if hotplugged_size > hotplug_size {
error!(
"'hotplugged_size' {} can't be bigger than \
'hotplug_size' {}",
hotplugged_size, hotplug_size,
);
return Err(Error::InvalidMemoryParameters);
}
} else {
error!(
"Invalid to define 'hotplugged_size' when there is\
no 'hotplug_size'"
);
return Err(Error::InvalidMemoryParameters);
}
if config.hotplug_method == HotplugMethod::Acpi {
error!(
"Invalid to define 'hotplugged_size' with hotplug \
method 'acpi'"
);
return Err(Error::InvalidMemoryParameters);
}
}
// Create a single zone from the global memory config. This lets
// us reuse the codepath for user defined memory zones.
let zones = vec![MemoryZoneConfig {
id: String::from(DEFAULT_MEMORY_ZONE),
size: config.size,
file: None,
shared: config.shared,
hugepages: config.hugepages,
hugepage_size: config.hugepage_size,
host_numa_node: None,
hotplug_size: config.hotplug_size,
hotplugged_size: config.hotplugged_size,
}];
(config.size, zones)
} else {
if config.zones.is_none() {
error!(
"User defined memory regions must be provided if the \
memory size is 0"
);
return Err(Error::MissingMemoryZones);
}
// Safe to unwrap as we checked right above that memory zones were
// provided.
let zones = config.zones.clone().unwrap();
if zones.is_empty() {
return Err(Error::MissingMemoryZones);
}
let mut total_ram_size: u64 = 0;
for zone in zones.iter() {
total_ram_size += zone.size;
if zone.shared && zone.file.is_some() && zone.host_numa_node.is_some() {
error!(
"Invalid to set host NUMA policy for a memory zone \
backed by a regular file and mapped as 'shared'"
);
return Err(Error::InvalidSharedMemoryZoneWithHostNuma);
}
if zone.hotplug_size.is_some() && config.hotplug_method == HotplugMethod::Acpi {
error!("Invalid to set ACPI hotplug method for memory zones");
return Err(Error::InvalidHotplugMethodWithMemoryZones);
}
if let Some(hotplugged_size) = zone.hotplugged_size {
if let Some(hotplug_size) = zone.hotplug_size {
if hotplugged_size > hotplug_size {
error!(
"'hotplugged_size' {} can't be bigger than \
'hotplug_size' {}",
hotplugged_size, hotplug_size,
);
return Err(Error::InvalidMemoryParameters);
}
} else {
error!(
"Invalid to define 'hotplugged_size' when there is\
no 'hotplug_size' for a memory zone"
);
return Err(Error::InvalidMemoryParameters);
}
if config.hotplug_method == HotplugMethod::Acpi {
error!(
"Invalid to define 'hotplugged_size' with hotplug \
method 'acpi'"
);
return Err(Error::InvalidMemoryParameters);
}
}
}
(total_ram_size, zones)
};
// Init guest memory
let arch_mem_regions = arch::arch_memory_regions(ram_size);
let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
.iter()
.filter(|r| r.2 == RegionType::Ram)
.map(|r| (r.0, r.1))
.collect();
let (mem_regions, mut memory_zones) =
Self::create_memory_regions_from_zones(&ram_regions, &zones, prefault)?;
let guest_memory =
GuestMemoryMmap::from_arc_regions(mem_regions).map_err(Error::GuestMemory)?;
let boot_guest_memory = guest_memory.clone();
let mmio_address_space_size = mmio_address_space_size(phys_bits);
debug_assert_eq!(
(((mmio_address_space_size) >> 16) << 16),
mmio_address_space_size
);
let end_of_device_area = GuestAddress(mmio_address_space_size - 1);
let mut start_of_device_area =
MemoryManager::start_addr(guest_memory.last_addr(), allow_mem_hotplug)?;
let mut virtio_mem_regions: Vec<Arc<GuestRegionMmap>> = Vec::new();
// Update list of memory zones for resize.
for zone in zones {
if let Some(memory_zone) = memory_zones.get_mut(&zone.id) {
if let Some(hotplug_size) = zone.hotplug_size {
if hotplug_size == 0 {
error!("'hotplug_size' can't be 0");
return Err(Error::InvalidHotplugSize);
}
if !user_provided_zones && config.hotplug_method == HotplugMethod::Acpi {
start_of_device_area = start_of_device_area
.checked_add(hotplug_size)
.ok_or(Error::GuestAddressOverFlow)?;
} else {
// Alignment must be "natural" i.e. same as size of block
let start_addr = GuestAddress(
(start_of_device_area.0 + virtio_devices::VIRTIO_MEM_ALIGN_SIZE - 1)
/ virtio_devices::VIRTIO_MEM_ALIGN_SIZE
* virtio_devices::VIRTIO_MEM_ALIGN_SIZE,
);
let region = MemoryManager::create_ram_region(
&None,
0,
start_addr,
hotplug_size as usize,
false,
zone.shared,
zone.hugepages,
zone.hugepage_size,
zone.host_numa_node,
)?;
virtio_mem_regions.push(region.clone());
memory_zone.virtio_mem_zone = Some(VirtioMemZone {
region,
resize_handler: virtio_devices::Resize::new()
.map_err(Error::EventFdFail)?,
hotplugged_size: zone.hotplugged_size.unwrap_or(0),
hugepages: zone.hugepages,
});
start_of_device_area = start_addr
.checked_add(hotplug_size)
.ok_or(Error::GuestAddressOverFlow)?;
}
}
} else {
return Err(Error::MissingZoneIdentifier);
}
}
let guest_memory = GuestMemoryAtomic::new(guest_memory);
let mut hotplug_slots = Vec::with_capacity(HOTPLUG_COUNT);
hotplug_slots.resize_with(HOTPLUG_COUNT, HotPlugState::default);
// Both MMIO and PIO address spaces start at address 0.
let allocator = Arc::new(Mutex::new(
SystemAllocator::new(
#[cfg(target_arch = "x86_64")]
{
GuestAddress(0)
},
#[cfg(target_arch = "x86_64")]
{
1 << 16
},
GuestAddress(0),
mmio_address_space_size,
layout::MEM_32BIT_DEVICES_START,
layout::MEM_32BIT_DEVICES_SIZE,
#[cfg(target_arch = "x86_64")]
vec![GsiApic::new(
X86_64_IRQ_BASE,
ioapic::NUM_IOAPIC_PINS as u32 - X86_64_IRQ_BASE,
)],
)
.ok_or(Error::CreateSystemAllocator)?,
));
#[cfg(feature = "acpi")]
let acpi_address = allocator
.lock()
.unwrap()
.allocate_mmio_addresses(None, MEMORY_MANAGER_ACPI_SIZE as u64, None)
.ok_or(Error::AllocateMmioAddress)?;
#[cfg(not(feature = "tdx"))]
let log_dirty = true;
#[cfg(feature = "tdx")]
let log_dirty = !tdx_enabled; // Cannot log dirty pages on a TD
let memory_manager = Arc::new(Mutex::new(MemoryManager {
boot_guest_memory,
guest_memory: guest_memory.clone(),
next_memory_slot: 0,
start_of_device_area,
end_of_device_area,
vm,
hotplug_slots,
selected_slot: 0,
mergeable: config.mergeable,
allocator: allocator.clone(),
hotplug_method: config.hotplug_method.clone(),
boot_ram: ram_size,
current_ram: ram_size,
next_hotplug_slot: 0,
snapshot: Mutex::new(None),
shared: config.shared,
hugepages: config.hugepages,
hugepage_size: config.hugepage_size,
#[cfg(target_arch = "x86_64")]
sgx_epc_region: None,
user_provided_zones,
snapshot_memory_regions: Vec::new(),
memory_zones,
guest_ram_mappings: Vec::new(),
#[cfg(feature = "acpi")]
acpi_address,
log_dirty,
}));
for region in guest_memory.memory().iter() {
let mut mm = memory_manager.lock().unwrap();
let slot = mm.create_userspace_mapping(
region.start_addr().raw_value(),
region.len() as u64,
region.as_ptr() as u64,
config.mergeable,
false,
log_dirty,
)?;
mm.guest_ram_mappings.push(GuestRamMapping {
gpa: region.start_addr().raw_value(),
size: region.len(),
slot,
});
}
for region in virtio_mem_regions.drain(..) {
let mut mm = memory_manager.lock().unwrap();
let slot = mm.create_userspace_mapping(
region.start_addr().raw_value(),
region.len() as u64,
region.as_ptr() as u64,
config.mergeable,
false,
log_dirty,
)?;
mm.guest_ram_mappings.push(GuestRamMapping {
gpa: region.start_addr().raw_value(),
size: region.len(),
slot,
});
allocator
.lock()
.unwrap()
.allocate_mmio_addresses(Some(region.start_addr()), region.len(), None)
.ok_or(Error::MemoryRangeAllocation)?;
mm.add_region(region)?;
}
// Allocate RAM and Reserved address ranges.
for region in arch_mem_regions.iter() {
allocator
.lock()
.unwrap()
.allocate_mmio_addresses(Some(region.0), region.1 as GuestUsize, None)
.ok_or(Error::MemoryRangeAllocation)?;
}
Ok(memory_manager)
}
pub fn new_from_snapshot(
snapshot: &Snapshot,
vm: Arc<dyn hypervisor::Vm>,
config: &MemoryConfig,
source_url: Option<&str>,
prefault: bool,
phys_bits: u8,
) -> Result<Arc<Mutex<MemoryManager>>, Error> {
let mm = MemoryManager::new(
vm,
config,
prefault,
phys_bits,
#[cfg(feature = "tdx")]
false,
)?;
if let Some(source_url) = source_url {
let vm_snapshot_path = url_to_path(source_url).map_err(Error::Restore)?;
let mem_snapshot: MemoryManagerSnapshotData = snapshot
.to_versioned_state(MEMORY_MANAGER_SNAPSHOT_ID)
.map_err(Error::Restore)?;
// Here we turn the content file name into a content file path as
// this will be needed to copy the content of the saved memory
// region into the newly created memory region.
// We simply ignore the content files that are None, as they
// represent regions that have been directly saved by the user, with
// no need for saving into a dedicated external file. For these
// files, the VmConfig already contains the information on where to
// find them.
let mut saved_regions = mem_snapshot.memory_regions;
for region in saved_regions.iter_mut() {
if let Some(content) = &mut region.content {
let mut memory_region_path = vm_snapshot_path.clone();
memory_region_path.push(content.clone());
*content = memory_region_path.to_str().unwrap().to_owned();
}
}
mm.lock().unwrap().fill_saved_regions(saved_regions)?;
}
Ok(mm)
}
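// Thin wrapper around the memfd_create(2) syscall, returning the file
// descriptor of the new anonymous memory file.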
fn memfd_create(name: &ffi::CStr, flags: u32) -> Result<RawFd, io::Error> {
let res = unsafe { libc::syscall(libc::SYS_memfd_create, name.as_ptr(), flags) };
if res < 0 {
Err(io::Error::last_os_error())
} else {
Ok(res as RawFd)
}
}
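// Thin wrapper around the mbind(2) syscall, used to bind a mapped range to a
// set of host NUMA nodes.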
fn mbind(
addr: *mut u8,
len: u64,
mode: u32,
nodemask: Vec<u64>,
maxnode: u64,
flags: u32,
) -> Result<(), io::Error> {
let res = unsafe {
libc::syscall(
libc::SYS_mbind,
addr as *mut libc::c_void,
len,
mode,
nodemask.as_ptr(),
maxnode,
flags,
)
};
if res < 0 {
Err(io::Error::last_os_error())
} else {
Ok(())
}
}
#[allow(clippy::too_many_arguments)]
fn create_ram_region(
backing_file: &Option<PathBuf>,
file_offset: u64,
start_addr: GuestAddress,
size: usize,
prefault: bool,
shared: bool,
hugepages: bool,
hugepage_size: Option<u64>,
host_numa_node: Option<u32>,
) -> Result<Arc<GuestRegionMmap>, Error> {
let (f, f_off) = match backing_file {
Some(ref file) => {
if file.is_dir() {
// Override file offset as it does not apply in this case.
info!(
"Ignoring file offset since the backing file is a \
temporary file created from the specified directory."
);
let fs_str = format!("{}{}", file.display(), "/tmpfile_XXXXXX");
let fs = ffi::CString::new(fs_str).unwrap();
let mut path = fs.as_bytes_with_nul().to_owned();
let path_ptr = path.as_mut_ptr() as *mut _;
let fd = unsafe { libc::mkstemp(path_ptr) };
unsafe { libc::unlink(path_ptr) };
let f = unsafe { File::from_raw_fd(fd) };
f.set_len(size as u64).map_err(Error::SharedFileSetLen)?;
(f, 0)
} else {
let f = OpenOptions::new()
.read(true)
.write(true)
.open(file)
.map_err(Error::SharedFileCreate)?;
(f, file_offset)
}
}
None => {
let fd = Self::memfd_create(
&ffi::CString::new("ch_ram").unwrap(),
if hugepages {
libc::MFD_HUGETLB
| if let Some(hugepage_size) = hugepage_size {
/*
* From the Linux kernel:
* Several system calls take a flag to request "hugetlb" huge pages.
* Without further specification, these system calls will use the
* system's default huge page size. If a system supports multiple
* huge page sizes, the desired huge page size can be specified in
* bits [26:31] of the flag arguments. The value in these 6 bits
* will encode the log2 of the huge page size.
*/
hugepage_size.trailing_zeros() << 26
} else {
// Use the system default huge page size
0
}
} else {
0
},
)
.map_err(Error::SharedFileCreate)?;
let f = unsafe { File::from_raw_fd(fd) };
f.set_len(size as u64).map_err(Error::SharedFileSetLen)?;
(f, 0)
}
};
let mut mmap_flags = libc::MAP_NORESERVE
| if shared {
libc::MAP_SHARED
} else {
libc::MAP_PRIVATE
};
if prefault {
mmap_flags |= libc::MAP_POPULATE;
}
let region = GuestRegionMmap::new(
MmapRegion::build(
Some(FileOffset::new(f, f_off)),
size,
libc::PROT_READ | libc::PROT_WRITE,
mmap_flags,
)
.map_err(Error::GuestMemoryRegion)?,
start_addr,
)
.map_err(Error::GuestMemory)?;
// Apply NUMA policy if needed.
if let Some(node) = host_numa_node {
let addr = region.deref().as_ptr();
let len = region.deref().size() as u64;
let mode = MPOL_BIND;
let mut nodemask: Vec<u64> = Vec::new();
let flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
// Linux is kind of buggy in the way it interprets maxnode as it
// will cut off the last node. That's why we have to add 1 to what
// we would consider as the proper maxnode value.
let maxnode = node as u64 + 1 + 1;
// Allocate the right size for the vector.
nodemask.resize((node as usize / 64) + 1, 0);
// Fill the global bitmask through the nodemask vector.
let idx = (node / 64) as usize;
let shift = node % 64;
nodemask[idx] |= 1u64 << shift;
// Policies are enforced by using MPOL_MF_MOVE flag as it will
// force the kernel to move all pages that might have been already
// allocated to the proper set of NUMA nodes. MPOL_MF_STRICT is
// used to throw an error if MPOL_MF_MOVE didn't succeed.
// MPOL_BIND is the selected mode as it specifies a strict policy
// that restricts memory allocation to the nodes specified in the
// nodemask.
Self::mbind(addr, len, mode, nodemask, maxnode, flags)
.map_err(Error::ApplyNumaPolicy)?;
}
Ok(Arc::new(region))
}
// Update the GuestMemoryMmap with the new range
fn add_region(&mut self, region: Arc<GuestRegionMmap>) -> Result<(), Error> {
let guest_memory = self
.guest_memory
.memory()
.insert_region(region)
.map_err(Error::GuestMemory)?;
self.guest_memory.lock().unwrap().replace(guest_memory);
Ok(())
}
//
// Calculate the start address of an area next to RAM.
//
// If memory hotplug is allowed, the start address needs to be aligned
// (rounded-up) to a 128MiB boundary.
// If memory hotplug is not allowed, there is no alignment required.
// On x86_64, if RAM ends below the 32-bit reserved area, the area starts
// at RAM_64BIT_START instead.
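// For example, with hotplug allowed, a last RAM address of 0x40ff_ffff is
// rounded up to the next 128MiB boundary, 0x4800_0000.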
#[allow(clippy::let_and_return)]
fn start_addr(mem_end: GuestAddress, allow_mem_hotplug: bool) -> Result<GuestAddress, Error> {
let mut start_addr = if allow_mem_hotplug {
GuestAddress(mem_end.0 | ((128 << 20) - 1))
} else {
mem_end
};
start_addr = start_addr
.checked_add(1)
.ok_or(Error::GuestAddressOverFlow)?;
#[cfg(target_arch = "x86_64")]
if mem_end < arch::layout::MEM_32BIT_RESERVED_START {
return Ok(arch::layout::RAM_64BIT_START);
}
Ok(start_addr)
}
pub fn add_ram_region(
&mut self,
start_addr: GuestAddress,
size: usize,
) -> Result<Arc<GuestRegionMmap>, Error> {
// Allocate memory for the region
let region = MemoryManager::create_ram_region(
&None,
0,
start_addr,
size,
false,
self.shared,
self.hugepages,
self.hugepage_size,
None,
)?;
// Map it into the guest
let slot = self.create_userspace_mapping(
region.start_addr().0,
region.len() as u64,
region.as_ptr() as u64,
self.mergeable,
false,
self.log_dirty,
)?;
self.guest_ram_mappings.push(GuestRamMapping {
gpa: region.start_addr().raw_value(),
size: region.len(),
slot,
});
self.add_region(Arc::clone(&region))?;
Ok(region)
}
fn hotplug_ram_region(&mut self, size: usize) -> Result<Arc<GuestRegionMmap>, Error> {
info!("Hotplugging new RAM: {}", size);
// Check that there is a free slot
if self.next_hotplug_slot >= HOTPLUG_COUNT {
return Err(Error::NoSlotAvailable);
}
// "Inserted" DIMM must have a size that is a multiple of 128MiB
if size % (128 << 20) != 0 {
return Err(Error::InvalidSize);
}
let start_addr = MemoryManager::start_addr(self.guest_memory.memory().last_addr(), true)?;
if start_addr.checked_add(size.try_into().unwrap()).unwrap() > self.start_of_device_area() {
return Err(Error::InsufficientHotplugRam);
}
let region = self.add_ram_region(start_addr, size)?;
// Add region to the list of regions associated with the default
// memory zone.
if let Some(memory_zone) = self.memory_zones.get_mut(DEFAULT_MEMORY_ZONE) {
memory_zone.regions.push(Arc::clone(&region));
}
// Tell the allocator
self.allocator
.lock()
.unwrap()
.allocate_mmio_addresses(Some(start_addr), size as GuestUsize, None)
.ok_or(Error::MemoryRangeAllocation)?;
// Update the slot so that it can be queried via the I/O port
let mut slot = &mut self.hotplug_slots[self.next_hotplug_slot];
slot.active = true;
slot.inserting = true;
slot.base = region.start_addr().0;
slot.length = region.len() as u64;
self.next_hotplug_slot += 1;
Ok(region)
}
pub fn guest_memory(&self) -> GuestMemoryAtomic<GuestMemoryMmap> {
self.guest_memory.clone()
}
pub fn boot_guest_memory(&self) -> GuestMemoryMmap {
self.boot_guest_memory.clone()
}
pub fn allocator(&self) -> Arc<Mutex<SystemAllocator>> {
self.allocator.clone()
}
pub fn start_of_device_area(&self) -> GuestAddress {
self.start_of_device_area
}
pub fn end_of_device_area(&self) -> GuestAddress {
self.end_of_device_area
}
pub fn allocate_memory_slot(&mut self) -> u32 {
let slot_id = self.next_memory_slot;
self.next_memory_slot += 1;
slot_id
}
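// Register a host virtual address range with the hypervisor as a new guest
// memory slot, optionally marking the pages as KSM-mergeable and enabling
// dirty page logging.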
pub fn create_userspace_mapping(
&mut self,
guest_phys_addr: u64,
memory_size: u64,
userspace_addr: u64,
mergeable: bool,
readonly: bool,
log_dirty: bool,
) -> Result<u32, Error> {
let slot = self.allocate_memory_slot();
let mem_region = self.vm.make_user_memory_region(
slot,
guest_phys_addr,
memory_size,
userspace_addr,
readonly,
log_dirty,
);
self.vm
.create_user_memory_region(mem_region)
.map_err(Error::CreateUserMemoryRegion)?;
// Mark the pages as mergeable if explicitly asked for.
if mergeable {
// Safe because the address and size are valid since the
// mmap succeeded.
let ret = unsafe {
libc::madvise(
userspace_addr as *mut libc::c_void,
memory_size as libc::size_t,
libc::MADV_MERGEABLE,
)
};
if ret != 0 {
let err = io::Error::last_os_error();
// Safe to unwrap because the error is constructed with
// last_os_error(), which ensures the output will be Some().
let errno = err.raw_os_error().unwrap();
if errno == libc::EINVAL {
warn!("kernel not configured with CONFIG_KSM");
} else {
warn!("madvise error: {}", err);
}
warn!("failed to mark pages as mergeable");
}
}
info!(
"Created userspace mapping: {:x} -> {:x} {:x}",
guest_phys_addr, userspace_addr, memory_size
);
Ok(slot)
}
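// Remove a previously created guest memory slot from the hypervisor and
// undo the KSM advice if the pages were marked as mergeable.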
pub fn remove_userspace_mapping(
&mut self,
guest_phys_addr: u64,
memory_size: u64,
userspace_addr: u64,
mergeable: bool,
slot: u32,
) -> Result<(), Error> {
let mem_region = self.vm.make_user_memory_region(
slot,
guest_phys_addr,
memory_size,
userspace_addr,
false, /* readonly -- don't care */
false, /* log dirty */
);
self.vm
.remove_user_memory_region(mem_region)
.map_err(Error::RemoveUserMemoryRegion)?;
// Mark the pages as unmergeable if they were previously marked as
// mergeable.
if mergeable {
// Safe because the address and size are valid as the region was
// previously advised.
let ret = unsafe {
libc::madvise(
userspace_addr as *mut libc::c_void,
memory_size as libc::size_t,
libc::MADV_UNMERGEABLE,
)
};
if ret != 0 {
let err = io::Error::last_os_error();
// Safe to unwrap because the error is constructed with
// last_os_error(), which ensures the output will be Some().
let errno = err.raw_os_error().unwrap();
if errno == libc::EINVAL {
warn!("kernel not configured with CONFIG_KSM");
} else {
warn!("madvise error: {}", err);
}
warn!("failed to mark pages as unmergeable");
}
}
info!(
"Removed userspace mapping: {:x} -> {:x} {:x}",
guest_phys_addr, userspace_addr, memory_size
);
Ok(())
}
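// Forward a resize request to the virtio-mem handler of the given memory zone.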
pub fn virtio_mem_resize(&mut self, id: &str, size: u64) -> Result<(), Error> {
if let Some(memory_zone) = self.memory_zones.get_mut(id) {
if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
virtio_mem_zone
.resize_handler()
.work(size)
.map_err(Error::VirtioMemResizeFail)?;
} else {
error!("Failed resizing virtio-mem region: No virtio-mem handler");
return Err(Error::MissingVirtioMemHandler);
}
return Ok(());
}
error!("Failed resizing virtio-mem region: Unknown memory zone");
Err(Error::UnknownMemoryZone)
}
/// In case this function resulted in adding a new memory region to the
/// guest memory, the new region is returned to the caller. The virtio-mem
/// use case never adds a new region as the whole hotpluggable memory has
/// already been allocated at boot time.
pub fn resize(&mut self, desired_ram: u64) -> Result<Option<Arc<GuestRegionMmap>>, Error> {
if self.user_provided_zones {
error!(
"Not allowed to resize guest memory when backed with user \
defined memory zones."
);
return Err(Error::InvalidResizeWithMemoryZones);
}
let mut region: Option<Arc<GuestRegionMmap>> = None;
match self.hotplug_method {
HotplugMethod::VirtioMem => {
if desired_ram >= self.boot_ram {
self.virtio_mem_resize(DEFAULT_MEMORY_ZONE, desired_ram - self.boot_ram)?;
self.current_ram = desired_ram;
}
}
HotplugMethod::Acpi => {
if desired_ram > self.current_ram {
region =
Some(self.hotplug_ram_region((desired_ram - self.current_ram) as usize)?);
self.current_ram = desired_ram;
}
}
}
Ok(region)
}
pub fn resize_zone(&mut self, id: &str, virtio_mem_size: u64) -> Result<(), Error> {
if !self.user_provided_zones {
error!(
"Not allowed to resize guest memory zone when no zone is \
defined."
);
return Err(Error::ResizeZone);
}
self.virtio_mem_resize(id, virtio_mem_size)
}
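// Set up the SGX EPC region: enable SGX provisioning, allocate a contiguous
// guest address range, and map each EPC section from /dev/sgx_vepc into it.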
#[cfg(target_arch = "x86_64")]
pub fn setup_sgx(
&mut self,
sgx_epc_config: Vec<SgxEpcConfig>,
vm: &Arc<dyn hypervisor::Vm>,
) -> Result<(), Error> {
let file = OpenOptions::new()
.read(true)
.open("/dev/sgx_provision")
.map_err(Error::SgxProvisionOpen)?;
vm.enable_sgx_attribute(file)
.map_err(Error::SgxEnableProvisioning)?;
// Go over each EPC section and verify its size is a 4k multiple. At
// the same time, calculate the total size needed for the contiguous
// EPC region.
let mut epc_region_size = 0;
for epc_section in sgx_epc_config.iter() {
if epc_section.size == 0 {
return Err(Error::EpcSectionSizeInvalid);
}
if epc_section.size & 0x0fff != 0 {
return Err(Error::EpcSectionSizeInvalid);
}
epc_region_size += epc_section.size;
}
// Now that we know about the total size for the EPC region, we can
// proceed with the allocation of the entire range. The EPC region
// must be 4kiB aligned.
let epc_region_start = self
.allocator
.lock()
.unwrap()
.allocate_mmio_addresses(None, epc_region_size as GuestUsize, Some(0x1000))
.ok_or(Error::SgxEpcRangeAllocation)?;
let mut sgx_epc_region = SgxEpcRegion::new(epc_region_start, epc_region_size as GuestUsize);
// Each section can be memory mapped into the allocated region.
let mut epc_section_start = epc_region_start.raw_value();
for epc_section in sgx_epc_config.iter() {
let file = OpenOptions::new()
.read(true)
.write(true)
.open("/dev/sgx_vepc")
.map_err(Error::SgxVirtEpcOpen)?;
let prot = PROT_READ | PROT_WRITE;
let mut flags = MAP_NORESERVE | MAP_SHARED;
if epc_section.prefault {
flags |= MAP_POPULATE;
}
// We can't use the vm-memory crate to perform the memory mapping
// here as it would try to ensure the size of the backing file
// matches the size of the expected mapping. The /dev/sgx_vepc
// device does not work that way: it provides a file descriptor
// whose size does not match the mapping size, as it's just a way to
// let KVM know that an EPC section is being created for the guest.
let host_addr = unsafe {
libc::mmap(
std::ptr::null_mut(),
epc_section.size as usize,
prot,
flags,
file.as_raw_fd(),
0,
)
} as u64;
let _mem_slot = self.create_userspace_mapping(
epc_section_start,
epc_section.size,
host_addr,
false,
false,
false,
)?;
sgx_epc_region.insert(
epc_section.id.clone(),
SgxEpcSection::new(
GuestAddress(epc_section_start),
epc_section.size as GuestUsize,
),
);
epc_section_start += epc_section.size;
}
self.sgx_epc_region = Some(sgx_epc_region);
Ok(())
}
#[cfg(target_arch = "x86_64")]
pub fn sgx_epc_region(&self) -> &Option<SgxEpcRegion> {
&self.sgx_epc_region
}
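// A file created through memfd_create() (or created and then unlinked, as
// done for directory-backed regions) reports an st_nlink of 0, whereas a
// regular backing file on the host filesystem has at least one link.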
pub fn is_hardlink(f: &File) -> bool {
let mut stat = std::mem::MaybeUninit::<libc::stat>::uninit();
let ret = unsafe { libc::fstat(f.as_raw_fd(), stat.as_mut_ptr()) };
if ret != 0 {
error!("Couldn't fstat the backing file");
return false;
}
unsafe { (*stat.as_ptr()).st_nlink as usize > 0 }
}
pub fn memory_zones(&self) -> &MemoryZones {
&self.memory_zones
}
}
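// AML object emitting a Notify() on the matching memory slot device;
// aggregated into the MTFY method below.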
#[cfg(feature = "acpi")]
struct MemoryNotify {
slot_id: usize,
}
#[cfg(feature = "acpi")]
impl Aml for MemoryNotify {
fn to_aml_bytes(&self) -> Vec<u8> {
let object = aml::Path::new(&format!("M{:03}", self.slot_id));
aml::If::new(
&aml::Equal::new(&aml::Arg(0), &self.slot_id),
vec![&aml::Notify::new(&object, &aml::Arg(1))],
)
.to_aml_bytes()
}
}
#[cfg(feature = "acpi")]
struct MemorySlot {
slot_id: usize,
}
#[cfg(feature = "acpi")]
impl Aml for MemorySlot {
fn to_aml_bytes(&self) -> Vec<u8> {
aml::Device::new(
format!("M{:03}", self.slot_id).as_str().into(),
vec![
&aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C80")),
&aml::Name::new("_UID".into(), &self.slot_id),
/*
_STA return value:
Bit [0] Set if the device is present.
Bit [1] Set if the device is enabled and decoding its resources.
Bit [2] Set if the device should be shown in the UI.
Bit [3] Set if the device is functioning properly (cleared if device failed its diagnostics).
Bit [4] Set if the battery is present.
Bits [31:5] Reserved (must be cleared).
*/
&aml::Method::new(
"_STA".into(),
0,
false,
// Call into MSTA method which will interrogate device
vec![&aml::Return::new(&aml::MethodCall::new(
"MSTA".into(),
vec![&self.slot_id],
))],
),
// Get details of memory
&aml::Method::new(
"_CRS".into(),
0,
false,
// Call into MCRS which provides actual memory details
vec![&aml::Return::new(&aml::MethodCall::new(
"MCRS".into(),
vec![&self.slot_id],
))],
),
],
)
.to_aml_bytes()
}
}
#[cfg(feature = "acpi")]
struct MemorySlots {
slots: usize,
}
#[cfg(feature = "acpi")]
impl Aml for MemorySlots {
fn to_aml_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
for slot_id in 0..self.slots {
bytes.extend_from_slice(&MemorySlot { slot_id }.to_aml_bytes());
}
bytes
}
}
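// AML methods shared by all memory slots: MTFY (notify a slot), MSCN (scan
// for insertion/removal events), MSTA (slot status) and MCRS (slot address
// range).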
#[cfg(feature = "acpi")]
struct MemoryMethods {
slots: usize,
}
#[cfg(feature = "acpi")]
impl Aml for MemoryMethods {
fn to_aml_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
// Add "MTFY" notification method
let mut memory_notifies = Vec::new();
for slot_id in 0..self.slots {
memory_notifies.push(MemoryNotify { slot_id });
}
let mut memory_notifies_refs: Vec<&dyn aml::Aml> = Vec::new();
for memory_notifier in memory_notifies.iter() {
memory_notifies_refs.push(memory_notifier);
}
bytes.extend_from_slice(
&aml::Method::new("MTFY".into(), 2, true, memory_notifies_refs).to_aml_bytes(),
);
// MSCN method
bytes.extend_from_slice(
&aml::Method::new(
"MSCN".into(),
0,
true,
vec![
// Take lock defined above
&aml::Acquire::new("MLCK".into(), 0xffff),
&aml::Store::new(&aml::Local(0), &aml::ZERO),
&aml::While::new(
&aml::LessThan::new(&aml::Local(0), &self.slots),
vec![
// Write slot number (in first argument) to I/O port via field
&aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Local(0)),
// Check if MINS bit is set (inserting)
&aml::If::new(
&aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MINS"), &aml::ONE),
// Notify device if it is
vec![
&aml::MethodCall::new(
"MTFY".into(),
vec![&aml::Local(0), &aml::ONE],
),
// Reset MINS bit
&aml::Store::new(
&aml::Path::new("\\_SB_.MHPC.MINS"),
&aml::ONE,
),
],
),
// Check if MRMV bit is set
&aml::If::new(
&aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MRMV"), &aml::ONE),
// Notify device if it is (with the eject constant 0x3)
vec![
&aml::MethodCall::new(
"MTFY".into(),
vec![&aml::Local(0), &3u8],
),
// Reset MRMV bit
&aml::Store::new(
&aml::Path::new("\\_SB_.MHPC.MRMV"),
&aml::ONE,
),
],
),
&aml::Add::new(&aml::Local(0), &aml::Local(0), &aml::ONE),
],
),
// Release lock
&aml::Release::new("MLCK".into()),
],
)
.to_aml_bytes(),
);
bytes.extend_from_slice(
// Memory status method
&aml::Method::new(
"MSTA".into(),
1,
true,
vec![
// Take lock defined above
&aml::Acquire::new("MLCK".into(), 0xffff),
// Write slot number (in first argument) to I/O port via field
&aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Arg(0)),
&aml::Store::new(&aml::Local(0), &aml::ZERO),
// Check if MEN_ bit is set, if so make the local variable 0xf (see _STA for details of meaning)
&aml::If::new(
&aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MEN_"), &aml::ONE),
vec![&aml::Store::new(&aml::Local(0), &0xfu8)],
),
// Release lock
&aml::Release::new("MLCK".into()),
// Return 0 or 0xf
&aml::Return::new(&aml::Local(0)),
],
)
.to_aml_bytes(),
);
bytes.extend_from_slice(
// Memory range method
&aml::Method::new(
"MCRS".into(),
1,
true,
vec![
// Take lock defined above
&aml::Acquire::new("MLCK".into(), 0xffff),
// Write slot number (in first argument) to I/O port via field
&aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Arg(0)),
&aml::Name::new(
"MR64".into(),
&aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
aml::AddressSpaceCachable::Cacheable,
true,
0x0000_0000_0000_0000u64,
0xFFFF_FFFF_FFFF_FFFEu64,
)]),
),
&aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &14usize, "MINL".into()),
&aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &18usize, "MINH".into()),
&aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &22usize, "MAXL".into()),
&aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &26usize, "MAXH".into()),
&aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &38usize, "LENL".into()),
&aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &42usize, "LENH".into()),
&aml::Store::new(&aml::Path::new("MINL"), &aml::Path::new("\\_SB_.MHPC.MHBL")),
&aml::Store::new(&aml::Path::new("MINH"), &aml::Path::new("\\_SB_.MHPC.MHBH")),
&aml::Store::new(&aml::Path::new("LENL"), &aml::Path::new("\\_SB_.MHPC.MHLL")),
&aml::Store::new(&aml::Path::new("LENH"), &aml::Path::new("\\_SB_.MHPC.MHLH")),
&aml::Add::new(
&aml::Path::new("MAXL"),
&aml::Path::new("MINL"),
&aml::Path::new("LENL"),
),
&aml::Add::new(
&aml::Path::new("MAXH"),
&aml::Path::new("MINH"),
&aml::Path::new("LENH"),
),
// MAX must end up as MIN + LEN - 1, computed on split low/high DWORDs:
// add the low and high halves separately, carry into the high half if
// the low addition overflowed, then subtract one from the low half.
&aml::If::new(
&aml::LessThan::new(&aml::Path::new("MAXL"), &aml::Path::new("MINL")),
vec![&aml::Add::new(
&aml::Path::new("MAXH"),
&aml::ONE,
&aml::Path::new("MAXH"),
)],
),
&aml::Subtract::new(
&aml::Path::new("MAXL"),
&aml::Path::new("MAXL"),
&aml::ONE,
),
// Release lock
&aml::Release::new("MLCK".into()),
&aml::Return::new(&aml::Path::new("MR64")),
],
)
.to_aml_bytes(),
);
bytes
}
}
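// Top-level ACPI description: the _SB_.MHPC memory hotplug controller with
// its register fields mapped over the MMIO range at acpi_address, plus (on
// x86_64) the SGX EPC device when present.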
#[cfg(feature = "acpi")]
impl Aml for MemoryManager {
fn to_aml_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
// Memory Hotplug Controller
bytes.extend_from_slice(
&aml::Device::new(
"_SB_.MHPC".into(),
vec![
&aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
&aml::Name::new("_UID".into(), &"Memory Hotplug Controller"),
// Mutex to protect concurrent access as we write to choose slot and then read back status
&aml::Mutex::new("MLCK".into(), 0),
&aml::Name::new(
"_CRS".into(),
&aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
aml::AddressSpaceCachable::NotCacheable,
true,
self.acpi_address.0 as u64,
self.acpi_address.0 + MEMORY_MANAGER_ACPI_SIZE as u64 - 1,
)]),
),
// OpRegion and Fields map MMIO range into individual field values
&aml::OpRegion::new(
"MHPR".into(),
aml::OpRegionSpace::SystemMemory,
self.acpi_address.0 as usize,
MEMORY_MANAGER_ACPI_SIZE,
),
&aml::Field::new(
"MHPR".into(),
aml::FieldAccessType::DWord,
aml::FieldUpdateRule::Preserve,
vec![
aml::FieldEntry::Named(*b"MHBL", 32), // Base (low 4 bytes)
aml::FieldEntry::Named(*b"MHBH", 32), // Base (high 4 bytes)
aml::FieldEntry::Named(*b"MHLL", 32), // Length (low 4 bytes)
aml::FieldEntry::Named(*b"MHLH", 32), // Length (high 4 bytes)
],
),
&aml::Field::new(
"MHPR".into(),
aml::FieldAccessType::DWord,
aml::FieldUpdateRule::Preserve,
vec![
aml::FieldEntry::Reserved(128),
aml::FieldEntry::Named(*b"MHPX", 32), // PXM
],
),
&aml::Field::new(
"MHPR".into(),
aml::FieldAccessType::Byte,
aml::FieldUpdateRule::WriteAsZeroes,
vec![
aml::FieldEntry::Reserved(160),
aml::FieldEntry::Named(*b"MEN_", 1), // Enabled
aml::FieldEntry::Named(*b"MINS", 1), // Inserting
aml::FieldEntry::Named(*b"MRMV", 1), // Removing
aml::FieldEntry::Named(*b"MEJ0", 1), // Ejecting
],
),
&aml::Field::new(
"MHPR".into(),
aml::FieldAccessType::DWord,
aml::FieldUpdateRule::Preserve,
vec![
aml::FieldEntry::Named(*b"MSEL", 32), // Selector
aml::FieldEntry::Named(*b"MOEV", 32), // Event
aml::FieldEntry::Named(*b"MOSC", 32), // OSC
],
),
&MemoryMethods {
slots: self.hotplug_slots.len(),
},
&MemorySlots {
slots: self.hotplug_slots.len(),
},
],
)
.to_aml_bytes(),
);
#[cfg(target_arch = "x86_64")]
{
if let Some(sgx_epc_region) = &self.sgx_epc_region {
let min = sgx_epc_region.start().raw_value() as u64;
let max = min + sgx_epc_region.size() as u64 - 1;
// SGX EPC region
bytes.extend_from_slice(
&aml::Device::new(
"_SB_.EPC_".into(),
vec![
&aml::Name::new("_HID".into(), &aml::EisaName::new("INT0E0C")),
// QWORD describing the EPC region start and size
&aml::Name::new(
"_CRS".into(),
&aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
aml::AddressSpaceCachable::NotCacheable,
true,
min,
max,
)]),
),
&aml::Method::new(
"_STA".into(),
0,
false,
vec![&aml::Return::new(&0xfu8)],
),
],
)
.to_aml_bytes(),
);
}
}
bytes
}
}
impl Pausable for MemoryManager {}
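// Serialized description of one guest memory region: an optional path to the
// file holding its content, plus its start address and size.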
#[derive(Clone, Versionize)]
pub struct MemoryRegion {
content: Option<String>,
start_addr: u64,
size: u64,
}
#[derive(Versionize)]
pub struct MemoryManagerSnapshotData {
memory_regions: Vec<MemoryRegion>,
}
impl VersionMapped for MemoryManagerSnapshotData {}
impl Snapshottable for MemoryManager {
fn id(&self) -> String {
MEMORY_MANAGER_SNAPSHOT_ID.to_string()
}
fn snapshot(&mut self) -> result::Result<Snapshot, MigratableError> {
let mut memory_manager_snapshot = Snapshot::new(MEMORY_MANAGER_SNAPSHOT_ID);
let guest_memory = self.guest_memory.memory();
let mut memory_regions: Vec<MemoryRegion> = Vec::new();
for (index, region) in guest_memory.iter().enumerate() {
if region.len() == 0 {
return Err(MigratableError::Snapshot(anyhow!("Zero length region")));
}
let mut content = Some(PathBuf::from(format!("memory-region-{}", index)));
if let Some(file_offset) = region.file_offset() {
if (region.flags() & libc::MAP_SHARED == libc::MAP_SHARED)
&& Self::is_hardlink(file_offset.file())
{
// In this very specific case, we know the memory region
// is backed by a file on the host filesystem that can be
// accessed by the user, and additionally the mapping is
// shared, which means that modifications to the content
// are written to the actual file.
// When meeting these conditions, we can skip the copy of
// the memory content for this specific region, as we can
// assume the user will have it saved through the backing
// file already.
content = None;
}
}
memory_regions.push(MemoryRegion {
content: content.map(|p| p.to_str().unwrap().to_owned()),
start_addr: region.start_addr().0,
size: region.len(),
});
}
// Store locally this list of regions as it will be used through the
// Transportable::send() implementation. The point is to avoid the
// duplication of code regarding the creation of the path for each
// region. The 'snapshot' step creates the list of memory regions,
// including information about the need to copy a memory region or
// not. This saves the 'send' step having to go through the same
// process, and instead it can directly proceed with storing the
// memory region content for the regions requiring it.
self.snapshot_memory_regions = memory_regions.clone();
memory_manager_snapshot.add_data_section(SnapshotDataSection::new_from_versioned_state(
MEMORY_MANAGER_SNAPSHOT_ID,
&MemoryManagerSnapshotData { memory_regions },
)?);
let mut memory_snapshot = self.snapshot.lock().unwrap();
*memory_snapshot = Some(guest_memory);
Ok(memory_manager_snapshot)
}
}
impl Transportable for MemoryManager {
fn send(
&self,
_snapshot: &Snapshot,
destination_url: &str,
) -> result::Result<(), MigratableError> {
let vm_memory_snapshot_path = url_to_path(destination_url)?;
if let Some(guest_memory) = &*self.snapshot.lock().unwrap() {
for region in self.snapshot_memory_regions.iter() {
if let Some(content) = &region.content {
let mut memory_region_path = vm_memory_snapshot_path.clone();
memory_region_path.push(content);
// Create the snapshot file for the region
let mut memory_region_file = OpenOptions::new()
.read(true)
.write(true)
.create_new(true)
.open(memory_region_path)
.map_err(|e| MigratableError::MigrateSend(e.into()))?;
guest_memory
.write_all_to(
GuestAddress(region.start_addr),
&mut memory_region_file,
region.size as usize,
)
.map_err(|e| MigratableError::MigrateSend(e.into()))?;
}
}
}
Ok(())
}
}
impl Migratable for MemoryManager {
// Start the dirty log in the hypervisor (kvm/mshv).
// Also, reset the dirty bitmap logged by the vmm.
// Just before we do a bulk copy we want to start/clear the dirty log so that
// pages touched during our bulk copy are tracked.
fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
self.vm.start_dirty_log().map_err(|e| {
MigratableError::MigrateSend(anyhow!("Error starting VM dirty log {}", e))
})?;
for r in self.guest_memory.memory().iter() {
r.bitmap().reset();
}
Ok(())
}
fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
self.vm.stop_dirty_log().map_err(|e| {
MigratableError::MigrateSend(anyhow!("Error stopping VM dirty log {}", e))
})?;
Ok(())
}
// Generate a table for the pages that are dirty. The dirty pages are collapsed
// together in the table if they are contiguous.
fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
let mut table = MemoryRangeTable::default();
for r in &self.guest_ram_mappings {
let vm_dirty_bitmap = self.vm.get_dirty_log(r.slot, r.gpa, r.size).map_err(|e| {
MigratableError::MigrateSend(anyhow!("Error getting VM dirty log {}", e))
})?;
let vmm_dirty_bitmap = match self.guest_memory.memory().find_region(GuestAddress(r.gpa))
{
Some(region) => {
assert!(region.start_addr().raw_value() == r.gpa);
assert!(region.len() == r.size);
region.bitmap().get_and_reset()
}
None => {
return Err(MigratableError::MigrateSend(anyhow!(
"Error finding 'guest memory region' with address {:x}",
r.gpa
)))
}
};
let dirty_bitmap: Vec<u64> = vm_dirty_bitmap
.iter()
.zip(vmm_dirty_bitmap.iter())
.map(|(x, y)| x | y)
.collect();
let sub_table = MemoryRangeTable::from_bitmap(dirty_bitmap, r.gpa);
if sub_table.regions().is_empty() {
info!("Dirty Memory Range Table is empty");
} else {
info!("Dirty Memory Range Table:");
for range in sub_table.regions() {
info!("GPA: {:x} size: {} (KiB)", range.gpa, range.length / 1024);
}
}
table.extend(sub_table);
}
Ok(table)
}
}