diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs new file mode 100644 index 000000000..83371592a --- /dev/null +++ b/vmm/src/device_manager.rs @@ -0,0 +1,956 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// Copyright © 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause +// + +use crate::config::ConsoleOutputMode; +use crate::vm::VmInfo; + +use devices::ioapic; +use kvm_bindings::{kvm_msi, kvm_userspace_memory_region}; +use kvm_ioctls::*; +use libc::O_TMPFILE; +use libc::{EFD_NONBLOCK, TIOCGWINSZ}; + +use net_util::Tap; +use pci::{ + InterruptDelivery, InterruptParameters, PciConfigIo, PciDevice, PciInterruptPin, PciRoot, +}; +use qcow::{self, ImageType, QcowFile}; + +use std::fs::{File, OpenOptions}; +use std::io::{self, sink, stdout}; + +use std::os::unix::fs::OpenOptionsExt; +use std::os::unix::io::AsRawFd; +use std::ptr::null_mut; +use std::result; +use std::sync::{Arc, Mutex, RwLock}; +use vfio::{VfioDevice, VfioPciDevice, VfioPciError}; +use vm_allocator::SystemAllocator; +use vm_memory::{Address, GuestMemoryMmap, GuestUsize}; +use vm_virtio::transport::VirtioPciDevice; +use vm_virtio::{VirtioSharedMemory, VirtioSharedMemoryList}; +use vmm_sys_util::eventfd::EventFd; + +const DEFAULT_MSIX_VEC_NUM: u16 = 2; + +// IOAPIC address range +const IOAPIC_RANGE_ADDR: u64 = 0xfec0_0000; +const IOAPIC_RANGE_SIZE: u64 = 0x20; + +/// Errors associated with device manager +#[derive(Debug)] +pub enum DeviceManagerError { + /// Cannot create EventFd. 
+ EventFd(io::Error), + + /// Cannot open disk path + Disk(io::Error), + + /// Cannot create vhost-user-net device + CreateVhostUserNet(vm_virtio::vhost_user::Error), + + /// Cannot create virtio-blk device + CreateVirtioBlock(io::Error), + + /// Cannot create virtio-net device + CreateVirtioNet(vm_virtio::net::Error), + + /// Cannot create virtio-console device + CreateVirtioConsole(io::Error), + + /// Cannot create virtio-rng device + CreateVirtioRng(io::Error), + + /// Cannot create virtio-fs device + CreateVirtioFs(vm_virtio::vhost_user::Error), + + /// Cannot create virtio-pmem device + CreateVirtioPmem(io::Error), + + /// Failed parsing disk image format + DetectImageType(qcow::Error), + + /// Cannot open qcow disk path + QcowDeviceCreate(qcow::Error), + + /// Cannot open tap interface + OpenTap(net_util::TapError), + + /// Cannot allocate IRQ. + AllocateIrq, + + /// Cannot configure the IRQ. + Irq(io::Error), + + /// Cannot allocate PCI BARs + AllocateBars(pci::PciDeviceError), + + /// Cannot register ioevent. + RegisterIoevent(io::Error), + + /// Cannot create virtio device + VirtioDevice(vmm_sys_util::errno::Error), + + /// Cannot add PCI device + AddPciDevice(pci::PciRootError), + + /// Cannot open persistent memory file + PmemFileOpen(io::Error), + + /// Cannot set persistent memory file size + PmemFileSetLen(io::Error), + + /// Cannot find a memory range for persistent memory + PmemRangeAllocation, + + /// Cannot find a memory range for virtio-fs + FsRangeAllocation, + + /// Error creating serial output file + SerialOutputFileOpen(io::Error), + + /// Error creating console output file + ConsoleOutputFileOpen(io::Error), + + /// Cannot create a VFIO device + VfioCreate(vfio::VfioError), + + /// Cannot create a VFIO PCI device + VfioPciCreate(vfio::VfioPciError), + + /// Failed to map VFIO MMIO region. + VfioMapRegion(VfioPciError), + + /// Failed to create the KVM device. + CreateKvmDevice(io::Error), + + /// Failed to memory map. 
+ Mmap(io::Error), + + /// Cannot add legacy device to Bus. + BusError(devices::BusError), +} +pub type DeviceManagerResult = result::Result; + +struct BusInfo<'a> { + io: &'a mut devices::Bus, + mmio: &'a mut devices::Bus, +} + +struct InterruptInfo<'a> { + msi_capable: bool, + ioapic: &'a Option>>, +} + +struct KernelIoapicIrq { + evt: EventFd, +} + +impl KernelIoapicIrq { + fn new(evt: EventFd) -> Self { + KernelIoapicIrq { evt } + } +} + +impl devices::Interrupt for KernelIoapicIrq { + fn deliver(&self) -> result::Result<(), io::Error> { + self.evt.write(1) + } +} + +struct UserIoapicIrq { + ioapic: Arc>, + irq: usize, +} + +impl UserIoapicIrq { + fn new(ioapic: Arc>, irq: usize) -> Self { + UserIoapicIrq { ioapic, irq } + } +} + +impl devices::Interrupt for UserIoapicIrq { + fn deliver(&self) -> result::Result<(), io::Error> { + self.ioapic + .lock() + .unwrap() + .service_irq(self.irq) + .map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!("failed to inject IRQ #{}: {:?}", self.irq, e), + ) + }) + } +} + +pub fn get_win_size() -> (u16, u16) { + #[repr(C)] + struct WS { + rows: u16, + cols: u16, + }; + let ws: WS = WS { + rows: 0u16, + cols: 0u16, + }; + unsafe { + libc::ioctl(0, TIOCGWINSZ, &ws); + } + + (ws.cols, ws.rows) +} + +pub struct DeviceManager { + pub io_bus: devices::Bus, + pub mmio_bus: devices::Bus, + + // Serial port on 0x3f8 + pub serial: Option>>, + pub console_input: Option>, + + // i8042 device for i8042 reset + i8042: Arc>, + + #[cfg(feature = "acpi")] + // ACPI device for reboot/shutdwon + acpi_device: Arc>, + + // Shutdown (exit) and reboot (reset) control + pub exit_evt: EventFd, + pub reset_evt: EventFd, + + // IOAPIC + pub ioapic: Option>>, + + // PCI root + pci: Arc>, + + // mmap()ed region to unmap on drop + mmap_regions: Vec<(*mut libc::c_void, usize)>, +} + +impl DeviceManager { + pub fn new( + vm_info: &VmInfo, + allocator: &mut SystemAllocator, + msi_capable: bool, + userspace_ioapic: bool, + mut 
mem_slots: u32, + ) -> DeviceManagerResult { + let mut io_bus = devices::Bus::new(); + let mut mmio_bus = devices::Bus::new(); + + let mut buses = BusInfo { + io: &mut io_bus, + mmio: &mut mmio_bus, + }; + + let ioapic = if userspace_ioapic { + // Create IOAPIC + Some(Arc::new(Mutex::new(ioapic::Ioapic::new( + vm_info.vm_fd.clone(), + )))) + } else { + None + }; + + let interrupt_info = InterruptInfo { + msi_capable, + ioapic: &ioapic, + }; + + let serial_writer: Option> = match vm_info.vm_cfg.serial.mode { + ConsoleOutputMode::File => Some(Box::new( + File::create(vm_info.vm_cfg.serial.file.unwrap()) + .map_err(DeviceManagerError::SerialOutputFileOpen)?, + )), + ConsoleOutputMode::Tty => Some(Box::new(stdout())), + ConsoleOutputMode::Off | ConsoleOutputMode::Null => None, + }; + let serial = if vm_info.vm_cfg.serial.mode != ConsoleOutputMode::Off { + // Serial is tied to IRQ #4 + let serial_irq = 4; + let interrupt: Box = if let Some(ioapic) = &ioapic { + Box::new(UserIoapicIrq::new(ioapic.clone(), serial_irq)) + } else { + let serial_evt = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; + vm_info + .vm_fd + .register_irqfd(serial_evt.as_raw_fd(), serial_irq as u32) + .map_err(DeviceManagerError::Irq)?; + + Box::new(KernelIoapicIrq::new(serial_evt)) + }; + + Some(Arc::new(Mutex::new(devices::legacy::Serial::new( + interrupt, + serial_writer, + )))) + } else { + None + }; + + // Add a shutdown device (i8042) + let exit_evt = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; + let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; + let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( + reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?, + ))); + + #[cfg(feature = "acpi")] + let acpi_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( + exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?, + reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?, + ))); + + let 
pci_root = PciRoot::new(None); + let mut pci = PciConfigIo::new(pci_root); + + let console_writer: Option> = match vm_info.vm_cfg.console.mode { + ConsoleOutputMode::File => Some(Box::new( + File::create(vm_info.vm_cfg.console.file.unwrap()) + .map_err(DeviceManagerError::ConsoleOutputFileOpen)?, + )), + ConsoleOutputMode::Tty => Some(Box::new(stdout())), + ConsoleOutputMode::Null => Some(Box::new(sink())), + ConsoleOutputMode::Off => None, + }; + let (col, row) = get_win_size(); + let console = if console_writer.is_some() { + let (virtio_console_device, console_input) = + vm_virtio::Console::new(console_writer, col, row) + .map_err(DeviceManagerError::CreateVirtioConsole)?; + DeviceManager::add_virtio_pci_device( + Box::new(virtio_console_device), + vm_info.memory, + allocator, + vm_info.vm_fd, + &mut pci, + &mut buses, + &interrupt_info, + )?; + Some(console_input) + } else { + None + }; + + let mut mmap_regions = Vec::new(); + + DeviceManager::add_virtio_devices( + vm_info, + allocator, + &mut pci, + &mut buses, + &interrupt_info, + &mut mem_slots, + &mut mmap_regions, + )?; + + DeviceManager::add_vfio_devices(vm_info, allocator, &mut pci, &mut buses, mem_slots)?; + + let pci = Arc::new(Mutex::new(pci)); + + Ok(DeviceManager { + io_bus, + mmio_bus, + serial, + console_input: console, + i8042, + #[cfg(feature = "acpi")] + acpi_device, + exit_evt, + reset_evt, + ioapic, + pci, + mmap_regions, + }) + } + + fn add_virtio_devices( + vm_info: &VmInfo, + allocator: &mut SystemAllocator, + pci: &mut PciConfigIo, + buses: &mut BusInfo, + interrupt_info: &InterruptInfo, + mut mem_slots: &mut u32, + mmap_regions: &mut Vec<(*mut libc::c_void, usize)>, + ) -> DeviceManagerResult<()> { + // Add virtio-blk if required + DeviceManager::add_virtio_block_devices(vm_info, allocator, pci, buses, &interrupt_info)?; + + // Add virtio-net if required + DeviceManager::add_virtio_net_devices(vm_info, allocator, pci, buses, &interrupt_info)?; + + // Add virtio-rng if required + 
DeviceManager::add_virtio_rng_devices(vm_info, allocator, pci, buses, &interrupt_info)?; + + // Add virtio-fs if required + DeviceManager::add_virtio_fs_devices( + vm_info, + allocator, + pci, + buses, + &interrupt_info, + &mut mem_slots, + mmap_regions, + )?; + + // Add virtio-pmem if required + DeviceManager::add_virtio_pmem_devices( + vm_info, + allocator, + pci, + buses, + &interrupt_info, + &mut mem_slots, + mmap_regions, + )?; + + // Add virtio-vhost-user-net if required + DeviceManager::add_virtio_vhost_user_net_devices( + vm_info, + allocator, + pci, + buses, + &interrupt_info, + )?; + + Ok(()) + } + + fn add_virtio_block_devices( + vm_info: &VmInfo, + allocator: &mut SystemAllocator, + pci: &mut PciConfigIo, + buses: &mut BusInfo, + interrupt_info: &InterruptInfo, + ) -> DeviceManagerResult<()> { + if let Some(disk_list_cfg) = &vm_info.vm_cfg.disks { + for disk_cfg in disk_list_cfg.iter() { + // Open block device path + let raw_img: File = OpenOptions::new() + .read(true) + .write(true) + .open(disk_cfg.path) + .map_err(DeviceManagerError::Disk)?; + + let image_type = qcow::detect_image_type(&raw_img) + .map_err(DeviceManagerError::DetectImageType)?; + let block = match image_type { + ImageType::Raw => { + let raw_img = vm_virtio::RawFile::new(raw_img); + let dev = + vm_virtio::Block::new(raw_img, disk_cfg.path.to_path_buf(), false) + .map_err(DeviceManagerError::CreateVirtioBlock)?; + Box::new(dev) as Box + } + ImageType::Qcow2 => { + let qcow_img = QcowFile::from(raw_img) + .map_err(DeviceManagerError::QcowDeviceCreate)?; + let dev = + vm_virtio::Block::new(qcow_img, disk_cfg.path.to_path_buf(), false) + .map_err(DeviceManagerError::CreateVirtioBlock)?; + Box::new(dev) as Box + } + }; + + DeviceManager::add_virtio_pci_device( + block, + vm_info.memory, + allocator, + vm_info.vm_fd, + pci, + buses, + &interrupt_info, + )?; + } + } + + Ok(()) + } + + fn add_virtio_net_devices( + vm_info: &VmInfo, + allocator: &mut SystemAllocator, + pci: &mut PciConfigIo, 
+ buses: &mut BusInfo, + interrupt_info: &InterruptInfo, + ) -> DeviceManagerResult<()> { + // Add virtio-net if required + if let Some(net_list_cfg) = &vm_info.vm_cfg.net { + for net_cfg in net_list_cfg.iter() { + let virtio_net_device: vm_virtio::Net; + + if let Some(tap_if_name) = net_cfg.tap { + let tap = Tap::open_named(tap_if_name).map_err(DeviceManagerError::OpenTap)?; + virtio_net_device = vm_virtio::Net::new_with_tap(tap, Some(&net_cfg.mac)) + .map_err(DeviceManagerError::CreateVirtioNet)?; + } else { + virtio_net_device = + vm_virtio::Net::new(net_cfg.ip, net_cfg.mask, Some(&net_cfg.mac)) + .map_err(DeviceManagerError::CreateVirtioNet)?; + } + + DeviceManager::add_virtio_pci_device( + Box::new(virtio_net_device), + vm_info.memory, + allocator, + vm_info.vm_fd, + pci, + buses, + &interrupt_info, + )?; + } + } + + Ok(()) + } + + fn add_virtio_rng_devices( + vm_info: &VmInfo, + allocator: &mut SystemAllocator, + pci: &mut PciConfigIo, + buses: &mut BusInfo, + interrupt_info: &InterruptInfo, + ) -> DeviceManagerResult<()> { + // Add virtio-rng if required + if let Some(rng_path) = vm_info.vm_cfg.rng.src.to_str() { + let virtio_rng_device = + vm_virtio::Rng::new(rng_path).map_err(DeviceManagerError::CreateVirtioRng)?; + + DeviceManager::add_virtio_pci_device( + Box::new(virtio_rng_device), + vm_info.memory, + allocator, + vm_info.vm_fd, + pci, + buses, + &interrupt_info, + )?; + } + + Ok(()) + } + + fn add_virtio_fs_devices( + vm_info: &VmInfo, + allocator: &mut SystemAllocator, + pci: &mut PciConfigIo, + buses: &mut BusInfo, + interrupt_info: &InterruptInfo, + mem_slots: &mut u32, + mmap_regions: &mut Vec<(*mut libc::c_void, usize)>, + ) -> DeviceManagerResult<()> { + // Add virtio-fs if required + if let Some(fs_list_cfg) = &vm_info.vm_cfg.fs { + for fs_cfg in fs_list_cfg.iter() { + if let Some(fs_sock) = fs_cfg.sock.to_str() { + let mut cache: Option<(VirtioSharedMemoryList, u64)> = None; + if let Some(fs_cache) = fs_cfg.cache_size { + // The memory needs 
to be 2MiB aligned in order to support + // hugepages. + let fs_guest_addr = allocator + .allocate_mmio_addresses( + None, + fs_cache as GuestUsize, + Some(0x0020_0000), + ) + .ok_or(DeviceManagerError::FsRangeAllocation)?; + + let addr = unsafe { + libc::mmap( + null_mut(), + fs_cache as usize, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_NORESERVE | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, + -1, + 0 as libc::off_t, + ) + }; + if addr == libc::MAP_FAILED { + return Err(DeviceManagerError::Mmap(io::Error::last_os_error())); + } + + mmap_regions.push((addr, fs_cache as usize)); + + let mem_region = kvm_userspace_memory_region { + slot: *mem_slots as u32, + guest_phys_addr: fs_guest_addr.raw_value(), + memory_size: fs_cache, + userspace_addr: addr as u64, + flags: 0, + }; + // Safe because the guest regions are guaranteed not to overlap. + let _ = unsafe { vm_info.vm_fd.set_user_memory_region(mem_region) }; + + // Increment the KVM slot number + *mem_slots += 1; + + let mut region_list = Vec::new(); + region_list.push(VirtioSharedMemory { + offset: 0, + len: fs_cache, + }); + cache = Some(( + VirtioSharedMemoryList { + addr: fs_guest_addr, + len: fs_cache as GuestUsize, + region_list, + }, + addr as u64, + )); + } + + let virtio_fs_device = vm_virtio::vhost_user::Fs::new( + fs_sock, + fs_cfg.tag, + fs_cfg.num_queues, + fs_cfg.queue_size, + cache, + ) + .map_err(DeviceManagerError::CreateVirtioFs)?; + + DeviceManager::add_virtio_pci_device( + Box::new(virtio_fs_device), + vm_info.memory, + allocator, + vm_info.vm_fd, + pci, + buses, + &interrupt_info, + )?; + } + } + } + + Ok(()) + } + + fn add_virtio_pmem_devices( + vm_info: &VmInfo, + allocator: &mut SystemAllocator, + pci: &mut PciConfigIo, + buses: &mut BusInfo, + interrupt_info: &InterruptInfo, + mem_slots: &mut u32, + mmap_regions: &mut Vec<(*mut libc::c_void, usize)>, + ) -> DeviceManagerResult<()> { + // Add virtio-pmem if required + if let Some(pmem_list_cfg) = &vm_info.vm_cfg.pmem { + for pmem_cfg in 
pmem_list_cfg.iter() {
+                let size = pmem_cfg.size;
+
+                // The memory needs to be 2MiB aligned in order to support
+                // hugepages.
+                let pmem_guest_addr = allocator
+                    .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000))
+                    .ok_or(DeviceManagerError::PmemRangeAllocation)?;
+
+                // A directory path means "create an unnamed temporary file in
+                // that directory" (O_TMPFILE); such a file starts empty, so it
+                // has to be sized explicitly before it is mapped.
+                let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
+                    (O_TMPFILE, true)
+                } else {
+                    (0, false)
+                };
+
+                let file = OpenOptions::new()
+                    .read(true)
+                    .write(true)
+                    .custom_flags(custom_flags)
+                    .open(pmem_cfg.file)
+                    .map_err(DeviceManagerError::PmemFileOpen)?;
+
+                if set_len {
+                    file.set_len(size)
+                        .map_err(DeviceManagerError::PmemFileSetLen)?;
+                }
+
+                // Map the backing file shared so guest writes reach the file.
+                let addr = unsafe {
+                    libc::mmap(
+                        null_mut(),
+                        size as usize,
+                        libc::PROT_READ | libc::PROT_WRITE,
+                        libc::MAP_NORESERVE | libc::MAP_SHARED,
+                        file.as_raw_fd(),
+                        0 as libc::off_t,
+                    )
+                };
+                // Bail out before the pointer is recorded or handed to KVM:
+                // MAP_FAILED is a sentinel, not a usable mapping. This mirrors
+                // the check done for the virtio-fs cache mapping.
+                if addr == libc::MAP_FAILED {
+                    return Err(DeviceManagerError::Mmap(io::Error::last_os_error()));
+                }
+
+                // Remember the mapping so it is unmapped when the manager is
+                // dropped.
+                mmap_regions.push((addr, size as usize));
+
+                let mem_region = kvm_userspace_memory_region {
+                    slot: *mem_slots as u32,
+                    guest_phys_addr: pmem_guest_addr.raw_value(),
+                    memory_size: size,
+                    userspace_addr: addr as u64,
+                    flags: 0,
+                };
+                // Safe because the guest regions are guaranteed not to overlap.
+ let _ = unsafe { vm_info.vm_fd.set_user_memory_region(mem_region) }; + + // Increment the KVM slot number + *mem_slots += 1; + + let virtio_pmem_device = + vm_virtio::Pmem::new(file, pmem_guest_addr, size as GuestUsize) + .map_err(DeviceManagerError::CreateVirtioPmem)?; + + DeviceManager::add_virtio_pci_device( + Box::new(virtio_pmem_device), + vm_info.memory, + allocator, + vm_info.vm_fd, + pci, + buses, + &interrupt_info, + )?; + } + } + + Ok(()) + } + + fn add_virtio_vhost_user_net_devices( + vm_info: &VmInfo, + allocator: &mut SystemAllocator, + pci: &mut PciConfigIo, + buses: &mut BusInfo, + interrupt_info: &InterruptInfo, + ) -> DeviceManagerResult<()> { + // Add vhost-user-net if required + if let Some(vhost_user_net_list_cfg) = &vm_info.vm_cfg.vhost_user_net { + for vhost_user_net_cfg in vhost_user_net_list_cfg.iter() { + let vhost_user_net_device = vm_virtio::vhost_user::Net::new( + vhost_user_net_cfg.mac, + vhost_user_net_cfg.vu_cfg, + ) + .map_err(DeviceManagerError::CreateVhostUserNet)?; + + DeviceManager::add_virtio_pci_device( + Box::new(vhost_user_net_device), + vm_info.memory, + allocator, + vm_info.vm_fd, + pci, + buses, + &interrupt_info, + )?; + } + } + + Ok(()) + } + + fn create_kvm_device(vm: &Arc) -> DeviceManagerResult { + let mut vfio_dev = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_VFIO, + fd: 0, + flags: 0, + }; + + vm.create_device(&mut vfio_dev) + .map_err(DeviceManagerError::CreateKvmDevice) + } + + fn add_vfio_devices( + vm_info: &VmInfo, + allocator: &mut SystemAllocator, + pci: &mut PciConfigIo, + buses: &mut BusInfo, + mem_slots: u32, + ) -> DeviceManagerResult<()> { + let mut mem_slot = mem_slots; + if let Some(device_list_cfg) = &vm_info.vm_cfg.devices { + // Create the KVM VFIO device + let device_fd = DeviceManager::create_kvm_device(vm_info.vm_fd)?; + let device_fd = Arc::new(device_fd); + + for device_cfg in device_list_cfg.iter() { + let vfio_device = + 
VfioDevice::new(device_cfg.path, device_fd.clone(), vm_info.memory.clone()) + .map_err(DeviceManagerError::VfioCreate)?; + + let mut vfio_pci_device = VfioPciDevice::new(vm_info.vm_fd, allocator, vfio_device) + .map_err(DeviceManagerError::VfioPciCreate)?; + + let bars = vfio_pci_device + .allocate_bars(allocator) + .map_err(DeviceManagerError::AllocateBars)?; + + mem_slot = vfio_pci_device + .map_mmio_regions(vm_info.vm_fd, mem_slot) + .map_err(DeviceManagerError::VfioMapRegion)?; + + let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); + + pci.add_device(vfio_pci_device.clone()) + .map_err(DeviceManagerError::AddPciDevice)?; + + pci.register_mapping(vfio_pci_device.clone(), buses.io, buses.mmio, bars) + .map_err(DeviceManagerError::AddPciDevice)?; + } + } + Ok(()) + } + + fn add_virtio_pci_device( + virtio_device: Box, + memory: &Arc>, + allocator: &mut SystemAllocator, + vm_fd: &Arc, + pci: &mut PciConfigIo, + buses: &mut BusInfo, + interrupt_info: &InterruptInfo, + ) -> DeviceManagerResult<()> { + let msix_num = if interrupt_info.msi_capable { + DEFAULT_MSIX_VEC_NUM + } else { + 0 + }; + + let mut virtio_pci_device = VirtioPciDevice::new(memory.clone(), virtio_device, msix_num) + .map_err(DeviceManagerError::VirtioDevice)?; + + let bars = virtio_pci_device + .allocate_bars(allocator) + .map_err(DeviceManagerError::AllocateBars)?; + + for (event, addr, _) in virtio_pci_device.ioeventfds() { + let io_addr = IoEventAddress::Mmio(addr); + vm_fd + .register_ioevent(event.as_raw_fd(), &io_addr, NoDatamatch) + .map_err(DeviceManagerError::RegisterIoevent)?; + } + + if interrupt_info.msi_capable { + let vm_fd_clone = vm_fd.clone(); + + let msi_cb = Arc::new(Box::new(move |p: InterruptParameters| { + if let Some(entry) = p.msix { + let msi_queue = kvm_msi { + address_lo: entry.msg_addr_lo, + address_hi: entry.msg_addr_hi, + data: entry.msg_data, + flags: 0u32, + devid: 0u32, + pad: [0u8; 12], + }; + + return vm_fd_clone.signal_msi(msi_queue).map(|ret| { + if ret 
> 0 { + debug!("MSI message successfully delivered"); + } else if ret == 0 { + warn!("failed to deliver MSI message, blocked by guest"); + } + }); + } + + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "missing MSI-X entry", + )) + }) as InterruptDelivery); + + virtio_pci_device.assign_msix(msi_cb); + } else { + let irq_num = allocator + .allocate_irq() + .ok_or(DeviceManagerError::AllocateIrq)?; + + let irq_cb = if let Some(ioapic) = interrupt_info.ioapic { + let ioapic_clone = ioapic.clone(); + Box::new(move |_p: InterruptParameters| { + ioapic_clone + .lock() + .unwrap() + .service_irq(irq_num as usize) + .map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!("failed to inject IRQ #{}: {:?}", irq_num, e), + ) + }) + }) as InterruptDelivery + } else { + let irqfd = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; + vm_fd + .register_irqfd(irqfd.as_raw_fd(), irq_num) + .map_err(DeviceManagerError::Irq)?; + + Box::new(move |_p: InterruptParameters| irqfd.write(1)) as InterruptDelivery + }; + + virtio_pci_device.assign_pin_irq( + Arc::new(irq_cb), + irq_num as u32, + PciInterruptPin::IntA, + ); + } + + let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device)); + + pci.add_device(virtio_pci_device.clone()) + .map_err(DeviceManagerError::AddPciDevice)?; + + pci.register_mapping( + virtio_pci_device.clone(), + &mut buses.io, + &mut buses.mmio, + bars, + ) + .map_err(DeviceManagerError::AddPciDevice)?; + + Ok(()) + } + + pub fn register_devices(&mut self) -> DeviceManagerResult<()> { + if self.serial.is_some() { + // Insert serial device + self.io_bus + .insert(self.serial.as_ref().unwrap().clone(), 0x3f8, 0x8) + .map_err(DeviceManagerError::BusError)?; + } + + // Insert i8042 device + self.io_bus + .insert(self.i8042.clone(), 0x61, 0x4) + .map_err(DeviceManagerError::BusError)?; + + #[cfg(feature = "acpi")] + self.io_bus + .insert(self.acpi_device.clone(), 0x3c0, 0x4) + .map_err(DeviceManagerError::BusError)?; + + 
// Insert the PCI root configuration space. + self.io_bus + .insert(self.pci.clone(), 0xcf8, 0x8) + .map_err(DeviceManagerError::BusError)?; + + if let Some(ioapic) = &self.ioapic { + // Insert IOAPIC + self.mmio_bus + .insert(ioapic.clone(), IOAPIC_RANGE_ADDR, IOAPIC_RANGE_SIZE) + .map_err(DeviceManagerError::BusError)?; + } + + Ok(()) + } +} + +impl Drop for DeviceManager { + fn drop(&mut self) { + for (addr, size) in self.mmap_regions.drain(..) { + unsafe { + libc::munmap(addr, size); + } + } + } +} diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index a2706ad68..10191ed97 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -12,6 +12,7 @@ use std::fmt::{self, Display}; use std::result; pub mod config; +pub mod device_manager; pub mod vm; use self::config::VmConfig; diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 0ff21a393..bf89a24fd 100755 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -24,49 +24,37 @@ extern crate vm_virtio; extern crate vmm_sys_util; use crate::config::{ConsoleOutputMode, VmConfig}; +use crate::device_manager::{get_win_size, DeviceManager, DeviceManagerError}; use arch::RegionType; use devices::ioapic; use kvm_bindings::{ - kvm_enable_cap, kvm_msi, kvm_pit_config, kvm_userspace_memory_region, KVM_CAP_SPLIT_IRQCHIP, + kvm_enable_cap, kvm_pit_config, kvm_userspace_memory_region, KVM_CAP_SPLIT_IRQCHIP, KVM_PIT_SPEAKER_DUMMY, }; use kvm_ioctls::*; -use libc::O_TMPFILE; -use libc::{c_void, siginfo_t, EFD_NONBLOCK, TIOCGWINSZ}; +use libc::{c_void, siginfo_t}; use linux_loader::loader::KernelLoader; -use net_util::Tap; -use pci::{ - InterruptDelivery, InterruptParameters, PciConfigIo, PciDevice, PciInterruptPin, PciRoot, -}; -use qcow::{self, ImageType, QcowFile}; use signal_hook::{iterator::Signals, SIGWINCH}; use std::ffi::CString; use std::fs::{File, OpenOptions}; -use std::io::{self, sink, stdout}; +use std::io; use std::ops::Deref; -use std::os::unix::fs::OpenOptionsExt; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use 
std::os::unix::thread::JoinHandleExt; -use std::ptr::null_mut; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Barrier, Mutex, RwLock}; use std::{fmt, result, str, thread}; -use vfio::{VfioDevice, VfioPciDevice, VfioPciError}; use vm_allocator::{GsiApic, SystemAllocator}; use vm_memory::guest_memory::FileOffset; use vm_memory::{ Address, Bytes, Error as MmapError, GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion, GuestUsize, }; -use vm_virtio::transport::VirtioPciDevice; -use vm_virtio::{VirtioSharedMemory, VirtioSharedMemoryList}; -use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::signal::{register_signal_handler, validate_signal_num}; use vmm_sys_util::terminal::Terminal; const VCPU_RTSIG_OFFSET: i32 = 0; const X86_64_IRQ_BASE: u32 = 5; -const DEFAULT_MSIX_VEC_NUM: u16 = 2; // CPUID feature bits const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // tsc deadline timer ecx bit. @@ -75,10 +63,6 @@ const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit. // 64 bit direct boot entry offset for bzImage const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200; -// IOAPIC address range -const IOAPIC_RANGE_ADDR: u64 = 0xfec0_0000; -const IOAPIC_RANGE_SIZE: u64 = 0x20; - // Debug I/O port #[cfg(target_arch = "x86_64")] const DEBUG_IOPORT: u16 = 0x80; @@ -183,9 +167,6 @@ pub enum Error { /// Cannot create EventFd. EventFd(io::Error), - /// Cannot add legacy device to Bus. - BusError(devices::BusError), - /// Cannot create epoll context. EpollError(io::Error), @@ -233,98 +214,6 @@ pub enum Error { } pub type Result = result::Result; -/// Errors associated with device manager -#[derive(Debug)] -pub enum DeviceManagerError { - /// Cannot create EventFd. 
- EventFd(io::Error), - - /// Cannot open disk path - Disk(io::Error), - - /// Cannot create vhost-user-net device - CreateVhostUserNet(vm_virtio::vhost_user::Error), - - /// Cannot create virtio-blk device - CreateVirtioBlock(io::Error), - - /// Cannot create virtio-net device - CreateVirtioNet(vm_virtio::net::Error), - - /// Cannot create virtio-console device - CreateVirtioConsole(io::Error), - - /// Cannot create virtio-rng device - CreateVirtioRng(io::Error), - - /// Cannot create virtio-fs device - CreateVirtioFs(vm_virtio::vhost_user::Error), - - /// Cannot create virtio-pmem device - CreateVirtioPmem(io::Error), - - /// Failed parsing disk image format - DetectImageType(qcow::Error), - - /// Cannot open qcow disk path - QcowDeviceCreate(qcow::Error), - - /// Cannot open tap interface - OpenTap(net_util::TapError), - - /// Cannot allocate IRQ. - AllocateIrq, - - /// Cannot configure the IRQ. - Irq(io::Error), - - /// Cannot allocate PCI BARs - AllocateBars(pci::PciDeviceError), - - /// Cannot register ioevent. - RegisterIoevent(io::Error), - - /// Cannot create virtio device - VirtioDevice(vmm_sys_util::errno::Error), - - /// Cannot add PCI device - AddPciDevice(pci::PciRootError), - - /// Cannot open persistent memory file - PmemFileOpen(io::Error), - - /// Cannot set persistent memory file size - PmemFileSetLen(io::Error), - - /// Cannot find a memory range for persistent memory - PmemRangeAllocation, - - /// Cannot find a memory range for virtio-fs - FsRangeAllocation, - - /// Error creating serial output file - SerialOutputFileOpen(io::Error), - - /// Error creating console output file - ConsoleOutputFileOpen(io::Error), - - /// Cannot create a VFIO device - VfioCreate(vfio::VfioError), - - /// Cannot create a VFIO PCI device - VfioPciCreate(vfio::VfioPciError), - - /// Failed to map VFIO MMIO region. - VfioMapRegion(VfioPciError), - - /// Failed to create the KVM device. - CreateKvmDevice(io::Error), - - /// Failed to memory map. 
- Mmap(io::Error), -} -pub type DeviceManagerResult = result::Result; - #[allow(dead_code)] #[derive(Copy, Clone)] enum CpuidReg { @@ -534,824 +423,10 @@ impl Vcpu { } } -struct VmInfo<'a> { - memory: &'a Arc>, - vm_fd: &'a Arc, - vm_cfg: &'a VmConfig<'a>, -} - -struct BusInfo<'a> { - io: &'a mut devices::Bus, - mmio: &'a mut devices::Bus, -} - -struct InterruptInfo<'a> { - msi_capable: bool, - ioapic: &'a Option>>, -} - -struct KernelIoapicIrq { - evt: EventFd, -} - -impl KernelIoapicIrq { - fn new(evt: EventFd) -> Self { - KernelIoapicIrq { evt } - } -} - -impl devices::Interrupt for KernelIoapicIrq { - fn deliver(&self) -> result::Result<(), io::Error> { - self.evt.write(1) - } -} - -struct UserIoapicIrq { - ioapic: Arc>, - irq: usize, -} - -impl UserIoapicIrq { - fn new(ioapic: Arc>, irq: usize) -> Self { - UserIoapicIrq { ioapic, irq } - } -} - -pub fn get_win_size() -> (u16, u16) { - #[repr(C)] - struct WS { - rows: u16, - cols: u16, - }; - let ws: WS = WS { - rows: 0u16, - cols: 0u16, - }; - unsafe { - libc::ioctl(0, TIOCGWINSZ, &ws); - } - - (ws.cols, ws.rows) -} - -impl devices::Interrupt for UserIoapicIrq { - fn deliver(&self) -> result::Result<(), io::Error> { - self.ioapic - .lock() - .unwrap() - .service_irq(self.irq) - .map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::Other, - format!("failed to inject IRQ #{}: {:?}", self.irq, e), - ) - }) - } -} - -struct DeviceManager { - io_bus: devices::Bus, - mmio_bus: devices::Bus, - - // Serial port on 0x3f8 - serial: Option>>, - console_input: Option>, - - // i8042 device for i8042 reset - i8042: Arc>, - - #[cfg(feature = "acpi")] - // ACPI device for reboot/shutdwon - acpi_device: Arc>, - - // Shutdown (exit) and reboot (reset) control - exit_evt: EventFd, - reset_evt: EventFd, - - // IOAPIC - ioapic: Option>>, - - // PCI root - pci: Arc>, - - // mmap()ed region to unmap on drop - mmap_regions: Vec<(*mut libc::c_void, usize)>, -} - -impl DeviceManager { - fn new( - vm_info: &VmInfo, - allocator: 
&mut SystemAllocator, - msi_capable: bool, - userspace_ioapic: bool, - mut mem_slots: u32, - ) -> DeviceManagerResult { - let mut io_bus = devices::Bus::new(); - let mut mmio_bus = devices::Bus::new(); - - let mut buses = BusInfo { - io: &mut io_bus, - mmio: &mut mmio_bus, - }; - - let ioapic = if userspace_ioapic { - // Create IOAPIC - Some(Arc::new(Mutex::new(ioapic::Ioapic::new( - vm_info.vm_fd.clone(), - )))) - } else { - None - }; - - let interrupt_info = InterruptInfo { - msi_capable, - ioapic: &ioapic, - }; - - let serial_writer: Option> = match vm_info.vm_cfg.serial.mode { - ConsoleOutputMode::File => Some(Box::new( - File::create(vm_info.vm_cfg.serial.file.unwrap()) - .map_err(DeviceManagerError::SerialOutputFileOpen)?, - )), - ConsoleOutputMode::Tty => Some(Box::new(stdout())), - ConsoleOutputMode::Off | ConsoleOutputMode::Null => None, - }; - let serial = if vm_info.vm_cfg.serial.mode != ConsoleOutputMode::Off { - // Serial is tied to IRQ #4 - let serial_irq = 4; - let interrupt: Box = if let Some(ioapic) = &ioapic { - Box::new(UserIoapicIrq::new(ioapic.clone(), serial_irq)) - } else { - let serial_evt = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; - vm_info - .vm_fd - .register_irqfd(serial_evt.as_raw_fd(), serial_irq as u32) - .map_err(DeviceManagerError::Irq)?; - - Box::new(KernelIoapicIrq::new(serial_evt)) - }; - - Some(Arc::new(Mutex::new(devices::legacy::Serial::new( - interrupt, - serial_writer, - )))) - } else { - None - }; - - // Add a shutdown device (i8042) - let exit_evt = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; - let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; - let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( - reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?, - ))); - - #[cfg(feature = "acpi")] - let acpi_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( - exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?, - 
reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?, - ))); - - let pci_root = PciRoot::new(None); - let mut pci = PciConfigIo::new(pci_root); - - let console_writer: Option> = match vm_info.vm_cfg.console.mode { - ConsoleOutputMode::File => Some(Box::new( - File::create(vm_info.vm_cfg.console.file.unwrap()) - .map_err(DeviceManagerError::ConsoleOutputFileOpen)?, - )), - ConsoleOutputMode::Tty => Some(Box::new(stdout())), - ConsoleOutputMode::Null => Some(Box::new(sink())), - ConsoleOutputMode::Off => None, - }; - let (col, row) = get_win_size(); - let console = if console_writer.is_some() { - let (virtio_console_device, console_input) = - vm_virtio::Console::new(console_writer, col, row) - .map_err(DeviceManagerError::CreateVirtioConsole)?; - DeviceManager::add_virtio_pci_device( - Box::new(virtio_console_device), - vm_info.memory, - allocator, - vm_info.vm_fd, - &mut pci, - &mut buses, - &interrupt_info, - )?; - Some(console_input) - } else { - None - }; - - let mut mmap_regions = Vec::new(); - - DeviceManager::add_virtio_devices( - vm_info, - allocator, - &mut pci, - &mut buses, - &interrupt_info, - &mut mem_slots, - &mut mmap_regions, - )?; - - DeviceManager::add_vfio_devices(vm_info, allocator, &mut pci, &mut buses, mem_slots)?; - - let pci = Arc::new(Mutex::new(pci)); - - Ok(DeviceManager { - io_bus, - mmio_bus, - serial, - console_input: console, - i8042, - #[cfg(feature = "acpi")] - acpi_device, - exit_evt, - reset_evt, - ioapic, - pci, - mmap_regions, - }) - } - - fn add_virtio_devices( - vm_info: &VmInfo, - allocator: &mut SystemAllocator, - pci: &mut PciConfigIo, - buses: &mut BusInfo, - interrupt_info: &InterruptInfo, - mut mem_slots: &mut u32, - mmap_regions: &mut Vec<(*mut libc::c_void, usize)>, - ) -> DeviceManagerResult<()> { - // Add virtio-blk if required - DeviceManager::add_virtio_block_devices(vm_info, allocator, pci, buses, &interrupt_info)?; - - // Add virtio-net if required - DeviceManager::add_virtio_net_devices(vm_info, allocator, 
pci, buses, &interrupt_info)?; - - // Add virtio-rng if required - DeviceManager::add_virtio_rng_devices(vm_info, allocator, pci, buses, &interrupt_info)?; - - // Add virtio-fs if required - DeviceManager::add_virtio_fs_devices( - vm_info, - allocator, - pci, - buses, - &interrupt_info, - &mut mem_slots, - mmap_regions, - )?; - - // Add virtio-pmem if required - DeviceManager::add_virtio_pmem_devices( - vm_info, - allocator, - pci, - buses, - &interrupt_info, - &mut mem_slots, - mmap_regions, - )?; - - // Add virtio-vhost-user-net if required - DeviceManager::add_virtio_vhost_user_net_devices( - vm_info, - allocator, - pci, - buses, - &interrupt_info, - )?; - - Ok(()) - } - - fn add_virtio_block_devices( - vm_info: &VmInfo, - allocator: &mut SystemAllocator, - pci: &mut PciConfigIo, - buses: &mut BusInfo, - interrupt_info: &InterruptInfo, - ) -> DeviceManagerResult<()> { - if let Some(disk_list_cfg) = &vm_info.vm_cfg.disks { - for disk_cfg in disk_list_cfg.iter() { - // Open block device path - let raw_img: File = OpenOptions::new() - .read(true) - .write(true) - .open(disk_cfg.path) - .map_err(DeviceManagerError::Disk)?; - - let image_type = qcow::detect_image_type(&raw_img) - .map_err(DeviceManagerError::DetectImageType)?; - let block = match image_type { - ImageType::Raw => { - let raw_img = vm_virtio::RawFile::new(raw_img); - let dev = - vm_virtio::Block::new(raw_img, disk_cfg.path.to_path_buf(), false) - .map_err(DeviceManagerError::CreateVirtioBlock)?; - Box::new(dev) as Box - } - ImageType::Qcow2 => { - let qcow_img = QcowFile::from(raw_img) - .map_err(DeviceManagerError::QcowDeviceCreate)?; - let dev = - vm_virtio::Block::new(qcow_img, disk_cfg.path.to_path_buf(), false) - .map_err(DeviceManagerError::CreateVirtioBlock)?; - Box::new(dev) as Box - } - }; - - DeviceManager::add_virtio_pci_device( - block, - vm_info.memory, - allocator, - vm_info.vm_fd, - pci, - buses, - &interrupt_info, - )?; - } - } - - Ok(()) - } - - fn add_virtio_net_devices( - vm_info: 
&VmInfo, - allocator: &mut SystemAllocator, - pci: &mut PciConfigIo, - buses: &mut BusInfo, - interrupt_info: &InterruptInfo, - ) -> DeviceManagerResult<()> { - // Add virtio-net if required - if let Some(net_list_cfg) = &vm_info.vm_cfg.net { - for net_cfg in net_list_cfg.iter() { - let virtio_net_device: vm_virtio::Net; - - if let Some(tap_if_name) = net_cfg.tap { - let tap = Tap::open_named(tap_if_name).map_err(DeviceManagerError::OpenTap)?; - virtio_net_device = vm_virtio::Net::new_with_tap(tap, Some(&net_cfg.mac)) - .map_err(DeviceManagerError::CreateVirtioNet)?; - } else { - virtio_net_device = - vm_virtio::Net::new(net_cfg.ip, net_cfg.mask, Some(&net_cfg.mac)) - .map_err(DeviceManagerError::CreateVirtioNet)?; - } - - DeviceManager::add_virtio_pci_device( - Box::new(virtio_net_device), - vm_info.memory, - allocator, - vm_info.vm_fd, - pci, - buses, - &interrupt_info, - )?; - } - } - - Ok(()) - } - - fn add_virtio_rng_devices( - vm_info: &VmInfo, - allocator: &mut SystemAllocator, - pci: &mut PciConfigIo, - buses: &mut BusInfo, - interrupt_info: &InterruptInfo, - ) -> DeviceManagerResult<()> { - // Add virtio-rng if required - if let Some(rng_path) = vm_info.vm_cfg.rng.src.to_str() { - let virtio_rng_device = - vm_virtio::Rng::new(rng_path).map_err(DeviceManagerError::CreateVirtioRng)?; - - DeviceManager::add_virtio_pci_device( - Box::new(virtio_rng_device), - vm_info.memory, - allocator, - vm_info.vm_fd, - pci, - buses, - &interrupt_info, - )?; - } - - Ok(()) - } - - fn add_virtio_fs_devices( - vm_info: &VmInfo, - allocator: &mut SystemAllocator, - pci: &mut PciConfigIo, - buses: &mut BusInfo, - interrupt_info: &InterruptInfo, - mem_slots: &mut u32, - mmap_regions: &mut Vec<(*mut libc::c_void, usize)>, - ) -> DeviceManagerResult<()> { - // Add virtio-fs if required - if let Some(fs_list_cfg) = &vm_info.vm_cfg.fs { - for fs_cfg in fs_list_cfg.iter() { - if let Some(fs_sock) = fs_cfg.sock.to_str() { - let mut cache: Option<(VirtioSharedMemoryList, u64)> = None; 
- if let Some(fs_cache) = fs_cfg.cache_size { - // The memory needs to be 2MiB aligned in order to support - // hugepages. - let fs_guest_addr = allocator - .allocate_mmio_addresses( - None, - fs_cache as GuestUsize, - Some(0x0020_0000), - ) - .ok_or(DeviceManagerError::FsRangeAllocation)?; - - let addr = unsafe { - libc::mmap( - null_mut(), - fs_cache as usize, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_NORESERVE | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, - -1, - 0 as libc::off_t, - ) - }; - if addr == libc::MAP_FAILED { - return Err(DeviceManagerError::Mmap(io::Error::last_os_error())); - } - mmap_regions.push((addr, fs_cache as usize)); - - let mem_region = kvm_userspace_memory_region { - slot: *mem_slots as u32, - guest_phys_addr: fs_guest_addr.raw_value(), - memory_size: fs_cache, - userspace_addr: addr as u64, - flags: 0, - }; - // Safe because the guest regions are guaranteed not to overlap. - let _ = unsafe { vm_info.vm_fd.set_user_memory_region(mem_region) }; - - // Increment the KVM slot number - *mem_slots += 1; - - let mut region_list = Vec::new(); - region_list.push(VirtioSharedMemory { - offset: 0, - len: fs_cache, - }); - cache = Some(( - VirtioSharedMemoryList { - addr: fs_guest_addr, - len: fs_cache as GuestUsize, - region_list, - }, - addr as u64, - )); - } - - let virtio_fs_device = vm_virtio::vhost_user::Fs::new( - fs_sock, - fs_cfg.tag, - fs_cfg.num_queues, - fs_cfg.queue_size, - cache, - ) - .map_err(DeviceManagerError::CreateVirtioFs)?; - - DeviceManager::add_virtio_pci_device( - Box::new(virtio_fs_device), - vm_info.memory, - allocator, - vm_info.vm_fd, - pci, - buses, - &interrupt_info, - )?; - } - } - } - - Ok(()) - } - - fn add_virtio_pmem_devices( - vm_info: &VmInfo, - allocator: &mut SystemAllocator, - pci: &mut PciConfigIo, - buses: &mut BusInfo, - interrupt_info: &InterruptInfo, - mem_slots: &mut u32, - mmap_regions: &mut Vec<(*mut libc::c_void, usize)>, - ) -> DeviceManagerResult<()> { - // Add virtio-pmem if required - if let 
Some(pmem_list_cfg) = &vm_info.vm_cfg.pmem { - for pmem_cfg in pmem_list_cfg.iter() { - let size = pmem_cfg.size; - - // The memory needs to be 2MiB aligned in order to support - // hugepages. - let pmem_guest_addr = allocator - .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000)) - .ok_or(DeviceManagerError::PmemRangeAllocation)?; - - let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { - (O_TMPFILE, true) - } else { - (0, false) - }; - - let file = OpenOptions::new() - .read(true) - .write(true) - .custom_flags(custom_flags) - .open(pmem_cfg.file) - .map_err(DeviceManagerError::PmemFileOpen)?; - - if set_len { - file.set_len(size) - .map_err(DeviceManagerError::PmemFileSetLen)?; - } - - let addr = unsafe { - libc::mmap( - null_mut(), - size as usize, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_NORESERVE | libc::MAP_SHARED, - file.as_raw_fd(), - 0 as libc::off_t, - ) - }; - - mmap_regions.push((addr, size as usize)); - - let mem_region = kvm_userspace_memory_region { - slot: *mem_slots as u32, - guest_phys_addr: pmem_guest_addr.raw_value(), - memory_size: size, - userspace_addr: addr as u64, - flags: 0, - }; - // Safe because the guest regions are guaranteed not to overlap. 
- let _ = unsafe { vm_info.vm_fd.set_user_memory_region(mem_region) }; - - // Increment the KVM slot number - *mem_slots += 1; - - let virtio_pmem_device = - vm_virtio::Pmem::new(file, pmem_guest_addr, size as GuestUsize) - .map_err(DeviceManagerError::CreateVirtioPmem)?; - - DeviceManager::add_virtio_pci_device( - Box::new(virtio_pmem_device), - vm_info.memory, - allocator, - vm_info.vm_fd, - pci, - buses, - &interrupt_info, - )?; - } - } - - Ok(()) - } - - fn add_virtio_vhost_user_net_devices( - vm_info: &VmInfo, - allocator: &mut SystemAllocator, - pci: &mut PciConfigIo, - buses: &mut BusInfo, - interrupt_info: &InterruptInfo, - ) -> DeviceManagerResult<()> { - // Add vhost-user-net if required - if let Some(vhost_user_net_list_cfg) = &vm_info.vm_cfg.vhost_user_net { - for vhost_user_net_cfg in vhost_user_net_list_cfg.iter() { - let vhost_user_net_device = vm_virtio::vhost_user::Net::new( - vhost_user_net_cfg.mac, - vhost_user_net_cfg.vu_cfg, - ) - .map_err(DeviceManagerError::CreateVhostUserNet)?; - - DeviceManager::add_virtio_pci_device( - Box::new(vhost_user_net_device), - vm_info.memory, - allocator, - vm_info.vm_fd, - pci, - buses, - &interrupt_info, - )?; - } - } - - Ok(()) - } - - fn create_kvm_device(vm: &Arc) -> DeviceManagerResult { - let mut vfio_dev = kvm_bindings::kvm_create_device { - type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_VFIO, - fd: 0, - flags: 0, - }; - - vm.create_device(&mut vfio_dev) - .map_err(DeviceManagerError::CreateKvmDevice) - } - - fn add_vfio_devices( - vm_info: &VmInfo, - allocator: &mut SystemAllocator, - pci: &mut PciConfigIo, - buses: &mut BusInfo, - mem_slots: u32, - ) -> DeviceManagerResult<()> { - let mut mem_slot = mem_slots; - if let Some(device_list_cfg) = &vm_info.vm_cfg.devices { - // Create the KVM VFIO device - let device_fd = DeviceManager::create_kvm_device(vm_info.vm_fd)?; - let device_fd = Arc::new(device_fd); - - for device_cfg in device_list_cfg.iter() { - let vfio_device = - 
VfioDevice::new(device_cfg.path, device_fd.clone(), vm_info.memory.clone()) - .map_err(DeviceManagerError::VfioCreate)?; - - let mut vfio_pci_device = VfioPciDevice::new(vm_info.vm_fd, allocator, vfio_device) - .map_err(DeviceManagerError::VfioPciCreate)?; - - let bars = vfio_pci_device - .allocate_bars(allocator) - .map_err(DeviceManagerError::AllocateBars)?; - - mem_slot = vfio_pci_device - .map_mmio_regions(vm_info.vm_fd, mem_slot) - .map_err(DeviceManagerError::VfioMapRegion)?; - - let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); - - pci.add_device(vfio_pci_device.clone()) - .map_err(DeviceManagerError::AddPciDevice)?; - - pci.register_mapping(vfio_pci_device.clone(), buses.io, buses.mmio, bars) - .map_err(DeviceManagerError::AddPciDevice)?; - } - } - Ok(()) - } - - fn add_virtio_pci_device( - virtio_device: Box, - memory: &Arc>, - allocator: &mut SystemAllocator, - vm_fd: &Arc, - pci: &mut PciConfigIo, - buses: &mut BusInfo, - interrupt_info: &InterruptInfo, - ) -> DeviceManagerResult<()> { - let msix_num = if interrupt_info.msi_capable { - DEFAULT_MSIX_VEC_NUM - } else { - 0 - }; - - let mut virtio_pci_device = VirtioPciDevice::new(memory.clone(), virtio_device, msix_num) - .map_err(DeviceManagerError::VirtioDevice)?; - - let bars = virtio_pci_device - .allocate_bars(allocator) - .map_err(DeviceManagerError::AllocateBars)?; - - for (event, addr, _) in virtio_pci_device.ioeventfds() { - let io_addr = IoEventAddress::Mmio(addr); - vm_fd - .register_ioevent(event.as_raw_fd(), &io_addr, NoDatamatch) - .map_err(DeviceManagerError::RegisterIoevent)?; - } - - if interrupt_info.msi_capable { - let vm_fd_clone = vm_fd.clone(); - - let msi_cb = Arc::new(Box::new(move |p: InterruptParameters| { - if let Some(entry) = p.msix { - let msi_queue = kvm_msi { - address_lo: entry.msg_addr_lo, - address_hi: entry.msg_addr_hi, - data: entry.msg_data, - flags: 0u32, - devid: 0u32, - pad: [0u8; 12], - }; - - return vm_fd_clone.signal_msi(msi_queue).map(|ret| { - if ret 
> 0 { - debug!("MSI message successfully delivered"); - } else if ret == 0 { - warn!("failed to deliver MSI message, blocked by guest"); - } - }); - } - - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "missing MSI-X entry", - )) - }) as InterruptDelivery); - - virtio_pci_device.assign_msix(msi_cb); - } else { - let irq_num = allocator - .allocate_irq() - .ok_or(DeviceManagerError::AllocateIrq)?; - - let irq_cb = if let Some(ioapic) = interrupt_info.ioapic { - let ioapic_clone = ioapic.clone(); - Box::new(move |_p: InterruptParameters| { - ioapic_clone - .lock() - .unwrap() - .service_irq(irq_num as usize) - .map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::Other, - format!("failed to inject IRQ #{}: {:?}", irq_num, e), - ) - }) - }) as InterruptDelivery - } else { - let irqfd = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; - vm_fd - .register_irqfd(irqfd.as_raw_fd(), irq_num) - .map_err(DeviceManagerError::Irq)?; - - Box::new(move |_p: InterruptParameters| irqfd.write(1)) as InterruptDelivery - }; - - virtio_pci_device.assign_pin_irq( - Arc::new(irq_cb), - irq_num as u32, - PciInterruptPin::IntA, - ); - } - - let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device)); - - pci.add_device(virtio_pci_device.clone()) - .map_err(DeviceManagerError::AddPciDevice)?; - - pci.register_mapping( - virtio_pci_device.clone(), - &mut buses.io, - &mut buses.mmio, - bars, - ) - .map_err(DeviceManagerError::AddPciDevice)?; - - Ok(()) - } - - pub fn register_devices(&mut self) -> Result<()> { - if self.serial.is_some() { - // Insert serial device - self.io_bus - .insert(self.serial.as_ref().unwrap().clone(), 0x3f8, 0x8) - .map_err(Error::BusError)?; - } - - // Insert i8042 device - self.io_bus - .insert(self.i8042.clone(), 0x61, 0x4) - .map_err(Error::BusError)?; - - #[cfg(feature = "acpi")] - self.io_bus - .insert(self.acpi_device.clone(), 0x3c0, 0x4) - .map_err(Error::BusError)?; - - // Insert the PCI root configuration space. 
- self.io_bus - .insert(self.pci.clone(), 0xcf8, 0x8) - .map_err(Error::BusError)?; - - if let Some(ioapic) = &self.ioapic { - // Insert IOAPIC - self.mmio_bus - .insert(ioapic.clone(), IOAPIC_RANGE_ADDR, IOAPIC_RANGE_SIZE) - .map_err(Error::BusError)?; - } - - Ok(()) - } -} - -impl Drop for DeviceManager { - fn drop(&mut self) { - for (addr, size) in self.mmap_regions.drain(..) { - unsafe { - libc::munmap(addr, size); - } - } - } +pub struct VmInfo<'a> { + pub memory: &'a Arc>, + pub vm_fd: &'a Arc, + pub vm_cfg: &'a VmConfig<'a>, } #[derive(Debug, Clone, Copy, PartialEq)] @@ -1871,7 +946,9 @@ impl<'a> Vm<'a> { } pub fn start(&mut self, entry_addr: GuestAddress) -> Result { - self.devices.register_devices()?; + self.devices + .register_devices() + .map_err(Error::DeviceManager)?; let vcpu_count = u8::from(&self.config.cpus);