1
0
mirror of https://github.com/cloud-hypervisor/cloud-hypervisor.git synced 2025-03-20 07:58:55 +00:00
Michael Zhao bf6920f774 aarch64: Optimize cpu-map creating code in FDT
The logic of determining VCPU index in creating `cpu-map` node of FDT
can be optimized.

The code is invoked when VCPU topology is specified.

Signed-off-by: Michael Zhao <michael.zhao@arm.com>
2022-02-22 09:21:00 +08:00

800 lines
31 KiB
Rust

// Copyright 2020 Arm Limited (or its affiliates). All rights reserved.
// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
use crate::{NumaNodes, PciSpaceInfo};
use byteorder::{BigEndian, ByteOrder};
use std::cmp;
use std::collections::HashMap;
use std::ffi::CStr;
use std::fmt::Debug;
use std::result;
use std::str;
use super::super::DeviceType;
use super::super::GuestMemoryMmap;
use super::super::InitramfsConfig;
use super::get_fdt_addr;
use super::gic::GicDevice;
use super::layout::{
IRQ_BASE, MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, MEM_PCI_IO_SIZE, MEM_PCI_IO_START,
PCI_HIGH_BASE, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT,
};
use vm_fdt::{FdtWriter, FdtWriterResult};
use vm_memory::{Address, Bytes, GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryRegion};
// Phandle values. Each one uniquely identifies a node of the FDT so that other
// nodes can reference it.
// Phandle of the FDT node declaring the interrupt controller.
const GIC_PHANDLE: u32 = 1;
// Phandle of the FDT node declaring the MSI controller.
const MSI_PHANDLE: u32 = 2;
// Phandle of the FDT node containing the clock definition.
const CLOCK_PHANDLE: u32 = 3;
// Phandle of the FDT node containing the gpio controller.
const GPIO_PHANDLE: u32 = 4;
// Phandle of the virtio-iommu node. Only one virtio-iommu device is supported for now.
const VIRTIO_IOMMU_PHANDLE: u32 = 5;
// NOTE: Keep FIRST_VCPU_PHANDLE the last PHANDLE defined.
// Phandle of the FDT node containing the first vCPU. vCPU `n` uses
// `FIRST_VCPU_PHANDLE + n`, so the last vCPU phandle depends on the number of vCPUs.
const FIRST_VCPU_PHANDLE: u32 = 6;
// Values written to the root node's `#address-cells` and `#size-cells` properties.
// Read the documentation specified when appending the root node to the FDT.
const ADDRESS_CELLS: u32 = 0x2;
const SIZE_CELLS: u32 = 0x2;
// Interrupt specifier encoding, as per kvm tool and
// https://www.kernel.org/doc/Documentation/devicetree/bindings/interrupt-controller/arm%2Cgic.txt
// Look for "The 1st cell..."
const GIC_FDT_IRQ_TYPE_SPI: u32 = 0;
const GIC_FDT_IRQ_TYPE_PPI: u32 = 1;
// Position and mask of the 8-bit CPU mask that `create_pmu_node` ORs into the
// third cell of a PPI interrupt specifier.
const GIC_FDT_IRQ_PPI_CPU_SHIFT: u32 = 8;
const GIC_FDT_IRQ_PPI_CPU_MASK: u32 = 0xff << GIC_FDT_IRQ_PPI_CPU_SHIFT;
// Interrupt trigger types.
// From https://elixir.bootlin.com/linux/v4.9.62/source/include/dt-bindings/interrupt-controller/irq.h#L17
const IRQ_TYPE_EDGE_RISING: u32 = 1;
const IRQ_TYPE_LEVEL_HI: u32 = 4;
// PMU PPI interrupt number
pub const AARCH64_PMU_IRQ: u32 = 7;
// Keys and Buttons
// Key code for "System Power Down", used for the gpio-keys power-off button.
const KEY_POWER: u32 = 116;
/// Information a device must expose so that it can be described by a node of
/// the Flattened Device Tree.
pub trait DeviceInfoForFdt {
    /// Address where this device will be loaded.
    fn addr(&self) -> u64;

    /// Interrupt associated with this device.
    fn irq(&self) -> u32;

    /// Amount of memory that needs to be reserved for this device.
    fn length(&self) -> u64;
}
/// Errors that can be raised while configuring the Flattened Device Tree for aarch64.
#[derive(Debug)]
pub enum Error {
    /// The FDT blob could not be written into guest memory.
    WriteFdtToMemory(GuestMemoryError),
}

/// Module-local result type whose error variant is [`Error`].
type Result<T> = result::Result<T, Error>;
/// Creates the flattened device tree for this aarch64 VM.
///
/// The root node is populated, in this order, with the CPU nodes, the memory
/// node(s), `chosen`, the GIC, the timer, the PMU (only when `pmu_supported`),
/// the fixed clock, PSCI, the platform/virtio devices from `device_info`, the
/// PCI host bridges from `pci_space_info`, and finally the NUMA distance map
/// (only when more than one NUMA node is configured).
///
/// Returns the serialized FDT blob.
///
/// # Errors
///
/// Returns the error reported by the `FdtWriter` if creating the writer or
/// writing any node or property fails.
#[allow(clippy::too_many_arguments)]
pub fn create_fdt<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>(
    guest_mem: &GuestMemoryMmap,
    cmdline: &str,
    vcpu_mpidr: Vec<u64>,
    vcpu_topology: Option<(u8, u8, u8)>,
    device_info: &HashMap<(DeviceType, String), T, S>,
    gic_device: &dyn GicDevice,
    initrd: &Option<InitramfsConfig>,
    pci_space_info: &[PciSpaceInfo],
    numa_nodes: &NumaNodes,
    virtio_iommu_bdf: Option<u32>,
    pmu_supported: bool,
) -> FdtWriterResult<Vec<u8>> {
    // Allocate the writer holding the blob. A failure here is reported to the
    // caller like every other writer error instead of aborting the process.
    let mut fdt = FdtWriter::new()?;

    // For an explanation why these nodes were introduced in the blob take a look at
    // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L845
    // Look for "Required nodes and properties".

    // Header or the root node as per above mentioned documentation.
    let root_node = fdt.begin_node("")?;
    fdt.property_string("compatible", "linux,dummy-virt")?;
    // For info on #address-cells and size-cells read "Note about cells and address representation"
    // from the above mentioned txt file.
    fdt.property_u32("#address-cells", ADDRESS_CELLS)?;
    fdt.property_u32("#size-cells", SIZE_CELLS)?;
    // This is not mandatory but we use it to point the root node to the node
    // containing description of the interrupt controller for this VM.
    fdt.property_u32("interrupt-parent", GIC_PHANDLE)?;

    create_cpu_nodes(&mut fdt, &vcpu_mpidr, vcpu_topology, numa_nodes)?;
    create_memory_node(&mut fdt, guest_mem, numa_nodes)?;
    create_chosen_node(&mut fdt, cmdline, initrd)?;
    create_gic_node(&mut fdt, gic_device)?;
    create_timer_node(&mut fdt)?;
    if pmu_supported {
        create_pmu_node(&mut fdt, vcpu_mpidr.len())?;
    }
    create_clock_node(&mut fdt)?;
    create_psci_node(&mut fdt)?;
    create_devices_node(&mut fdt, device_info)?;
    create_pci_nodes(&mut fdt, pci_space_info, virtio_iommu_bdf)?;
    // The distance map is only meaningful with more than one NUMA node.
    if numa_nodes.len() > 1 {
        create_distance_map_node(&mut fdt, numa_nodes)?;
    }

    // End Header node.
    fdt.end_node(root_node)?;

    fdt.finish()
}
pub fn write_fdt_to_memory(fdt_final: Vec<u8>, guest_mem: &GuestMemoryMmap) -> Result<()> {
// Write FDT to memory.
let fdt_address = GuestAddress(get_fdt_addr());
guest_mem
.write_slice(fdt_final.as_slice(), fdt_address)
.map_err(Error::WriteFdtToMemory)?;
Ok(())
}
// The following are auxiliary functions for creating the different nodes that we append to our FDT.
/// Writes the `cpus` node: one `cpu@N` child per entry of `vcpu_mpidr` and,
/// when `vcpu_topology` is given, a `cpu-map` child describing the topology.
///
/// * `vcpu_mpidr` - MPIDR value of each vCPU; the position in the slice is the
///   vCPU index.
/// * `vcpu_topology` - `(threads_per_core, cores_per_package, packages)`.
/// * `numa_nodes` - NUMA configuration; `numa-node-id` is only emitted when
///   more than one NUMA node exists.
///
/// # Panics
///
/// Panics if more than one NUMA node is configured and the node ids are not
/// exactly `0..numa_nodes.len()`.
fn create_cpu_nodes(
    fdt: &mut FdtWriter,
    vcpu_mpidr: &[u64],
    vcpu_topology: Option<(u8, u8, u8)>,
    numa_nodes: &NumaNodes,
) -> FdtWriterResult<()> {
    // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/arm/cpus.yaml.
    let cpus_node = fdt.begin_node("cpus")?;
    fdt.property_u32("#address-cells", 0x1)?;
    fdt.property_u32("#size-cells", 0x0)?;

    let num_cpus = vcpu_mpidr.len();
    for (cpu_id, mpidr) in vcpu_mpidr.iter().enumerate() {
        let cpu_name = format!("cpu@{:x}", cpu_id);
        let cpu_node = fdt.begin_node(&cpu_name)?;
        fdt.property_string("device_type", "cpu")?;
        fdt.property_string("compatible", "arm,arm-v8")?;
        if num_cpus > 1 {
            // This is required on armv8 64-bit. See aforementioned documentation.
            fdt.property_string("enable-method", "psci")?;
        }
        // Set the field to the low bits of the MPIDR - Multiprocessor Affinity Register.
        // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0488c/BABHBJCI.html.
        // NOTE(review): the mask 0x7FFFFF keeps bits [22:0] (23 bits) although this
        // was historically described as "first 24 bits" (0xFFFFFF) - confirm intent.
        fdt.property_u32("reg", (mpidr & 0x7FFFFF) as u32)?;
        // vCPU phandles are allocated consecutively after the fixed phandles.
        fdt.property_u32("phandle", cpu_id as u32 + FIRST_VCPU_PHANDLE)?;

        // Add `numa-node-id` property if there is any numa config.
        if numa_nodes.len() > 1 {
            for numa_node_idx in 0..numa_nodes.len() {
                let numa_node = numa_nodes.get(&(numa_node_idx as u32));
                if numa_node.unwrap().cpus.contains(&(cpu_id as u8)) {
                    fdt.property_u32("numa-node-id", numa_node_idx as u32)?;
                }
            }
        }
        fdt.end_node(cpu_node)?;
    }

    if let Some((threads_per_core, cores_per_package, packages)) = vcpu_topology {
        // Widen before doing arithmetic: the vCPU index is a product of the
        // three topology dimensions and must not be computed in `u8`, where it
        // would overflow for larger topologies.
        let threads_per_core = u32::from(threads_per_core);
        let cores_per_package = u32::from(cores_per_package);
        let packages = u32::from(packages);

        let cpu_map_node = fdt.begin_node("cpu-map")?;

        // Create device tree nodes with regard of above mapping.
        for cluster_idx in 0..packages {
            let cluster_name = format!("cluster{:x}", cluster_idx);
            let cluster_node = fdt.begin_node(&cluster_name)?;

            for core_idx in 0..cores_per_package {
                let core_name = format!("core{:x}", core_idx);
                let core_node = fdt.begin_node(&core_name)?;

                for thread_idx in 0..threads_per_core {
                    let thread_name = format!("thread{:x}", thread_idx);
                    let thread_node = fdt.begin_node(&thread_name)?;
                    // vCPUs are numbered thread-first, then core, then cluster.
                    let cpu_idx = threads_per_core * cores_per_package * cluster_idx
                        + threads_per_core * core_idx
                        + thread_idx;
                    // `cpu` references the phandle written on the matching `cpu@N` node.
                    fdt.property_u32("cpu", cpu_idx + FIRST_VCPU_PHANDLE)?;
                    fdt.end_node(thread_node)?;
                }

                fdt.end_node(core_node)?;
            }
            fdt.end_node(cluster_node)?;
        }

        fdt.end_node(cpu_map_node)?;
    } else {
        debug!("Boot using device tree, CPU topology is not (correctly) specified");
    }

    fdt.end_node(cpus_node)?;

    Ok(())
}
/// Writes the memory description of the guest.
///
/// Without NUMA (at most one NUMA node) a single `memory` node is emitted that
/// spans from `RAM_64BIT_START` to the last guest address. With NUMA, one
/// `memory@<start>` node is emitted per memory region of every NUMA node,
/// tagged with its `numa-node-id`.
///
/// # Panics
///
/// Panics in the NUMA case if the node ids are not exactly `0..numa_nodes.len()`.
fn create_memory_node(
    fdt: &mut FdtWriter,
    guest_mem: &GuestMemoryMmap,
    numa_nodes: &NumaNodes,
) -> FdtWriterResult<()> {
    if numa_nodes.len() <= 1 {
        let ram_size = guest_mem.last_addr().raw_value() - super::layout::RAM_64BIT_START + 1;
        // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L960
        // for an explanation of this.
        let reg = [super::layout::RAM_64BIT_START as u64, ram_size as u64];
        let node = fdt.begin_node("memory")?;
        fdt.property_string("device_type", "memory")?;
        fdt.property_array_u64("reg", &reg)?;
        fdt.end_node(node)?;
        return Ok(());
    }

    for node_id in 0..numa_nodes.len() {
        let numa_node = numa_nodes.get(&(node_id as u32)).unwrap();
        // Every memory zone of a NUMA node gets its own memory node; different
        // NUMA nodes should not share the same memory zones.
        for region in numa_node.memory_regions.iter() {
            let region_start: u64 = region.start_addr().raw_value();
            let region_size: u64 = region.size() as u64;

            // With feature `acpi` enabled, RAM at 0-4M is for edk2 only
            // and should be hidden to the guest.
            #[cfg(feature = "acpi")]
            if region_start == 0 {
                continue;
            }

            let reg = [region_start, region_size];
            let node_name = format!("memory@{:x}", region_start);
            let node = fdt.begin_node(&node_name)?;
            fdt.property_string("device_type", "memory")?;
            fdt.property_array_u64("reg", &reg)?;
            fdt.property_u32("numa-node-id", node_id as u32)?;
            fdt.end_node(node)?;
        }
    }

    Ok(())
}
/// Writes the `chosen` node carrying the kernel command line (`bootargs`) and,
/// when an initramfs is configured, its start and end addresses.
fn create_chosen_node(
    fdt: &mut FdtWriter,
    cmdline: &str,
    initrd: &Option<InitramfsConfig>,
) -> FdtWriterResult<()> {
    let node = fdt.begin_node("chosen")?;
    fdt.property_string("bootargs", cmdline)?;

    if let Some(config) = initrd.as_ref() {
        // The end address is the load address plus the initramfs size.
        let start = config.address.raw_value() as u64;
        let end = config.address.raw_value() + config.size as u64;
        fdt.property_u64("linux,initrd-start", start)?;
        fdt.property_u64("linux,initrd-end", end)?;
    }

    fdt.end_node(node)?;
    Ok(())
}
/// Writes the `intc` interrupt controller node for `gic_device` and, when the
/// device reports MSI support, a nested `msic` MSI controller node.
fn create_gic_node(fdt: &mut FdtWriter, gic_device: &dyn GicDevice) -> FdtWriterResult<()> {
    let reg = gic_device.device_properties();

    let gic_node = fdt.begin_node("intc")?;
    fdt.property_string("compatible", gic_device.fdt_compatibility())?;
    fdt.property_null("interrupt-controller")?;
    // "interrupt-cells" field specifies the number of cells needed to encode an
    // interrupt source. The type shall be a <u32> and the value shall be 3 if no
    // PPI affinity description is required.
    fdt.property_u32("#interrupt-cells", 3)?;
    fdt.property_array_u64("reg", reg)?;
    fdt.property_u32("phandle", GIC_PHANDLE)?;
    fdt.property_u32("#address-cells", 2)?;
    fdt.property_u32("#size-cells", 2)?;
    fdt.property_null("ranges")?;

    // Maintenance interrupt of the GIC: a level-triggered PPI.
    let maintenance_irq = [
        GIC_FDT_IRQ_TYPE_PPI,
        gic_device.fdt_maint_irq(),
        IRQ_TYPE_LEVEL_HI,
    ];
    fdt.property_array_u32("interrupts", &maintenance_irq)?;

    if gic_device.msi_compatible() {
        let msi_node = fdt.begin_node("msic")?;
        fdt.property_string("compatible", gic_device.msi_compatibility())?;
        fdt.property_null("msi-controller")?;
        fdt.property_u32("phandle", MSI_PHANDLE)?;
        fdt.property_array_u64("reg", gic_device.msi_properties())?;
        fdt.end_node(msi_node)?;
    }

    fdt.end_node(gic_node)?;
    Ok(())
}
/// Writes the `apb-pclk` fixed-clock node (24 MHz) that other device nodes
/// reference through `CLOCK_PHANDLE`.
fn create_clock_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
    // The Advanced Peripheral Bus (APB) is part of the Advanced Microcontroller Bus Architecture
    // (AMBA) protocol family. It defines a low-cost interface that is optimized for minimal power
    // consumption and reduced interface complexity.
    // PCLK is the clock source and this node defines exactly the clock for the APB.
    let node = fdt.begin_node("apb-pclk")?;
    fdt.property_string("compatible", "fixed-clock")?;
    fdt.property_u32("#clock-cells", 0x0)?;
    fdt.property_u32("clock-frequency", 24_000_000)?;
    fdt.property_string("clock-output-names", "clk24mhz")?;
    fdt.property_u32("phandle", CLOCK_PHANDLE)?;
    fdt.end_node(node)?;
    Ok(())
}
/// Writes the `timer` node for the ARMv8 architected timer with its four
/// level-triggered PPIs.
fn create_timer_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
    // See
    // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/interrupt-controller/arch_timer.txt
    // These are fixed interrupt numbers for the timer device.
    const TIMER_IRQS: [u32; 4] = [13, 14, 11, 10];

    // Each interrupt is encoded as three cells: <type number flags>.
    let mut interrupts: Vec<u32> = Vec::new();
    for irq in TIMER_IRQS.iter().copied() {
        interrupts.extend_from_slice(&[GIC_FDT_IRQ_TYPE_PPI, irq, IRQ_TYPE_LEVEL_HI]);
    }

    let node = fdt.begin_node("timer")?;
    fdt.property_string("compatible", "arm,armv8-timer")?;
    fdt.property_null("always-on")?;
    fdt.property_array_u32("interrupts", &interrupts)?;
    fdt.end_node(node)?;
    Ok(())
}
/// Writes the `psci` node advertising PSCI 0.2 with the HVC calling method.
fn create_psci_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
    let node = fdt.begin_node("psci")?;
    fdt.property_string("compatible", "arm,psci-0.2")?;
    // Two methods available: hvc and smc.
    // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit
    // instead of SMC. So, since we are using kvm, we need to use hvc.
    fdt.property_string("method", "hvc")?;
    fdt.end_node(node)?;
    Ok(())
}
/// Writes a `virtio_mmio@<addr>` node for the virtio-mmio device described by
/// `dev_info`, using an edge-rising SPI.
// NOTE(review): unlike the serial/rtc/gpio nodes, the interrupt number is used
// as-is and `IRQ_BASE` is not subtracted - confirm this is intended.
fn create_virtio_node<T: DeviceInfoForFdt + Clone + Debug>(
    fdt: &mut FdtWriter,
    dev_info: &T,
) -> FdtWriterResult<()> {
    let reg = [dev_info.addr(), dev_info.length()];
    let interrupts = [GIC_FDT_IRQ_TYPE_SPI, dev_info.irq(), IRQ_TYPE_EDGE_RISING];

    let node_name = format!("virtio_mmio@{:x}", dev_info.addr());
    let node = fdt.begin_node(&node_name)?;
    fdt.property_string("compatible", "virtio,mmio")?;
    fdt.property_array_u64("reg", &reg)?;
    fdt.property_array_u32("interrupts", &interrupts)?;
    fdt.property_u32("interrupt-parent", GIC_PHANDLE)?;
    fdt.end_node(node)?;
    Ok(())
}
/// Writes a `pl011@<addr>` serial node for the device described by `dev_info`.
///
/// The interrupt is an edge-rising SPI whose number is the device IRQ minus
/// `IRQ_BASE`; the node is clocked from the `apb_pclk` fixed clock.
fn create_serial_node<T: DeviceInfoForFdt + Clone + Debug>(
    fdt: &mut FdtWriter,
    dev_info: &T,
) -> FdtWriterResult<()> {
    // Two NUL-terminated compatible strings, written as a raw byte property.
    const COMPATIBLE: &[u8] = b"arm,pl011\0arm,primecell\0";

    let reg = [dev_info.addr(), dev_info.length()];
    let spi_number = dev_info.irq() - IRQ_BASE;
    let interrupts = [GIC_FDT_IRQ_TYPE_SPI, spi_number, IRQ_TYPE_EDGE_RISING];

    let node_name = format!("pl011@{:x}", dev_info.addr());
    let node = fdt.begin_node(&node_name)?;
    fdt.property("compatible", COMPATIBLE)?;
    fdt.property_array_u64("reg", &reg)?;
    fdt.property_u32("clocks", CLOCK_PHANDLE)?;
    fdt.property_string("clock-names", "apb_pclk")?;
    fdt.property_array_u32("interrupts", &interrupts)?;
    fdt.end_node(node)?;
    Ok(())
}
/// Writes an `rtc@<addr>` PL031 real-time-clock node for the device described
/// by `dev_info`.
///
/// The interrupt is a level-high SPI whose number is the device IRQ minus
/// `IRQ_BASE`; the node is clocked from the `apb_pclk` fixed clock.
fn create_rtc_node<T: DeviceInfoForFdt + Clone + Debug>(
    fdt: &mut FdtWriter,
    dev_info: &T,
) -> FdtWriterResult<()> {
    // Two NUL-terminated compatible strings, written as a raw byte property.
    const COMPATIBLE: &[u8] = b"arm,pl031\0arm,primecell\0";

    let reg = [dev_info.addr(), dev_info.length()];
    let spi_number = dev_info.irq() - IRQ_BASE;
    let interrupts = [GIC_FDT_IRQ_TYPE_SPI, spi_number, IRQ_TYPE_LEVEL_HI];

    let node_name = format!("rtc@{:x}", dev_info.addr());
    let node = fdt.begin_node(&node_name)?;
    fdt.property("compatible", COMPATIBLE)?;
    fdt.property_array_u64("reg", &reg)?;
    fdt.property_array_u32("interrupts", &interrupts)?;
    fdt.property_u32("clocks", CLOCK_PHANDLE)?;
    fdt.property_string("clock-names", "apb_pclk")?;
    fdt.end_node(node)?;
    Ok(())
}
/// Writes a `pl061@<addr>` GPIO controller node for the device described by
/// `dev_info`, followed by a `gpio-keys` node exposing a power-off button
/// (`KEY_POWER`) wired to that controller.
fn create_gpio_node<T: DeviceInfoForFdt + Clone + Debug>(
    fdt: &mut FdtWriter,
    dev_info: &T,
) -> FdtWriterResult<()> {
    // Two NUL-terminated compatible strings, written as a raw byte property.
    const COMPATIBLE: &[u8] = b"arm,pl061\0arm,primecell\0";

    // PL061 GPIO controller node
    let reg = [dev_info.addr(), dev_info.length()];
    let spi_number = dev_info.irq() - IRQ_BASE;
    let interrupts = [GIC_FDT_IRQ_TYPE_SPI, spi_number, IRQ_TYPE_EDGE_RISING];

    let controller_name = format!("pl061@{:x}", dev_info.addr());
    let controller = fdt.begin_node(&controller_name)?;
    fdt.property("compatible", COMPATIBLE)?;
    fdt.property_array_u64("reg", &reg)?;
    fdt.property_array_u32("interrupts", &interrupts)?;
    fdt.property_null("gpio-controller")?;
    fdt.property_u32("#gpio-cells", 2)?;
    fdt.property_u32("clocks", CLOCK_PHANDLE)?;
    fdt.property_string("clock-names", "apb_pclk")?;
    fdt.property_u32("phandle", GPIO_PHANDLE)?;
    fdt.end_node(controller)?;

    // gpio-keys node
    let keys = fdt.begin_node("gpio-keys")?;
    fdt.property_string("compatible", "gpio-keys")?;
    fdt.property_u32("#size-cells", 0)?;
    fdt.property_u32("#address-cells", 1)?;

    let poweroff_button = fdt.begin_node("button@1")?;
    fdt.property_string("label", "GPIO Key Poweroff")?;
    fdt.property_u32("linux,code", KEY_POWER)?;
    // GPIO specifier: the controller phandle followed by two cells
    // (matching `#gpio-cells = 2` above).
    fdt.property_array_u32("gpios", &[GPIO_PHANDLE, 3, 0])?;
    fdt.end_node(poweroff_button)?;

    fdt.end_node(keys)?;
    Ok(())
}
/// Writes the nodes of all devices in `dev_info`.
///
/// GPIO, RTC and serial devices are written as they are encountered while
/// iterating the map. Virtio devices are collected first and then written in
/// descending address order.
fn create_devices_node<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>(
    fdt: &mut FdtWriter,
    dev_info: &HashMap<(DeviceType, String), T, S>,
) -> FdtWriterResult<()> {
    // Virtio devices are deferred so they can be emitted in a defined order.
    let mut virtio_devices: Vec<&T> = Vec::new();

    for ((device_type, _), info) in dev_info {
        match device_type {
            DeviceType::Gpio => create_gpio_node(fdt, info)?,
            DeviceType::Rtc => create_rtc_node(fdt, info)?,
            DeviceType::Serial => create_serial_node(fdt, info)?,
            DeviceType::Virtio(_) => virtio_devices.push(info),
        }
    }

    // Sort the virtio devices by address from low to high, then walk them from
    // high to low. Current address allocation strategy in cloud-hypervisor is:
    // the first created device will be allocated to higher address, so walking
    // backwards makes the older created device appear in front of the newer
    // created device in the FDT.
    virtio_devices.sort_by_key(|info| info.addr());
    for info in virtio_devices.into_iter().rev() {
        create_virtio_node(fdt, info)?;
    }

    Ok(())
}
/// Writes the `pmu` node for the ARMv8 PMUv3, using PPI `AARCH64_PMU_IRQ`.
///
/// `cpu_nums` is the number of vCPUs. One bit per vCPU is set in the CPU-mask
/// field of the interrupt flags cell. That field is only 8 bits wide
/// (`GIC_FDT_IRQ_PPI_CPU_MASK`), so with 8 or more vCPUs the field is simply
/// all ones.
fn create_pmu_node(fdt: &mut FdtWriter, cpu_nums: usize) -> FdtWriterResult<()> {
    let compatible = "arm,armv8-pmuv3";

    // One bit per vCPU. `1 << n` is only defined for n < 32 on a `u32`
    // (it panics in debug builds and wraps the shift amount in release
    // builds), so saturate to "all bits set" for 32 or more vCPUs.
    let all_cpus: u32 = if cpu_nums >= 32 {
        u32::MAX
    } else {
        (1u32 << cpu_nums) - 1
    };
    // Move the bits into the CPU-mask field; bits beyond the field are dropped.
    let cpu_mask: u32 = (all_cpus << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;

    let irq = [
        GIC_FDT_IRQ_TYPE_PPI,
        AARCH64_PMU_IRQ,
        cpu_mask | IRQ_TYPE_LEVEL_HI,
    ];

    let pmu_node = fdt.begin_node("pmu")?;
    fdt.property_string("compatible", compatible)?;
    fdt.property_array_u32("interrupts", &irq)?;
    fdt.end_node(pmu_node)?;
    Ok(())
}
/// Writes one `pci@<mmio config address>` host bridge node per entry of
/// `pci_device_info` (one entry per PCI segment).
///
/// Each node describes three address windows in `ranges` (I/O, 32-bit MMIO and
/// 64-bit MMIO), the ECAM region in `reg`, and an `msi-map` routing the
/// segment's requester ids to the MSI controller.
///
/// When `virtio_iommu_bdf` is `Some`, the node of segment 0 additionally gets
/// an `iommu-map` property and a `virtio_iommu@<bdf>` child node.
fn create_pci_nodes(
fdt: &mut FdtWriter,
pci_device_info: &[PciSpaceInfo],
virtio_iommu_bdf: Option<u32>,
) -> FdtWriterResult<()> {
// Add node for PCIe controller.
// See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel
// and https://elinux.org/Device_Tree_Usage.
// In multiple PCI segments setup, each PCI segment needs a PCI node.
for pci_device_info_elem in pci_device_info.iter() {
// EDK2 requires the PCIe high space above 4G address.
// The actual space in CLH follows the RAM. If the RAM space is small, the PCIe high space
// could fall below 4G.
// Here we cut off PCI device space below 8G in FDT to workaround the EDK2 check.
// But the address written in ACPI is not impacted.
// When the window is clamped to start at PCI_HIGH_BASE, its size is reduced
// by the part that was cut off so that the end address stays the same.
let (pci_device_base_64bit, pci_device_size_64bit) = if cfg!(feature = "acpi")
&& (pci_device_info_elem.pci_device_space_start < PCI_HIGH_BASE)
{
(
PCI_HIGH_BASE,
pci_device_info_elem.pci_device_space_size
- (PCI_HIGH_BASE - pci_device_info_elem.pci_device_space_start),
)
} else {
(
pci_device_info_elem.pci_device_space_start,
pci_device_info_elem.pci_device_space_size,
)
};
// There is no specific requirement of the 32bit MMIO range, and
// therefore at least we can make these ranges 4K aligned.
// The 32-bit device area is split evenly between the segments, rounding each
// share down to a multiple of 4 KiB; segment `n` gets the n-th share.
let pci_device_size_32bit: u64 =
MEM_32BIT_DEVICES_SIZE / ((1 << 12) * pci_device_info.len() as u64) * (1 << 12);
let pci_device_base_32bit: u64 = MEM_32BIT_DEVICES_START.0
+ pci_device_size_32bit * pci_device_info_elem.pci_segment_id as u64;
// Three entries of seven cells each:
// <flags, PCI address (2 cells), CPU address (2 cells), size (2 cells)>.
let ranges = [
// io addresses. Since AArch64 will not use IO address,
// we can set the same IO address range for every segment.
0x1000000,
0_u32,
0_u32,
(MEM_PCI_IO_START.0 >> 32) as u32,
MEM_PCI_IO_START.0 as u32,
(MEM_PCI_IO_SIZE >> 32) as u32,
MEM_PCI_IO_SIZE as u32,
// mmio addresses
0x2000000, // (ss = 10: 32-bit memory space)
(pci_device_base_32bit >> 32) as u32, // PCI address
pci_device_base_32bit as u32,
(pci_device_base_32bit >> 32) as u32, // CPU address
pci_device_base_32bit as u32,
(pci_device_size_32bit >> 32) as u32, // size
pci_device_size_32bit as u32,
// device addresses
0x3000000, // (ss = 11: 64-bit memory space)
(pci_device_base_64bit >> 32) as u32, // PCI address
pci_device_base_64bit as u32,
(pci_device_base_64bit >> 32) as u32, // CPU address
pci_device_base_64bit as u32,
(pci_device_size_64bit >> 32) as u32, // size
pci_device_size_64bit as u32,
];
let bus_range = [0, 0]; // Only bus 0
// ECAM (MMIO configuration space) region of this segment: <address, size>.
let reg = [
pci_device_info_elem.mmio_config_address,
PCI_MMIO_CONFIG_SIZE_PER_SEGMENT,
];
// See kernel document Documentation/devicetree/bindings/pci/pci-msi.txt
let msi_map = [
// rid-base: A single cell describing the first RID matched by the entry.
0x0,
// msi-controller: A single phandle to an MSI controller.
MSI_PHANDLE,
// msi-base: An msi-specifier describing the msi-specifier produced for the
// first RID matched by the entry.
(pci_device_info_elem.pci_segment_id as u32) << 8,
// length: A single cell describing how many consecutive RIDs are matched
// following the rid-base.
0x100,
];
let pci_node_name = format!("pci@{:x}", pci_device_info_elem.mmio_config_address);
let pci_node = fdt.begin_node(&pci_node_name)?;
fdt.property_string("compatible", "pci-host-ecam-generic")?;
fdt.property_string("device_type", "pci")?;
fdt.property_array_u32("ranges", &ranges)?;
fdt.property_array_u32("bus-range", &bus_range)?;
fdt.property_u32(
"linux,pci-domain",
pci_device_info_elem.pci_segment_id as u32,
)?;
fdt.property_u32("#address-cells", 3)?;
fdt.property_u32("#size-cells", 2)?;
fdt.property_array_u64("reg", &reg)?;
fdt.property_u32("#interrupt-cells", 1)?;
fdt.property_null("interrupt-map")?;
fdt.property_null("interrupt-map-mask")?;
fdt.property_null("dma-coherent")?;
fdt.property_array_u32("msi-map", &msi_map)?;
fdt.property_u32("msi-parent", MSI_PHANDLE)?;
// The virtio-iommu description is only attached to segment 0.
if pci_device_info_elem.pci_segment_id == 0 {
if let Some(virtio_iommu_bdf) = virtio_iommu_bdf {
// See kernel document Documentation/devicetree/bindings/pci/pci-iommu.txt
// for 'iommu-map' attribute setting.
// Two entries of four cells each: the first covers the RIDs
// [0, bdf), the second covers [bdf + 1, 0xffff], so every RID except
// the virtio-iommu device's own BDF is mapped.
let iommu_map = [
0_u32,
VIRTIO_IOMMU_PHANDLE,
0_u32,
virtio_iommu_bdf,
virtio_iommu_bdf + 1,
VIRTIO_IOMMU_PHANDLE,
virtio_iommu_bdf + 1,
0xffff - virtio_iommu_bdf,
];
fdt.property_array_u32("iommu-map", &iommu_map)?;
// See kernel document Documentation/devicetree/bindings/virtio/iommu.txt
// for virtio-iommu node settings.
let virtio_iommu_node_name = format!("virtio_iommu@{:x}", virtio_iommu_bdf);
let virtio_iommu_node = fdt.begin_node(&virtio_iommu_node_name)?;
fdt.property_u32("#iommu-cells", 1)?;
fdt.property_string("compatible", "virtio,pci-iommu")?;
// 'reg' is a five-cell address encoded as
// (phys.hi phys.mid phys.lo size.hi size.lo). phys.hi should contain the
// device's BDF as 0b00000000 bbbbbbbb dddddfff 00000000. The other cells
// should be zero.
let reg = [virtio_iommu_bdf << 8, 0_u32, 0_u32, 0_u32, 0_u32];
fdt.property_array_u32("reg", &reg)?;
fdt.property_u32("phandle", VIRTIO_IOMMU_PHANDLE)?;
fdt.end_node(virtio_iommu_node)?;
}
}
fdt.end_node(pci_node)?;
}
Ok(())
}
/// Writes the `distance-map` node describing the distances between NUMA nodes.
///
/// # Panics
///
/// Panics if the NUMA node ids are not exactly `0..numa_nodes.len()`, or if a
/// node has no distance entry for one of the destinations
/// `0..=distances.len()` other than itself.
fn create_distance_map_node(fdt: &mut FdtWriter, numa_nodes: &NumaNodes) -> FdtWriterResult<()> {
    let node = fdt.begin_node("distance-map")?;
    fdt.property_string("compatible", "numa-distance-map-v1")?;

    // Construct the distance matrix.
    // 1. We use the word entry to describe a distance from a node to
    //    its destination, e.g. 0 -> 1 = 20 is described as <0 1 20>.
    // 2. Each entry represents distance from first node to second node.
    //    The distances are equal in either direction.
    // 3. The distance from a node to self (local distance) is represented
    //    with value 10 and all internode distance should be represented with
    //    a value greater than 10.
    // 4. distance-matrix should have entries in lexicographical ascending
    //    order of nodes.
    let mut matrix: Vec<u32> = Vec::new();
    for src in 0..numa_nodes.len() {
        let src_node = numa_nodes.get(&(src as u32)).unwrap();
        // A node stores the distances to every other node, so there is one
        // more destination (the node itself) than stored distances.
        for dst in 0..src_node.distances.len() + 1 {
            let distance = if src == dst {
                10_u32
            } else {
                *src_node.distances.get(&(dst as u32)).unwrap() as u32
            };
            matrix.extend_from_slice(&[src as u32, dst as u32, distance]);
        }
    }

    fdt.property_array_u32("distance-matrix", &matrix)?;
    fdt.end_node(node)?;
    Ok(())
}
/// Parses the DTB binary `dtb` and logs its content at debug level.
///
/// Parsing problems are only logged; this function never returns an error.
pub fn print_fdt(dtb: &[u8]) {
    let fdt = match fdt_parser::Fdt::new(dtb) {
        Ok(fdt) => fdt,
        Err(_) => {
            debug!("Failed to parse FDT for debugging.");
            return;
        }
    };

    match fdt.find_node("/") {
        Some(root) => {
            debug!("Printing the FDT:");
            print_node(root, 0);
        }
        None => debug!("Failed to find root node in FDT for debugging."),
    }
}
/// Recursively logs `node`, its properties and its children at debug level.
///
/// `n_spaces` is the indentation of the node name. Properties are indented by
/// two more spaces and children are printed with `n_spaces + 2`.
///
/// A property value is printed as a string when it is a valid NUL-terminated
/// UTF-8 C string, and as an array of big-endian `u32` words otherwise.
fn print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize) {
    debug!("{:indent$}{}/", "", node.name, indent = n_spaces);
    for property in node.properties() {
        let name = property.name;

        // If the property is 'compatible', its value requires special handling.
        // The u8 array could contain multiple null-terminated strings.
        // We take at most 255 bytes of it, replace all 'null' characters with
        // spaces and append a single terminating 'null'.
        let value = if name == "compatible" {
            let len = cmp::min(255, property.value.len());
            let mut compatible: Vec<u8> = property.value[..len]
                .iter()
                .map(|&c| if c == 0 { b' ' } else { c })
                .collect();
            compatible.push(0);
            compatible
        } else {
            property.value.to_vec()
        };
        let value = &value;

        // Now the value can be either:
        //   - A null-terminated C string, or
        //   - Binary data
        // We follow a very simple logic to present the value:
        //   - At first, try to convert it to CStr and print,
        //   - If failed, print it as u32 array.
        let value_result = CStr::from_bytes_with_nul(value)
            .ok()
            .and_then(|value_cstr| value_cstr.to_str().ok());

        if let Some(value_str) = value_result {
            debug!(
                "{:indent$}{} : {:#?}",
                "",
                name,
                value_str,
                indent = (n_spaces + 2)
            );
        } else {
            // `read_u32_into` panics unless the source is exactly four bytes per
            // destination word, so only the whole-word prefix is decoded. Up to
            // three trailing bytes of an oddly sized property are not printed.
            let word_count = value.len() / 4;
            let mut array = vec![0u32; word_count];
            BigEndian::read_u32_into(&value[..word_count * 4], &mut array);
            debug!(
                "{:indent$}{} : {:X?}",
                "",
                name,
                array,
                indent = (n_spaces + 2)
            );
        };
    }

    // Print children nodes if there is any
    for child in node.children() {
        print_node(child, n_spaces + 2);
    }
}