mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2025-02-21 19:02:30 +00:00
arch: x86_64: Add 5th level of paging when needed
For correctness, when the CPUID supports the LA57 feature, the VMM sets the CR4.LA57 bit, which means a fifth level of page table might be needed. Even if it's not needed because the kernel should not use addresses over 1GiB, it's better to define this new level anyway. This patch only applies to the Linux boot codepath, which means it affects both vmlinux without PVH and bzImage binaries. The bzImage does not need this since the page tables and the CR4 register are set by the kernel's decompression code. And for vmlinux with PVH, if we follow the PVH specification, the kernel must be responsible for setting things up, but the implementation is missing. This means that, for now, PVH does not support LA57 with 5 levels of paging. Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
parent
abd6204d27
commit
bf37ebdcb6
@ -47,9 +47,10 @@ pub const BOOT_STACK_START: GuestAddress = GuestAddress(0x8000);
|
||||
pub const BOOT_STACK_POINTER: GuestAddress = GuestAddress(0x8ff0);
|
||||
|
||||
// Initial pagetables.
|
||||
pub const PML4_START: GuestAddress = GuestAddress(0x9000);
|
||||
pub const PDPTE_START: GuestAddress = GuestAddress(0xa000);
|
||||
pub const PDE_START: GuestAddress = GuestAddress(0xb000);
|
||||
pub const PML5_START: GuestAddress = GuestAddress(0x9000);
|
||||
pub const PML4_START: GuestAddress = GuestAddress(0xa000);
|
||||
pub const PDPTE_START: GuestAddress = GuestAddress(0xb000);
|
||||
pub const PDE_START: GuestAddress = GuestAddress(0xc000);
|
||||
|
||||
/// Kernel command line start address.
|
||||
pub const CMDLINE_START: GuestAddress = GuestAddress(0x20000);
|
||||
|
@ -14,7 +14,9 @@ use super::BootProtocol;
|
||||
use arch_gen::x86::msr_index;
|
||||
use kvm_bindings::{kvm_fpu, kvm_msr_entry, kvm_regs, kvm_sregs, Msrs};
|
||||
use kvm_ioctls::VcpuFd;
|
||||
use layout::{BOOT_GDT_START, BOOT_IDT_START, PDE_START, PDPTE_START, PML4_START, PVH_INFO_START};
|
||||
use layout::{
|
||||
BOOT_GDT_START, BOOT_IDT_START, PDE_START, PDPTE_START, PML4_START, PML5_START, PVH_INFO_START,
|
||||
};
|
||||
use vm_memory::{Address, Bytes, GuestMemory, GuestMemoryError, GuestMemoryMmap};
|
||||
|
||||
// MTRR constants
|
||||
@ -45,6 +47,8 @@ pub enum Error {
|
||||
WritePDEAddress(GuestMemoryError),
|
||||
/// Writing PML4 to RAM failed.
|
||||
WritePML4Address(GuestMemoryError),
|
||||
/// Writing PML5 to RAM failed.
|
||||
WritePML5Address(GuestMemoryError),
|
||||
}
|
||||
|
||||
pub type Result<T> = result::Result<T, Error>;
|
||||
@ -221,7 +225,18 @@ fn configure_segments_and_sregs(
|
||||
}
|
||||
|
||||
fn setup_page_tables(mem: &GuestMemoryMmap, sregs: &mut kvm_sregs) -> Result<()> {
|
||||
// Puts PML4 right after zero page but aligned to 4k.
|
||||
// Puts PML5 or PML4 right after zero page but aligned to 4k.
|
||||
|
||||
if unsafe { std::arch::x86_64::__cpuid(7).ecx } & (1 << 16) != 0 {
|
||||
// Entry covering VA [0..256TB)
|
||||
mem.write_obj(PML4_START.raw_value() | 0x03, PML5_START)
|
||||
.map_err(Error::WritePML5Address)?;
|
||||
|
||||
sregs.cr3 = PML5_START.raw_value();
|
||||
sregs.cr4 |= X86_CR4_LA57;
|
||||
} else {
|
||||
sregs.cr3 = PML4_START.raw_value();
|
||||
}
|
||||
|
||||
// Entry covering VA [0..512GB)
|
||||
mem.write_obj(PDPTE_START.raw_value() | 0x03, PML4_START)
|
||||
@ -230,6 +245,7 @@ fn setup_page_tables(mem: &GuestMemoryMmap, sregs: &mut kvm_sregs) -> Result<()>
|
||||
// Entry covering VA [0..1GB)
|
||||
mem.write_obj(PDE_START.raw_value() | 0x03, PDPTE_START)
|
||||
.map_err(Error::WritePDPTEAddress)?;
|
||||
|
||||
// 512 2MB entries together covering VA [0..1GB). Note we are assuming
|
||||
// CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do.
|
||||
for i in 0..512 {
|
||||
@ -237,14 +253,9 @@ fn setup_page_tables(mem: &GuestMemoryMmap, sregs: &mut kvm_sregs) -> Result<()>
|
||||
.map_err(Error::WritePDEAddress)?;
|
||||
}
|
||||
|
||||
sregs.cr3 = PML4_START.raw_value();
|
||||
sregs.cr4 |= X86_CR4_PAE;
|
||||
sregs.cr0 |= X86_CR0_PG;
|
||||
|
||||
if unsafe { std::arch::x86_64::__cpuid(7).ecx } & (1 << 16) != 0 {
|
||||
sregs.cr4 |= X86_CR4_LA57;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -374,8 +385,11 @@ mod tests {
|
||||
let gm = create_guest_mem();
|
||||
setup_page_tables(&gm, &mut sregs).unwrap();
|
||||
|
||||
assert_eq!(0xa003, read_u64(&gm, PML4_START));
|
||||
assert_eq!(0xb003, read_u64(&gm, PDPTE_START));
|
||||
if unsafe { std::arch::x86_64::__cpuid(7).ecx } & (1 << 16) != 0 {
|
||||
assert_eq!(0xa003, read_u64(&gm, PML5_START));
|
||||
}
|
||||
assert_eq!(0xb003, read_u64(&gm, PML4_START));
|
||||
assert_eq!(0xc003, read_u64(&gm, PDPTE_START));
|
||||
for i in 0..512 {
|
||||
assert_eq!(
|
||||
(i << 21) + 0x83u64,
|
||||
@ -383,7 +397,11 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
assert_eq!(PML4_START.raw_value(), sregs.cr3);
|
||||
if unsafe { std::arch::x86_64::__cpuid(7).ecx } & (1 << 16) != 0 {
|
||||
assert_eq!(PML5_START.raw_value(), sregs.cr3);
|
||||
} else {
|
||||
assert_eq!(PML4_START.raw_value(), sregs.cr3);
|
||||
}
|
||||
assert_eq!(X86_CR4_PAE, sregs.cr4);
|
||||
assert_eq!(X86_CR0_PG, sregs.cr0);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user