cloud-hypervisor/arch/src/x86_64/regs.rs

// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.

use std::{io, mem, result};

use super::gdt::{gdt_entry, kvm_segment_from_gdt};
use arch_gen::x86::msr_index;
use kvm_bindings::{kvm_fpu, kvm_msr_entry, kvm_msrs, kvm_regs, kvm_sregs};
use kvm_ioctls::VcpuFd;
use layout::{PDE_START, PDPTE_START, PML4_START};
use vm_memory::{Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap};

// MTRR constants
const MTRR_ENABLE: u64 = 0x800; // IA32_MTRR_DEF_TYPE MSR: E (MTRRs enabled) flag, bit 11
const MTRR_MEM_TYPE_WB: u64 = 0x6; // Memory type 6: write-back (WB)
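
// In IA32_MTRR_DEF_TYPE, bits 7:0 select the default memory type for every
// physical address not covered by a variable-range MTRR. Programming it to
// WB (6) with the enable bit set makes all guest memory write-back cacheable
// by default.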

#[derive(Debug)]
pub enum Error {
    /// Failed to get SREGs for this CPU.
    GetStatusRegisters(io::Error),
    /// Failed to set base registers for this CPU.
    SetBaseRegisters(io::Error),
    /// Failed to configure the FPU.
    SetFPURegisters(io::Error),
    /// Setting up MSRs failed.
    SetModelSpecificRegisters(io::Error),
    /// Failed to set SREGs for this CPU.
    SetStatusRegisters(io::Error),
    /// Writing the GDT to RAM failed.
    WriteGDT,
    /// Writing the IDT to RAM failed.
    WriteIDT,
    /// Writing PDPTE to RAM failed.
    WritePDPTEAddress,
    /// Writing PDE to RAM failed.
    WritePDEAddress,
    /// Writing PML4 to RAM failed.
    WritePML4Address,
}

pub type Result<T> = result::Result<T, Error>;

/// Configure Floating-Point Unit (FPU) registers for a given CPU.
///
/// # Arguments
///
/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
pub fn setup_fpu(vcpu: &VcpuFd) -> Result<()> {
    let fpu: kvm_fpu = kvm_fpu {
        fcw: 0x37f,    // Default x87 control word (as after FNINIT): all exceptions masked.
        mxcsr: 0x1f80, // Default MXCSR (SSE reset value): all exceptions masked.
        ..Default::default()
    };

    vcpu.set_fpu(&fpu).map_err(Error::SetFPURegisters)
}

/// Configure Model Specific Registers (MSRs) for a given CPU.
///
/// # Arguments
///
/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
pub fn setup_msrs(vcpu: &VcpuFd) -> Result<()> {
    let entry_vec = create_msr_entries();
    let vec_size_bytes =
        mem::size_of::<kvm_msrs>() + (entry_vec.len() * mem::size_of::<kvm_msr_entry>());
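    // kvm_msrs ends in a flexible array member, so the ioctl argument has to
    // be one contiguous allocation laid out as:
    //
    //   +-------+-----+-------------+-------------+-----+
    //   | nmsrs | pad | msr entry 0 | msr entry 1 | ... |
    //   +-------+-----+-------------+-------------+-----+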
    // Use a zeroed buffer rather than `Vec::with_capacity`: the writes below
    // land past the vector's length, and writing through uninitialized spare
    // capacity would be undefined behavior.
    let vec: Vec<u8> = vec![0; vec_size_bytes];
    let msrs: &mut kvm_msrs = unsafe {
        // Converting the vector's memory to a struct is unsafe. Carefully using the read-only
        // vector to size and set the members ensures no out-of-bounds errors below.
        &mut *(vec.as_ptr() as *mut kvm_msrs)
    };
    unsafe {
        // Mapping the unsized array to a slice is unsafe because the length isn't known.
        // Providing the length used to create the struct guarantees the entire slice is valid.
        let entries: &mut [kvm_msr_entry] = msrs.entries.as_mut_slice(entry_vec.len());
        entries.copy_from_slice(&entry_vec);
    }
    msrs.nmsrs = entry_vec.len() as u32;

    vcpu.set_msrs(msrs)
        .map_err(Error::SetModelSpecificRegisters)
}

/// Configure base registers for a given CPU.
///
/// # Arguments
///
/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
/// * `boot_ip` - Starting instruction pointer.
/// * `boot_sp` - Starting stack pointer.
/// * `boot_si` - Must point to zero page address per Linux ABI.
pub fn setup_regs(vcpu: &VcpuFd, boot_ip: u64, boot_sp: u64, boot_si: u64) -> Result<()> {
    let regs: kvm_regs = kvm_regs {
        // RFLAGS bit 1 is reserved and must always be set.
        rflags: 0x0000000000000002u64,
        rip: boot_ip,
        rsp: boot_sp,
        rbp: boot_sp,
        rsi: boot_si,
        ..Default::default()
    };

    vcpu.set_regs(&regs).map_err(Error::SetBaseRegisters)
}

/// Configures the segment registers and system page tables for a given CPU.
///
/// # Arguments
///
/// * `mem` - The memory that will be passed to the guest.
/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
pub fn setup_sregs(mem: &GuestMemoryMmap, vcpu: &VcpuFd) -> Result<()> {
    let mut sregs: kvm_sregs = vcpu.get_sregs().map_err(Error::GetStatusRegisters)?;

    configure_segments_and_sregs(mem, &mut sregs)?;
    setup_page_tables(mem, &mut sregs)?; // TODO(dgreid) - Can this be done once per system instead?

    vcpu.set_sregs(&sregs).map_err(Error::SetStatusRegisters)
}

const BOOT_GDT_OFFSET: GuestAddress = GuestAddress(0x500);
// The IDT sits right after the 4-entry, 8-bytes-per-entry GDT:
// 0x500 + 4 * 8 = 0x520.
const BOOT_IDT_OFFSET: GuestAddress = GuestAddress(0x520);

const BOOT_GDT_MAX: usize = 4;

const EFER_LMA: u64 = 0x400; // Long Mode Active (bit 10)
const EFER_LME: u64 = 0x100; // Long Mode Enable (bit 8)

const X86_CR0_PE: u64 = 0x1; // Protection Enable (bit 0)
const X86_CR0_PG: u64 = 0x80000000; // Paging (bit 31)
const X86_CR4_PAE: u64 = 0x20; // Physical Address Extension (bit 5)
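
// Taken together, configure_segments_and_sregs (long-mode GDT, CR0.PE,
// EFER.LME/LMA) and setup_page_tables (CR4.PAE, CR3, CR0.PG) hand the vCPU
// to the kernel directly in 64-bit long mode with paging enabled, skipping
// real mode and 32-bit protected mode entirely.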

fn write_gdt_table(table: &[u64], guest_mem: &GuestMemoryMmap) -> Result<()> {
    let boot_gdt_addr = BOOT_GDT_OFFSET;
    for (index, entry) in table.iter().enumerate() {
        let addr = guest_mem
            .checked_offset(boot_gdt_addr, index * mem::size_of::<u64>())
            .ok_or(Error::WriteGDT)?;
        guest_mem
            .write_obj(*entry, addr)
            .map_err(|_| Error::WriteGDT)?;
    }
    Ok(())
}

fn write_idt_value(val: u64, guest_mem: &GuestMemoryMmap) -> Result<()> {
    let boot_idt_addr = BOOT_IDT_OFFSET;
    guest_mem
        .write_obj(val, boot_idt_addr)
        .map_err(|_| Error::WriteIDT)
}

fn configure_segments_and_sregs(mem: &GuestMemoryMmap, sregs: &mut kvm_sregs) -> Result<()> {
    let gdt_table: [u64; BOOT_GDT_MAX] = [
        gdt_entry(0, 0, 0),            // NULL
        gdt_entry(0xa09b, 0, 0xfffff), // CODE
        gdt_entry(0xc093, 0, 0xfffff), // DATA
        gdt_entry(0x808b, 0, 0xfffff), // TSS
    ];
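    // In gdt_entry's flags argument, the low byte is the access byte and the
    // top nibble carries the granularity/size flags. 0xa09b above is a present
    // ring-0 code segment (access byte 0x9b) with the L (64-bit) and G (4 KiB
    // granularity) bits set; 0xc093 is a present, writable data segment.
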
    let code_seg = kvm_segment_from_gdt(gdt_table[1], 1);
    let data_seg = kvm_segment_from_gdt(gdt_table[2], 2);
    let tss_seg = kvm_segment_from_gdt(gdt_table[3], 3);

    // Write segments
    write_gdt_table(&gdt_table[..], mem)?;
    sregs.gdt.base = BOOT_GDT_OFFSET.raw_value();
    sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;

    write_idt_value(0, mem)?;
    sregs.idt.base = BOOT_IDT_OFFSET.raw_value();
    sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;

    sregs.cs = code_seg;
    sregs.ds = data_seg;
    sregs.es = data_seg;
    sregs.fs = data_seg;
    sregs.gs = data_seg;
    sregs.ss = data_seg;
    sregs.tr = tss_seg;

    /* 64-bit protected mode */
    sregs.cr0 |= X86_CR0_PE;
    sregs.efer |= EFER_LME | EFER_LMA;

    Ok(())
}

fn setup_page_tables(mem: &GuestMemoryMmap, sregs: &mut kvm_sregs) -> Result<()> {
    // Puts PML4 right after zero page but aligned to 4k.

    // Entry covering VA [0..512GB): present | writable (0x03).
    mem.write_obj(PDPTE_START.raw_value() | 0x03, PML4_START)
        .map_err(|_| Error::WritePML4Address)?;

    // Entry covering VA [0..1GB): present | writable (0x03).
    mem.write_obj(PDE_START.raw_value() | 0x03, PDPTE_START)
        .map_err(|_| Error::WritePDPTEAddress)?;

    // 512 2MB entries together covering VA [0..1GB). The 0x83 flags are
    // present | writable | PS (page size); 2MB pages are always available in
    // long mode, so no CPUID check is needed here.
    for i in 0..512 {
        mem.write_obj((i << 21) + 0x83u64, PDE_START.unchecked_add(i * 8))
            .map_err(|_| Error::WritePDEAddress)?;
    }
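
    // Resulting walk for any virtual address below 1 GiB:
    //   CR3 -> PML4[0] -> PDPTE[0] -> PDE[va >> 21] -> 2 MiB page (PA == VA).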
    sregs.cr3 = PML4_START.raw_value();
    sregs.cr4 |= X86_CR4_PAE;
    sregs.cr0 |= X86_CR0_PG;

    Ok(())
}

fn create_msr_entries() -> Vec<kvm_msr_entry> {
    let mut entries = Vec::<kvm_msr_entry>::new();

    entries.push(kvm_msr_entry {
        index: msr_index::MSR_IA32_SYSENTER_CS,
        data: 0x0,
        ..Default::default()
    });
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_IA32_SYSENTER_ESP,
        data: 0x0,
        ..Default::default()
    });
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_IA32_SYSENTER_EIP,
        data: 0x0,
        ..Default::default()
    });
    // x86_64-specific MSRs; we only run on x86_64, not x86.
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_STAR,
        data: 0x0,
        ..Default::default()
    });
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_CSTAR,
        data: 0x0,
        ..Default::default()
    });
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_KERNEL_GS_BASE,
        data: 0x0,
        ..Default::default()
    });
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_SYSCALL_MASK,
        data: 0x0,
        ..Default::default()
    });
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_LSTAR,
        data: 0x0,
        ..Default::default()
    });
    // end of x86_64 specific code
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_IA32_TSC,
        data: 0x0,
        ..Default::default()
    });
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_IA32_MISC_ENABLE,
        data: msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
        ..Default::default()
    });
    entries.push(kvm_msr_entry {
        index: msr_index::MSR_MTRRdefType,
        data: MTRR_ENABLE | MTRR_MEM_TYPE_WB,
        ..Default::default()
    });

    entries
}

#[cfg(test)]
mod tests {
    extern crate kvm_ioctls;
    extern crate vm_memory;

    use super::*;
    use kvm_ioctls::Kvm;
    use vm_memory::{GuestAddress, GuestMemoryMmap};

    fn create_guest_mem() -> GuestMemoryMmap {
        // 64 KiB is enough to cover the boot GDT/IDT and the page tables.
        GuestMemoryMmap::new(&[(GuestAddress(0), 0x10000)]).unwrap()
    }

    fn read_u64(gm: &GuestMemoryMmap, offset: GuestAddress) -> u64 {
        gm.read_obj(offset).unwrap()
    }

    #[test]
    fn segments_and_sregs() {
        let mut sregs: kvm_sregs = Default::default();
        let gm = create_guest_mem();
        configure_segments_and_sregs(&gm, &mut sregs).unwrap();

        assert_eq!(0x0, read_u64(&gm, BOOT_GDT_OFFSET));
        assert_eq!(
            0xaf9b000000ffff,
            read_u64(&gm, BOOT_GDT_OFFSET.unchecked_add(8))
        );
        assert_eq!(
            0xcf93000000ffff,
            read_u64(&gm, BOOT_GDT_OFFSET.unchecked_add(16))
        );
        assert_eq!(
            0x8f8b000000ffff,
            read_u64(&gm, BOOT_GDT_OFFSET.unchecked_add(24))
        );
        assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));

        assert_eq!(0, sregs.cs.base);
        assert_eq!(0xfffff, sregs.ds.limit);
        assert_eq!(0x10, sregs.es.selector);
        assert_eq!(1, sregs.fs.present);
        assert_eq!(1, sregs.gs.g);
        assert_eq!(0, sregs.ss.avl);
        assert_eq!(0, sregs.tr.base);
        assert_eq!(0xfffff, sregs.tr.limit);
        assert_eq!(0, sregs.tr.avl);
        assert_eq!(X86_CR0_PE, sregs.cr0);
        assert_eq!(EFER_LME | EFER_LMA, sregs.efer);
    }

    #[test]
    fn page_tables() {
        let mut sregs: kvm_sregs = Default::default();
        let gm = create_guest_mem();
        setup_page_tables(&gm, &mut sregs).unwrap();

        assert_eq!(0xa003, read_u64(&gm, PML4_START));
        assert_eq!(0xb003, read_u64(&gm, PDPTE_START));
        for i in 0..512 {
            assert_eq!(
                (i << 21) + 0x83u64,
                read_u64(&gm, PDE_START.unchecked_add(i * 8))
            );
        }

        assert_eq!(PML4_START.raw_value(), sregs.cr3);
        assert_eq!(X86_CR4_PAE, sregs.cr4);
        assert_eq!(X86_CR0_PG, sregs.cr0);
    }

    #[test]
    fn test_setup_fpu() {
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();
        setup_fpu(&vcpu).unwrap();

        let expected_fpu: kvm_fpu = kvm_fpu {
            fcw: 0x37f,
            mxcsr: 0x1f80,
            ..Default::default()
        };
        let actual_fpu: kvm_fpu = vcpu.get_fpu().unwrap();
        // TODO: auto-generate kvm related structures with PartialEq on.
        assert_eq!(expected_fpu.fcw, actual_fpu.fcw);
        // Setting the mxcsr register from kvm_fpu inside setup_fpu does not influence anything.
        // See 'kvm_arch_vcpu_ioctl_set_fpu' from arch/x86/kvm/x86.c.
        // The mxcsr would stay 0 and the assert below would fail, so it stays commented out
        // until we decide whether it should be removed entirely.
        // assert!(expected_fpu.mxcsr == actual_fpu.mxcsr);
    }

    #[test]
    fn test_setup_msrs() {
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();
        setup_msrs(&vcpu).unwrap();

        // This test will check against the tenth MSR entry configured,
        // MSR_IA32_MISC_ENABLE. See create_msr_entries for details.
        let test_kvm_msrs_entry = [kvm_msr_entry {
            index: msr_index::MSR_IA32_MISC_ENABLE,
            ..Default::default()
        }];
        let vec_size_bytes = mem::size_of::<kvm_msrs>() + mem::size_of::<kvm_msr_entry>();
        // Zero-initialized for the same reason as in setup_msrs.
        let vec: Vec<u8> = vec![0; vec_size_bytes];
        let mut msrs: &mut kvm_msrs = unsafe {
            // Converting the vector's memory to a struct is unsafe. Carefully using the read-only
            // vector to size and set the members ensures no out-of-bounds errors below.
            &mut *(vec.as_ptr() as *mut kvm_msrs)
        };
        unsafe {
            let entries: &mut [kvm_msr_entry] = msrs.entries.as_mut_slice(1);
            entries.copy_from_slice(&test_kvm_msrs_entry);
        }
        msrs.nmsrs = 1;

        // get_msrs returns the number of MSRs it succeeded in reading. We only want to read one
        // in this test case scenario.
        let read_msrs = vcpu.get_msrs(&mut msrs).unwrap();
        assert_eq!(read_msrs, 1);

        // Official entries that were set up when we did setup_msrs. We need to assert that the
        // tenth one (i.e. the one with index msr_index::MSR_IA32_MISC_ENABLE) has the data we
        // expect.
        let entry_vec = create_msr_entries();
        unsafe {
            assert_eq!(entry_vec[9], msrs.entries.as_slice(1)[0]);
        }
    }

    #[test]
    fn test_setup_regs() {
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();

        let expected_regs: kvm_regs = kvm_regs {
            rflags: 0x0000000000000002u64,
            rip: 1,
            rsp: 2,
            rbp: 2,
            rsi: 3,
            ..Default::default()
        };

        setup_regs(
            &vcpu,
            expected_regs.rip,
            expected_regs.rsp,
            expected_regs.rsi,
        )
        .unwrap();

        let actual_regs: kvm_regs = vcpu.get_regs().unwrap();
        assert_eq!(actual_regs, expected_regs);
    }

    #[test]
    fn test_setup_sregs() {
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();

        let mut expected_sregs: kvm_sregs = vcpu.get_sregs().unwrap();
        let gm = create_guest_mem();
        configure_segments_and_sregs(&gm, &mut expected_sregs).unwrap();
        setup_page_tables(&gm, &mut expected_sregs).unwrap();

        setup_sregs(&gm, &vcpu).unwrap();
        let actual_sregs: kvm_sregs = vcpu.get_sregs().unwrap();
        assert_eq!(expected_sregs, actual_sregs);
    }
}