diff --git a/vmm/src/coredump.rs b/vmm/src/coredump.rs
index ccdd4f1a0..62080ed2f 100644
--- a/vmm/src/coredump.rs
+++ b/vmm/src/coredump.rs
@@ -3,6 +3,10 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#[cfg(target_arch = "x86_64")]
+use hypervisor::kvm::kvm_bindings::kvm_dtable as DTableRegister;
+#[cfg(target_arch = "x86_64")]
+use hypervisor::x86_64::SegmentRegister;
 use linux_loader::elf;
 use std::fs::File;
 use std::io::Write;
@@ -72,6 +76,8 @@ pub struct X86_64UserRegs {
     pub gs: u64,
 }
 
+unsafe impl ByteValued for X86_64UserRegs {}
+
 #[repr(C)]
 #[allow(dead_code)]
 pub struct X86_64ElfPrStatus {
@@ -82,12 +88,106 @@ pub struct X86_64ElfPrStatus {
     pub pad3: [u8; 8],
 }
 
+/// Segment register as laid out inside [`CpuState`].
+#[repr(C)]
+#[derive(Default, Copy, Clone)]
+pub struct CpuSegment {
+    pub selector: u32,
+    pub limit: u32,
+    pub flags: u32,
+    pub pad: u32,
+    pub base: u64,
+}
+
+const DESC_TYPE_SHIFT: u32 = 8;
+const DESC_S_SHIFT: u32 = 12;
+const DESC_DPL_SHIFT: u32 = 13;
+const DESC_P_SHIFT: u32 = 15;
+const DESC_P_MASK: u32 = 1 << DESC_P_SHIFT;
+const DESC_AVL_SHIFT: u32 = 20;
+const DESC_AVL_MASK: u32 = 1 << DESC_AVL_SHIFT;
+const DESC_L_SHIFT: u32 = 21;
+const DESC_B_SHIFT: u32 = 22;
+const DESC_S_MASK: u32 = 1 << DESC_S_SHIFT;
+const DESC_G_SHIFT: u32 = 23;
+const DESC_G_MASK: u32 = 1 << DESC_G_SHIFT;
+
+impl CpuSegment {
+    /// Converts a hypervisor segment register, packing its attribute bits into `flags`.
+    pub fn new(reg: SegmentRegister) -> Self {
+        let p_mask = if (reg.present > 0) && (reg.unusable == 0) {
+            DESC_P_MASK
+        } else {
+            0
+        };
+        let flags = ((reg.type_ as u32) << DESC_TYPE_SHIFT)
+            | p_mask
+            | ((reg.dpl as u32) << DESC_DPL_SHIFT)
+            | ((reg.db as u32) << DESC_B_SHIFT)
+            | ((reg.s as u32) * DESC_S_MASK)
+            | ((reg.l as u32) << DESC_L_SHIFT)
+            | ((reg.g as u32) * DESC_G_MASK)
+            | ((reg.avl as u32) * DESC_AVL_MASK);
+
+        CpuSegment {
+            selector: reg.selector as u32,
+            limit: reg.limit as u32,
+            flags,
+            pad: 0,
+            base: reg.base,
+        }
+    }
+
+    /// Converts a descriptor-table register; only `base` and `limit` are carried over.
+    pub fn new_from_table(reg: DTableRegister) -> Self {
+        CpuSegment {
+            selector: 0,
+            limit: reg.limit as u32,
+            flags: 0,
+            pad: 0,
+            base: reg.base,
+        }
+    }
+}
+
+/// Per-vCPU register state serialized into the VMM-specific coredump note.
+#[repr(C)]
+#[derive(Default, Copy, Clone)]
+pub struct CpuState {
+    pub version: u32,
+    pub size: u32,
+    /// rax, rbx, rcx, rdx, rsi, rdi, rsp, rbp
+    pub regs1: [u64; 8],
+    /// r8, r9, r10, r11, r12, r13, r14, r15
+    pub regs2: [u64; 8],
+    pub rip: u64,
+    pub rflags: u64,
+    pub cs: CpuSegment,
+    pub ds: CpuSegment,
+    pub es: CpuSegment,
+    pub fs: CpuSegment,
+    pub gs: CpuSegment,
+    pub ss: CpuSegment,
+    pub ldt: CpuSegment,
+    pub tr: CpuSegment,
+    pub gdt: CpuSegment,
+    pub idt: CpuSegment,
+    pub cr: [u64; 5],
+    pub kernel_gs_base: u64,
+}
+
+// SAFETY: `CpuState` is `repr(C)` + `Copy` and holds only integers (directly or via `CpuSegment`).
+unsafe impl ByteValued for CpuState {}
+
 pub enum NoteDescType {
     ElfDesc = 0,
+    VmmDesc = 1,
+    ElfAndVmmDesc = 2,
 }
 
 // "CORE" or "QEMU"
 pub const COREDUMP_NAME_SIZE: u32 = 5;
+pub const NT_PRSTATUS: u32 = 1;
 
 /// Core file.
 const ET_CORE: u16 = 4;
@@ -223,11 +323,35 @@ pub trait Elf64Writable {
     fn get_note_size(&self, desc_type: NoteDescType, nr_cpus: u32) -> u32 {
         let note_head_size = std::mem::size_of::<elf::Elf64_Nhdr>() as u32;
         let elf_desc_size = std::mem::size_of::<X86_64ElfPrStatus>() as u32;
+        let cpu_state_desc_size = std::mem::size_of::<CpuState>() as u32;
 
         let elf_note_size = self.elf_note_size(note_head_size, COREDUMP_NAME_SIZE, elf_desc_size);
+        let vmm_note_size =
+            self.elf_note_size(note_head_size, COREDUMP_NAME_SIZE, cpu_state_desc_size);
 
         match desc_type {
             NoteDescType::ElfDesc => elf_note_size * nr_cpus,
+            NoteDescType::VmmDesc => vmm_note_size * nr_cpus,
+            NoteDescType::ElfAndVmmDesc => (elf_note_size + vmm_note_size) * nr_cpus,
         }
     }
 }
+
+/// Hooks for emitting per-vCPU coredump notes; both default to doing nothing.
+pub trait CpuElf64Writable {
+    /// Writes the per-vCPU ELF register notes; the default implementation is a no-op.
+    fn cpu_write_elf64_note(
+        &mut self,
+        _dump_state: &DumpState,
+    ) -> std::result::Result<(), GuestDebuggableError> {
+        Ok(())
+    }
+
+    /// Writes the per-vCPU VMM-specific notes; the default implementation is a no-op.
+    fn cpu_write_vmm_note(
+        &mut self,
+        _dump_state: &DumpState,
+    ) -> std::result::Result<(), GuestDebuggableError> {
+        Ok(())
+    }
+}
diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs
index 4c7b13c23..3bd33a443 100644
--- a/vmm/src/cpu.rs
+++ b/vmm/src/cpu.rs
@@ -12,6 +12,12 @@
 //
 
 use crate::config::CpusConfig;
+#[cfg(feature = "guest_debug")]
+use crate::coredump::{
+    CpuElf64Writable, CpuSegment, CpuState as DumpCpusState, DumpState, Elf64Writable,
+    GuestDebuggableError, NoteDescType, X86_64ElfPrStatus, X86_64UserRegs, COREDUMP_NAME_SIZE,
+    NT_PRSTATUS,
+};
 use crate::device_manager::DeviceManager;
 #[cfg(feature = "gdb")]
 use crate::gdb::{get_raw_tid, Debuggable, DebuggableError};
@@ -28,24 +34,36 @@ use arch::NumaNodes;
 use devices::interrupt_controller::InterruptController;
 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
 use gdbstub_arch::x86::reg::{X86SegmentRegs, X86_64CoreRegs};
+#[cfg(feature = "guest_debug")]
+use hypervisor::arch::x86::msr_index;
 #[cfg(target_arch = "aarch64")]
 use hypervisor::kvm::kvm_bindings;
 #[cfg(feature = "tdx")]
 use hypervisor::kvm::{TdxExitDetails, TdxExitStatus};
 #[cfg(target_arch = "x86_64")]
 use hypervisor::x86_64::CpuId;
+#[cfg(feature = "guest_debug")]
+use hypervisor::x86_64::{MsrEntries, MsrEntry};
 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
 use hypervisor::x86_64::{SpecialRegisters, StandardRegisters};
 use hypervisor::{CpuState, HypervisorCpuError, VmExit, VmOps};
 use libc::{c_void, siginfo_t};
+#[cfg(feature = "guest_debug")]
+use linux_loader::elf::Elf64_Nhdr;
 use seccompiler::{apply_filter, SeccompAction};
 use std::collections::BTreeMap;
+#[cfg(feature = "guest_debug")]
+use std::io::Write;
+#[cfg(feature = "guest_debug")]
+use std::mem::size_of;
 use std::os::unix::thread::JoinHandleExt;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Barrier, Mutex};
 use std::{cmp, io, result, thread};
 use thiserror::Error;
 use vm_device::BusDevice;
+#[cfg(feature = "guest_debug")]
+use vm_memory::ByteValued;
 #[cfg(feature = "gdb")]
 use vm_memory::{Bytes, GuestAddressSpace};
 use vm_memory::{GuestAddress, GuestMemoryAtomic};
@@ -238,6 +256,14 @@ struct InterruptSourceOverride {
     pub flags: u16,
 }
 
+// Rounds `$n` up to the next multiple of `$d`; `$d` must be non-zero.
+#[cfg(feature = "guest_debug")]
+macro_rules! round_up {
+    ($n:expr,$d:expr) => {
+        ((($n) + ($d) - 1) / ($d)) * ($d)
+    };
+}
+
 /// A wrapper around creating and using a kvm-based VCPU.
 pub struct Vcpu {
     // The hypervisor abstracted CPU.
@@ -2097,6 +2123,212 @@ impl Debuggable for CpuManager {
     }
 }
 
+#[cfg(feature = "guest_debug")]
+impl Elf64Writable for CpuManager {}
+
+#[cfg(feature = "guest_debug")]
+impl CpuElf64Writable for CpuManager {
+    /// Writes one `NT_PRSTATUS` note (name "CORE") per vCPU to the coredump file.
+    fn cpu_write_elf64_note(
+        &mut self,
+        dump_state: &DumpState,
+    ) -> std::result::Result<(), GuestDebuggableError> {
+        let mut coredump_file = dump_state.file.as_ref().unwrap();
+        for vcpu in &self.vcpus {
+            let note_size = self.get_note_size(NoteDescType::ElfDesc, 1);
+            let mut pos: usize = 0;
+            let mut buf = vec![0u8; note_size as usize];
+            let descsz = size_of::<X86_64ElfPrStatus>();
+            // Lock the vCPU once and reuse the guard for every register read below.
+            let vcpu = vcpu.lock().unwrap();
+
+            let note = Elf64_Nhdr {
+                n_namesz: COREDUMP_NAME_SIZE,
+                n_descsz: descsz as u32,
+                n_type: NT_PRSTATUS,
+            };
+
+            let bytes: &[u8] = note.as_slice();
+            buf.splice(0.., bytes.to_vec());
+            pos += round_up!(size_of::<Elf64_Nhdr>(), 4);
+            buf.resize(pos + 4, 0);
+            buf.splice(pos.., "CORE".to_string().into_bytes());
+
+            pos += round_up!(COREDUMP_NAME_SIZE as usize, 4);
+            buf.resize(pos + 32 + 4, 0);
+            let pid = vcpu.id as u64;
+            let bytes: &[u8] = pid.as_slice();
+            buf.splice(pos + 32.., bytes.to_vec()); /* pr_pid */
+
+            pos += descsz - size_of::<X86_64UserRegs>() - size_of::<u64>();
+
+            let orig_rax: u64 = 0;
+            let gregs = vcpu
+                .vcpu
+                .get_regs()
+                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get regs failed")))?;
+
+            let regs1 = [
+                gregs.r15, gregs.r14, gregs.r13, gregs.r12, gregs.rbp, gregs.rbx, gregs.r11,
+                gregs.r10,
+            ];
+            let regs2 = [
+                gregs.r9, gregs.r8, gregs.rax, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, orig_rax,
+            ];
+
+            let sregs = vcpu
+                .vcpu
+                .get_sregs()
+                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get sregs failed")))?;
+
+            debug!(
+                "rip 0x{:x} rsp 0x{:x} gs 0x{:x} cs 0x{:x} ss 0x{:x} ds 0x{:x}",
+                gregs.rip,
+                gregs.rsp,
+                sregs.gs.base,
+                sregs.cs.selector,
+                sregs.ss.selector,
+                sregs.ds.selector,
+            );
+
+            let regs = X86_64UserRegs {
+                regs1,
+                regs2,
+                rip: gregs.rip,
+                cs: sregs.cs.selector as u64,
+                eflags: gregs.rflags,
+                rsp: gregs.rsp,
+                ss: sregs.ss.selector as u64,
+                fs_base: sregs.fs.base,
+                gs_base: sregs.gs.base,
+                ds: sregs.ds.selector as u64,
+                es: sregs.es.selector as u64,
+                fs: sregs.fs.selector as u64,
+                gs: sregs.gs.selector as u64,
+            };
+
+            let bytes: &[u8] = regs.as_slice();
+            buf.resize(note_size as usize, 0);
+            buf.splice(pos.., bytes.to_vec());
+            buf.resize(note_size as usize, 0);
+
+            // `write_all` retries short writes, so a note is never silently truncated.
+            coredump_file
+                .write_all(&buf)
+                .map_err(|e| GuestDebuggableError::CoredumpFile(e.into()))?;
+        }
+
+        Ok(())
+    }
+
+    /// Writes one CPU-state note (name "QEMU", type 0) per vCPU to the coredump file.
+    fn cpu_write_vmm_note(
+        &mut self,
+        dump_state: &DumpState,
+    ) -> std::result::Result<(), GuestDebuggableError> {
+        let mut coredump_file = dump_state.file.as_ref().unwrap();
+        for vcpu in &self.vcpus {
+            let note_size = self.get_note_size(NoteDescType::VmmDesc, 1);
+            let mut pos: usize = 0;
+            let mut buf = vec![0u8; note_size as usize];
+            let descsz = size_of::<DumpCpusState>();
+            // Lock the vCPU once and reuse the guard for every register read below.
+            let vcpu = vcpu.lock().unwrap();
+
+            let note = Elf64_Nhdr {
+                n_namesz: COREDUMP_NAME_SIZE,
+                n_descsz: descsz as u32,
+                n_type: 0,
+            };
+
+            let bytes: &[u8] = note.as_slice();
+            buf.splice(0.., bytes.to_vec());
+            pos += round_up!(size_of::<Elf64_Nhdr>(), 4);
+
+            buf.resize(pos + 4, 0);
+            buf.splice(pos.., "QEMU".to_string().into_bytes());
+
+            pos += round_up!(COREDUMP_NAME_SIZE as usize, 4);
+
+            let gregs = vcpu
+                .vcpu
+                .get_regs()
+                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get regs failed")))?;
+
+            let regs1 = [
+                gregs.rax, gregs.rbx, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, gregs.rsp,
+                gregs.rbp,
+            ];
+
+            let regs2 = [
+                gregs.r8, gregs.r9, gregs.r10, gregs.r11, gregs.r12, gregs.r13, gregs.r14,
+                gregs.r15,
+            ];
+
+            let sregs = vcpu
+                .vcpu
+                .get_sregs()
+                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get sregs failed")))?;
+
+            let mut msrs = MsrEntries::from_entries(&[MsrEntry {
+                index: msr_index::MSR_KERNEL_GS_BASE,
+                ..Default::default()
+            }])
+            .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get msr failed")))?;
+
+            vcpu.vcpu
+                .get_msrs(&mut msrs)
+                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get msr failed")))?;
+            let kernel_gs_base = msrs.as_slice()[0].data;
+
+            let cs = CpuSegment::new(sregs.cs);
+            let ds = CpuSegment::new(sregs.ds);
+            let es = CpuSegment::new(sregs.es);
+            let fs = CpuSegment::new(sregs.fs);
+            let gs = CpuSegment::new(sregs.gs);
+            let ss = CpuSegment::new(sregs.ss);
+            let ldt = CpuSegment::new(sregs.ldt);
+            let tr = CpuSegment::new(sregs.tr);
+            let gdt = CpuSegment::new_from_table(sregs.gdt);
+            let idt = CpuSegment::new_from_table(sregs.idt);
+            // NOTE(review): index 1 carries CR8 (there is no CR1 value here) -- confirm consumers.
+            let cr = [sregs.cr0, sregs.cr8, sregs.cr2, sregs.cr3, sregs.cr4];
+            let regs = DumpCpusState {
+                version: 1,
+                size: size_of::<DumpCpusState>() as u32,
+                regs1,
+                regs2,
+                rip: gregs.rip,
+                rflags: gregs.rflags,
+                cs,
+                ds,
+                es,
+                fs,
+                gs,
+                ss,
+                ldt,
+                tr,
+                gdt,
+                idt,
+                cr,
+                kernel_gs_base,
+            };
+
+            let bytes: &[u8] = regs.as_slice();
+            buf.resize(note_size as usize, 0);
+            buf.splice(pos.., bytes.to_vec());
+            buf.resize(note_size as usize, 0);
+
+            // `write_all` retries short writes, so a note is never silently truncated.
+            coredump_file
+                .write_all(&buf)
+                .map_err(|e| GuestDebuggableError::CoredumpFile(e.into()))?;
+        }
+
+        Ok(())
+    }
+}
+
 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
 #[cfg(test)]
 mod tests {
diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs
index 645f3fc88..5d6fa08cf 100644
--- a/vmm/src/vm.rs
+++ b/vmm/src/vm.rs
@@ -17,11 +17,9 @@ use crate::config::{
     UserDeviceConfig, ValidationError, VdpaConfig, VmConfig, VsockConfig,
 };
 #[cfg(feature = "guest_debug")]
-use crate::coredump::DumpState;
-#[cfg(feature = "guest_debug")]
-use crate::coredump::{Elf64Writable, NoteDescType};
-#[cfg(feature = "guest_debug")]
-use crate::coredump::{GuestDebuggable, GuestDebuggableError};
+use crate::coredump::{
+    CpuElf64Writable, DumpState, Elf64Writable, GuestDebuggable, GuestDebuggableError, NoteDescType,
+};
 use crate::cpu;
 use crate::device_manager::{Console, DeviceManager, DeviceManagerError, PtyPair};
 use crate::device_tree::DeviceTree;
@@ -3052,6 +3050,15 @@ impl GuestDebuggable for Vm {
         self.write_note(&coredump_state)?;
         self.write_loads(&coredump_state)?;
 
+        self.cpu_manager
+            .lock()
+            .unwrap()
+            .cpu_write_elf64_note(&coredump_state)?;
+        self.cpu_manager
+            .lock()
+            .unwrap()
+            .cpu_write_vmm_note(&coredump_state)?;
+
         self.memory_manager
             .lock()
             .unwrap()