diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index d32f77621..8e6219e65 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -16,6 +16,7 @@ use crate::config::CpuTopology; use crate::config::CpusConfig; use crate::device_manager::DeviceManager; use crate::memory_manager::MemoryManager; +use crate::seccomp_filters::{get_seccomp_filter, Thread}; use crate::CPU_MANAGER_SNAPSHOT_ID; #[cfg(feature = "acpi")] use acpi_tables::{aml, aml::Aml, sdt::SDT}; @@ -31,6 +32,8 @@ use devices::interrupt_controller::InterruptController; #[cfg(target_arch = "x86_64")] use hypervisor::CpuId; use hypervisor::{CpuState, VmExit}; +use seccomp::{SeccompAction, SeccompFilter}; + use libc::{c_void, siginfo_t}; #[cfg(target_arch = "x86_64")] use std::fmt; @@ -192,6 +195,12 @@ pub enum Error { /// Error resuming vCPU on shutdown ResumeOnShutdown(MigratableError), + + /// Cannot create seccomp filter + CreateSeccompFilter(seccomp::SeccompError), + + /// Cannot apply seccomp filter + ApplySeccompFilter(seccomp::Error), } pub type Result = result::Result; @@ -475,6 +484,7 @@ pub struct CpuManager { vcpu_states: Vec, selected_cpu: u8, vcpus: Vec>>, + seccomp_action: SeccompAction, } const CPU_ENABLE_FLAG: usize = 0; @@ -601,6 +611,7 @@ impl CpuManager { vm: Arc, reset_evt: EventFd, hypervisor: Arc, + seccomp_action: SeccompAction, ) -> Result>> { let guest_memory = memory_manager.lock().unwrap().guest_memory(); let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus)); @@ -633,6 +644,7 @@ impl CpuManager { reset_evt, selected_cpu: 0, vcpus: Vec::with_capacity(usize::from(config.max_vcpus)), + seccomp_action, })); #[cfg(target_arch = "x86_64")] @@ -812,10 +824,22 @@ impl CpuManager { info!("Starting vCPU: cpu_id = {}", cpu_id); + // Retrieve seccomp filter for vcpu thread + let vcpu_seccomp_filter = get_seccomp_filter(&self.seccomp_action, Thread::Vcpu) + .map_err(Error::CreateSeccompFilter)?; + let handle = Some( thread::Builder::new() .name(format!("vcpu{}", cpu_id)) .spawn(move || 
{ + // Apply seccomp filter for vcpu thread. + if let Err(e) = + SeccompFilter::apply(vcpu_seccomp_filter).map_err(Error::ApplySeccompFilter) + { + error!("Error applying seccomp filter: {:?}", e); + return; + } + extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {} // This uses an async signal safe handler to kill the vcpu handles. register_signal_handler(SIGRTMIN(), handle_signal) diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index c71a7b679..cb6838986 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -12,8 +12,9 @@ use seccomp::{ use std::convert::TryInto; pub enum Thread { - Vmm, Api, + Vcpu, + Vmm, } /// Shorthand for chaining `SeccompCondition`s with the `and` operator in a `SeccompRule`. @@ -79,33 +80,33 @@ const VFIO_IOMMU_MAP_DMA: u64 = 0x3b71; const VFIO_IOMMU_UNMAP_DMA: u64 = 0x3b72; const VFIO_DEVICE_IOEVENTFD: u64 = 0x3b74; -fn create_vmm_ioctl_seccomp_rule_common() -> Result, Error> { - // See include/uapi/linux/kvm.h in the kernel code. 
- const KVM_GET_API_VERSION: u64 = 0xae00; - const KVM_CREATE_VM: u64 = 0xae01; - const KVM_CHECK_EXTENSION: u64 = 0xae03; - const KVM_GET_VCPU_MMAP_SIZE: u64 = 0xae04; - const KVM_CREATE_VCPU: u64 = 0xae41; - const KVM_CREATE_IRQCHIP: u64 = 0xae60; - const KVM_RUN: u64 = 0xae80; - const KVM_SET_MP_STATE: u64 = 0x4004_ae99; - const KVM_SET_GSI_ROUTING: u64 = 0x4008_ae6a; - const KVM_SET_DEVICE_ATTR: u64 = 0x4018_aee1; - const KVM_SET_ONE_REG: u64 = 0x4010_aeac; - const KVM_SET_USER_MEMORY_REGION: u64 = 0x4020_ae46; - const KVM_IRQFD: u64 = 0x4020_ae76; - const KVM_IOEVENTFD: u64 = 0x4040_ae79; - const KVM_SET_VCPU_EVENTS: u64 = 0x4040_aea0; - const KVM_ENABLE_CAP: u64 = 0x4068_aea3; - const KVM_SET_REGS: u64 = 0x4090_ae82; - const KVM_GET_MP_STATE: u64 = 0x8004_ae98; - const KVM_GET_DEVICE_ATTR: u64 = 0x4018_aee2; - const KVM_GET_VCPU_EVENTS: u64 = 0x8040_ae9f; - const KVM_GET_ONE_REG: u64 = 0x4010_aeab; - const KVM_GET_REGS: u64 = 0x8090_ae81; - const KVM_GET_SUPPORTED_CPUID: u64 = 0xc008_ae05; - const KVM_CREATE_DEVICE: u64 = 0xc00c_aee0; +// See include/uapi/linux/kvm.h in the kernel code. 
+const KVM_GET_API_VERSION: u64 = 0xae00; +const KVM_CREATE_VM: u64 = 0xae01; +const KVM_CHECK_EXTENSION: u64 = 0xae03; +const KVM_GET_VCPU_MMAP_SIZE: u64 = 0xae04; +const KVM_CREATE_VCPU: u64 = 0xae41; +const KVM_CREATE_IRQCHIP: u64 = 0xae60; +const KVM_RUN: u64 = 0xae80; +const KVM_SET_MP_STATE: u64 = 0x4004_ae99; +const KVM_SET_GSI_ROUTING: u64 = 0x4008_ae6a; +const KVM_SET_DEVICE_ATTR: u64 = 0x4018_aee1; +const KVM_SET_ONE_REG: u64 = 0x4010_aeac; +const KVM_SET_USER_MEMORY_REGION: u64 = 0x4020_ae46; +const KVM_IRQFD: u64 = 0x4020_ae76; +const KVM_IOEVENTFD: u64 = 0x4040_ae79; +const KVM_SET_VCPU_EVENTS: u64 = 0x4040_aea0; +const KVM_ENABLE_CAP: u64 = 0x4068_aea3; +const KVM_SET_REGS: u64 = 0x4090_ae82; +const KVM_GET_MP_STATE: u64 = 0x8004_ae98; +const KVM_GET_DEVICE_ATTR: u64 = 0x4018_aee2; +const KVM_GET_VCPU_EVENTS: u64 = 0x8040_ae9f; +const KVM_GET_ONE_REG: u64 = 0x4010_aeab; +const KVM_GET_REGS: u64 = 0x8090_ae81; +const KVM_GET_SUPPORTED_CPUID: u64 = 0xc008_ae05; +const KVM_CREATE_DEVICE: u64 = 0xc00c_aee0; +fn create_vmm_ioctl_seccomp_rule_common() -> Result, Error> { Ok(or![ and![Cond::new(1, ArgLen::DWORD, Eq, FIOCLEX)?], and![Cond::new(1, ArgLen::DWORD, Eq, FIONBIO)?], @@ -349,6 +350,84 @@ fn vmm_thread_rules() -> Result, Error> { ]) } +fn create_vcpu_ioctl_seccomp_rule() -> Result, Error> { + Ok(or![ + and![Cond::new(1, ArgLen::DWORD, Eq, FIONBIO,)?], + and![Cond::new(1, ArgLen::DWORD, Eq, KVM_CHECK_EXTENSION,)?], + and![Cond::new(1, ArgLen::DWORD, Eq, KVM_IOEVENTFD)?], + and![Cond::new(1, ArgLen::DWORD, Eq, KVM_IRQFD,)?], + and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_DEVICE_ATTR,)?], + and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_GSI_ROUTING,)?], + and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_USER_MEMORY_REGION,)?], + and![Cond::new(1, ArgLen::DWORD, Eq, KVM_RUN,)?], + and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_DEVICE_SET_IRQS)?], + and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_GROUP_UNSET_CONTAINER)?], + and![Cond::new(1, ArgLen::DWORD, Eq, 
VFIO_IOMMU_UNMAP_DMA)?], + ]) +} + +fn vcpu_thread_rules() -> Result, Error> { + Ok(vec![ + allow_syscall(libc::SYS_accept4), + allow_syscall(libc::SYS_brk), + allow_syscall(libc::SYS_clock_nanosleep), + allow_syscall(libc::SYS_clone), + allow_syscall(libc::SYS_close), + allow_syscall(libc::SYS_dup), + allow_syscall(libc::SYS_epoll_create1), + allow_syscall(libc::SYS_epoll_ctl), + allow_syscall(libc::SYS_epoll_pwait), + #[cfg(target_arch = "x86_64")] + allow_syscall(libc::SYS_epoll_wait), + allow_syscall(libc::SYS_eventfd2), + allow_syscall(libc::SYS_exit), + allow_syscall(libc::SYS_fallocate), + allow_syscall(libc::SYS_fcntl), + allow_syscall(libc::SYS_fdatasync), + allow_syscall(libc::SYS_fsync), + #[cfg(target_arch = "x86_64")] + allow_syscall(libc::SYS_ftruncate), + #[cfg(target_arch = "aarch64")] + // The definition of libc::SYS_ftruncate is missing on AArch64. + // Use a hard-coded number instead. + allow_syscall(46), + #[cfg(target_arch = "aarch64")] + allow_syscall(libc::SYS_faccessat), + #[cfg(target_arch = "aarch64")] + allow_syscall(libc::SYS_newfstatat), + allow_syscall(libc::SYS_futex), + allow_syscall(libc::SYS_getpid), + allow_syscall_if(libc::SYS_ioctl, create_vcpu_ioctl_seccomp_rule()?), + allow_syscall(libc::SYS_lseek), + allow_syscall(libc::SYS_madvise), + allow_syscall(libc::SYS_mmap), + allow_syscall(libc::SYS_mprotect), + allow_syscall(libc::SYS_munmap), + allow_syscall(libc::SYS_nanosleep), + allow_syscall(libc::SYS_openat), + allow_syscall(libc::SYS_prctl), + allow_syscall(libc::SYS_pread64), + allow_syscall(libc::SYS_pwrite64), + allow_syscall(libc::SYS_read), + allow_syscall(libc::SYS_recvfrom), + allow_syscall(libc::SYS_recvmsg), + allow_syscall(libc::SYS_rt_sigaction), + allow_syscall(libc::SYS_rt_sigprocmask), + allow_syscall(libc::SYS_rt_sigreturn), + allow_syscall(libc::SYS_sched_getaffinity), + allow_syscall(libc::SYS_sendmsg), + allow_syscall(libc::SYS_set_robust_list), + allow_syscall(libc::SYS_sigaltstack), + 
allow_syscall(libc::SYS_socketpair), + #[cfg(target_arch = "x86_64")] + allow_syscall(libc::SYS_stat), + allow_syscall(libc::SYS_statx), + allow_syscall(libc::SYS_tgkill), + allow_syscall(libc::SYS_tkill), + allow_syscall(libc::SYS_write), + ]) +} + // The filter containing the white listed syscall rules required by the API to // function. fn api_thread_rules() -> Result, Error> { @@ -380,8 +459,9 @@ fn api_thread_rules() -> Result, Error> { fn get_seccomp_filter_trap(thread_type: Thread) -> Result { let rules = match thread_type { - Thread::Vmm => vmm_thread_rules()?, Thread::Api => api_thread_rules()?, + Thread::Vcpu => vcpu_thread_rules()?, + Thread::Vmm => vmm_thread_rules()?, }; Ok(SeccompFilter::new( @@ -392,8 +472,9 @@ fn get_seccomp_filter_trap(thread_type: Thread) -> Result fn get_seccomp_filter_log(thread_type: Thread) -> Result { let rules = match thread_type { - Thread::Vmm => vmm_thread_rules()?, Thread::Api => api_thread_rules()?, + Thread::Vcpu => vcpu_thread_rules()?, + Thread::Vmm => vmm_thread_rules()?, }; Ok(SeccompFilter::new( diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 2b1d65dae..8d7f6ab6a 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -327,6 +327,7 @@ impl Vm { vm.clone(), reset_evt, hypervisor, + seccomp_action.clone(), ) .map_err(Error::CpuManager)?;