vmm: seccomp: Add seccomp filters for the vcpu worker thread

Partially fixes: #925

Signed-off-by: Bo Chen <chen.bo@intel.com>
This commit is contained in:
Bo Chen 2020-09-09 15:15:26 -07:00 committed by Sebastien Boeuf
parent d793cc4da3
commit 2612a6df29
3 changed files with 135 additions and 29 deletions

View File

@ -16,6 +16,7 @@ use crate::config::CpuTopology;
use crate::config::CpusConfig;
use crate::device_manager::DeviceManager;
use crate::memory_manager::MemoryManager;
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::CPU_MANAGER_SNAPSHOT_ID;
#[cfg(feature = "acpi")]
use acpi_tables::{aml, aml::Aml, sdt::SDT};
@ -31,6 +32,8 @@ use devices::interrupt_controller::InterruptController;
#[cfg(target_arch = "x86_64")]
use hypervisor::CpuId;
use hypervisor::{CpuState, VmExit};
use seccomp::{SeccompAction, SeccompFilter};
use libc::{c_void, siginfo_t};
#[cfg(target_arch = "x86_64")]
use std::fmt;
@ -192,6 +195,12 @@ pub enum Error {
/// Error resuming vCPU on shutdown
ResumeOnShutdown(MigratableError),
/// Cannot create seccomp filter
CreateSeccompFilter(seccomp::SeccompError),
/// Cannot apply seccomp filter
ApplySeccompFilter(seccomp::Error),
}
pub type Result<T> = result::Result<T, Error>;
@ -475,6 +484,7 @@ pub struct CpuManager {
vcpu_states: Vec<VcpuState>,
selected_cpu: u8,
vcpus: Vec<Arc<Mutex<Vcpu>>>,
seccomp_action: SeccompAction,
}
const CPU_ENABLE_FLAG: usize = 0;
@ -601,6 +611,7 @@ impl CpuManager {
vm: Arc<dyn hypervisor::Vm>,
reset_evt: EventFd,
hypervisor: Arc<dyn hypervisor::Hypervisor>,
seccomp_action: SeccompAction,
) -> Result<Arc<Mutex<CpuManager>>> {
let guest_memory = memory_manager.lock().unwrap().guest_memory();
let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
@ -633,6 +644,7 @@ impl CpuManager {
reset_evt,
selected_cpu: 0,
vcpus: Vec::with_capacity(usize::from(config.max_vcpus)),
seccomp_action,
}));
#[cfg(target_arch = "x86_64")]
@ -812,10 +824,22 @@ impl CpuManager {
info!("Starting vCPU: cpu_id = {}", cpu_id);
// Retrieve seccomp filter for vcpu thread
let vcpu_seccomp_filter = get_seccomp_filter(&self.seccomp_action, Thread::Vcpu)
.map_err(Error::CreateSeccompFilter)?;
let handle = Some(
thread::Builder::new()
.name(format!("vcpu{}", cpu_id))
.spawn(move || {
// Apply seccomp filter for vcpu thread.
if let Err(e) =
SeccompFilter::apply(vcpu_seccomp_filter).map_err(Error::ApplySeccompFilter)
{
error!("Error applying seccomp filter: {:?}", e);
return;
}
extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {}
// This uses an async signal safe handler to kill the vcpu handles.
register_signal_handler(SIGRTMIN(), handle_signal)

View File

@ -12,8 +12,9 @@ use seccomp::{
use std::convert::TryInto;
pub enum Thread {
Vmm,
Api,
Vcpu,
Vmm,
}
/// Shorthand for chaining `SeccompCondition`s with the `and` operator in a `SeccompRule`.
@ -79,33 +80,33 @@ const VFIO_IOMMU_MAP_DMA: u64 = 0x3b71;
const VFIO_IOMMU_UNMAP_DMA: u64 = 0x3b72;
const VFIO_DEVICE_IOEVENTFD: u64 = 0x3b74;
fn create_vmm_ioctl_seccomp_rule_common() -> Result<Vec<SeccompRule>, Error> {
// See include/uapi/linux/kvm.h in the kernel code.
const KVM_GET_API_VERSION: u64 = 0xae00;
const KVM_CREATE_VM: u64 = 0xae01;
const KVM_CHECK_EXTENSION: u64 = 0xae03;
const KVM_GET_VCPU_MMAP_SIZE: u64 = 0xae04;
const KVM_CREATE_VCPU: u64 = 0xae41;
const KVM_CREATE_IRQCHIP: u64 = 0xae60;
const KVM_RUN: u64 = 0xae80;
const KVM_SET_MP_STATE: u64 = 0x4004_ae99;
const KVM_SET_GSI_ROUTING: u64 = 0x4008_ae6a;
const KVM_SET_DEVICE_ATTR: u64 = 0x4018_aee1;
const KVM_SET_ONE_REG: u64 = 0x4010_aeac;
const KVM_SET_USER_MEMORY_REGION: u64 = 0x4020_ae46;
const KVM_IRQFD: u64 = 0x4020_ae76;
const KVM_IOEVENTFD: u64 = 0x4040_ae79;
const KVM_SET_VCPU_EVENTS: u64 = 0x4040_aea0;
const KVM_ENABLE_CAP: u64 = 0x4068_aea3;
const KVM_SET_REGS: u64 = 0x4090_ae82;
const KVM_GET_MP_STATE: u64 = 0x8004_ae98;
const KVM_GET_DEVICE_ATTR: u64 = 0x4018_aee2;
const KVM_GET_VCPU_EVENTS: u64 = 0x8040_ae9f;
const KVM_GET_ONE_REG: u64 = 0x4010_aeab;
const KVM_GET_REGS: u64 = 0x8090_ae81;
const KVM_GET_SUPPORTED_CPUID: u64 = 0xc008_ae05;
const KVM_CREATE_DEVICE: u64 = 0xc00c_aee0;
// See include/uapi/linux/kvm.h in the kernel code.
const KVM_GET_API_VERSION: u64 = 0xae00;
const KVM_CREATE_VM: u64 = 0xae01;
const KVM_CHECK_EXTENSION: u64 = 0xae03;
const KVM_GET_VCPU_MMAP_SIZE: u64 = 0xae04;
const KVM_CREATE_VCPU: u64 = 0xae41;
const KVM_CREATE_IRQCHIP: u64 = 0xae60;
const KVM_RUN: u64 = 0xae80;
const KVM_SET_MP_STATE: u64 = 0x4004_ae99;
const KVM_SET_GSI_ROUTING: u64 = 0x4008_ae6a;
const KVM_SET_DEVICE_ATTR: u64 = 0x4018_aee1;
const KVM_SET_ONE_REG: u64 = 0x4010_aeac;
const KVM_SET_USER_MEMORY_REGION: u64 = 0x4020_ae46;
const KVM_IRQFD: u64 = 0x4020_ae76;
const KVM_IOEVENTFD: u64 = 0x4040_ae79;
const KVM_SET_VCPU_EVENTS: u64 = 0x4040_aea0;
const KVM_ENABLE_CAP: u64 = 0x4068_aea3;
const KVM_SET_REGS: u64 = 0x4090_ae82;
const KVM_GET_MP_STATE: u64 = 0x8004_ae98;
const KVM_GET_DEVICE_ATTR: u64 = 0x4018_aee2;
const KVM_GET_VCPU_EVENTS: u64 = 0x8040_ae9f;
const KVM_GET_ONE_REG: u64 = 0x4010_aeab;
const KVM_GET_REGS: u64 = 0x8090_ae81;
const KVM_GET_SUPPORTED_CPUID: u64 = 0xc008_ae05;
const KVM_CREATE_DEVICE: u64 = 0xc00c_aee0;
fn create_vmm_ioctl_seccomp_rule_common() -> Result<Vec<SeccompRule>, Error> {
Ok(or![
and![Cond::new(1, ArgLen::DWORD, Eq, FIOCLEX)?],
and![Cond::new(1, ArgLen::DWORD, Eq, FIONBIO)?],
@ -349,6 +350,84 @@ fn vmm_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
])
}
fn create_vcpu_ioctl_seccomp_rule() -> Result<Vec<SeccompRule>, Error> {
Ok(or![
and![Cond::new(1, ArgLen::DWORD, Eq, FIONBIO,)?],
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_CHECK_EXTENSION,)?],
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_IOEVENTFD)?],
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_IRQFD,)?],
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_DEVICE_ATTR,)?],
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_GSI_ROUTING,)?],
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_USER_MEMORY_REGION,)?],
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_RUN,)?],
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_DEVICE_SET_IRQS)?],
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_GROUP_UNSET_CONTAINER)?],
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_UNMAP_DMA)?],
])
}
fn vcpu_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
Ok(vec![
allow_syscall(libc::SYS_accept4),
allow_syscall(libc::SYS_brk),
allow_syscall(libc::SYS_clock_nanosleep),
allow_syscall(libc::SYS_clone),
allow_syscall(libc::SYS_close),
allow_syscall(libc::SYS_dup),
allow_syscall(libc::SYS_epoll_create1),
allow_syscall(libc::SYS_epoll_ctl),
allow_syscall(libc::SYS_epoll_pwait),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_epoll_wait),
allow_syscall(libc::SYS_eventfd2),
allow_syscall(libc::SYS_exit),
allow_syscall(libc::SYS_fallocate),
allow_syscall(libc::SYS_fcntl),
allow_syscall(libc::SYS_fdatasync),
allow_syscall(libc::SYS_fsync),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_ftruncate),
#[cfg(target_arch = "aarch64")]
// The definition of libc::SYS_ftruncate is missing on AArch64.
// Use a hard-code number instead.
allow_syscall(46),
#[cfg(target_arch = "aarch64")]
allow_syscall(libc::SYS_faccessat),
#[cfg(target_arch = "aarch64")]
allow_syscall(libc::SYS_newfstatat),
allow_syscall(libc::SYS_futex),
allow_syscall(libc::SYS_getpid),
allow_syscall_if(libc::SYS_ioctl, create_vcpu_ioctl_seccomp_rule()?),
allow_syscall(libc::SYS_lseek),
allow_syscall(libc::SYS_madvise),
allow_syscall(libc::SYS_mmap),
allow_syscall(libc::SYS_mprotect),
allow_syscall(libc::SYS_munmap),
allow_syscall(libc::SYS_nanosleep),
allow_syscall(libc::SYS_openat),
allow_syscall(libc::SYS_prctl),
allow_syscall(libc::SYS_pread64),
allow_syscall(libc::SYS_pwrite64),
allow_syscall(libc::SYS_read),
allow_syscall(libc::SYS_recvfrom),
allow_syscall(libc::SYS_recvmsg),
allow_syscall(libc::SYS_rt_sigaction),
allow_syscall(libc::SYS_rt_sigprocmask),
allow_syscall(libc::SYS_rt_sigreturn),
allow_syscall(libc::SYS_sched_getaffinity),
allow_syscall(libc::SYS_sendmsg),
allow_syscall(libc::SYS_set_robust_list),
allow_syscall(libc::SYS_sigaltstack),
allow_syscall(libc::SYS_socketpair),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_stat),
allow_syscall(libc::SYS_statx),
allow_syscall(libc::SYS_tgkill),
allow_syscall(libc::SYS_tkill),
allow_syscall(libc::SYS_write),
])
}
// The filter containing the white listed syscall rules required by the API to
// function.
fn api_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
@ -380,8 +459,9 @@ fn api_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
fn get_seccomp_filter_trap(thread_type: Thread) -> Result<SeccompFilter, Error> {
let rules = match thread_type {
Thread::Vmm => vmm_thread_rules()?,
Thread::Api => api_thread_rules()?,
Thread::Vcpu => vcpu_thread_rules()?,
Thread::Vmm => vmm_thread_rules()?,
};
Ok(SeccompFilter::new(
@ -392,8 +472,9 @@ fn get_seccomp_filter_trap(thread_type: Thread) -> Result<SeccompFilter, Error>
fn get_seccomp_filter_log(thread_type: Thread) -> Result<SeccompFilter, Error> {
let rules = match thread_type {
Thread::Vmm => vmm_thread_rules()?,
Thread::Api => api_thread_rules()?,
Thread::Vcpu => vcpu_thread_rules()?,
Thread::Vmm => vmm_thread_rules()?,
};
Ok(SeccompFilter::new(

View File

@ -327,6 +327,7 @@ impl Vm {
vm.clone(),
reset_evt,
hypervisor,
seccomp_action.clone(),
)
.map_err(Error::CpuManager)?;