mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2024-10-03 20:15:45 +00:00
vmm: seccomp: Add seccomp filters for the vcpu worker thread
Partially fixes: #925

Signed-off-by: Bo Chen <chen.bo@intel.com>
This commit is contained in:
parent
d793cc4da3
commit
2612a6df29
@ -16,6 +16,7 @@ use crate::config::CpuTopology;
|
|||||||
use crate::config::CpusConfig;
|
use crate::config::CpusConfig;
|
||||||
use crate::device_manager::DeviceManager;
|
use crate::device_manager::DeviceManager;
|
||||||
use crate::memory_manager::MemoryManager;
|
use crate::memory_manager::MemoryManager;
|
||||||
|
use crate::seccomp_filters::{get_seccomp_filter, Thread};
|
||||||
use crate::CPU_MANAGER_SNAPSHOT_ID;
|
use crate::CPU_MANAGER_SNAPSHOT_ID;
|
||||||
#[cfg(feature = "acpi")]
|
#[cfg(feature = "acpi")]
|
||||||
use acpi_tables::{aml, aml::Aml, sdt::SDT};
|
use acpi_tables::{aml, aml::Aml, sdt::SDT};
|
||||||
@ -31,6 +32,8 @@ use devices::interrupt_controller::InterruptController;
|
|||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(target_arch = "x86_64")]
|
||||||
use hypervisor::CpuId;
|
use hypervisor::CpuId;
|
||||||
use hypervisor::{CpuState, VmExit};
|
use hypervisor::{CpuState, VmExit};
|
||||||
|
use seccomp::{SeccompAction, SeccompFilter};
|
||||||
|
|
||||||
use libc::{c_void, siginfo_t};
|
use libc::{c_void, siginfo_t};
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(target_arch = "x86_64")]
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
@ -192,6 +195,12 @@ pub enum Error {
|
|||||||
|
|
||||||
/// Error resuming vCPU on shutdown
|
/// Error resuming vCPU on shutdown
|
||||||
ResumeOnShutdown(MigratableError),
|
ResumeOnShutdown(MigratableError),
|
||||||
|
|
||||||
|
/// Cannot create seccomp filter
|
||||||
|
CreateSeccompFilter(seccomp::SeccompError),
|
||||||
|
|
||||||
|
/// Cannot apply seccomp filter
|
||||||
|
ApplySeccompFilter(seccomp::Error),
|
||||||
}
|
}
|
||||||
pub type Result<T> = result::Result<T, Error>;
|
pub type Result<T> = result::Result<T, Error>;
|
||||||
|
|
||||||
@ -475,6 +484,7 @@ pub struct CpuManager {
|
|||||||
vcpu_states: Vec<VcpuState>,
|
vcpu_states: Vec<VcpuState>,
|
||||||
selected_cpu: u8,
|
selected_cpu: u8,
|
||||||
vcpus: Vec<Arc<Mutex<Vcpu>>>,
|
vcpus: Vec<Arc<Mutex<Vcpu>>>,
|
||||||
|
seccomp_action: SeccompAction,
|
||||||
}
|
}
|
||||||
|
|
||||||
const CPU_ENABLE_FLAG: usize = 0;
|
const CPU_ENABLE_FLAG: usize = 0;
|
||||||
@ -601,6 +611,7 @@ impl CpuManager {
|
|||||||
vm: Arc<dyn hypervisor::Vm>,
|
vm: Arc<dyn hypervisor::Vm>,
|
||||||
reset_evt: EventFd,
|
reset_evt: EventFd,
|
||||||
hypervisor: Arc<dyn hypervisor::Hypervisor>,
|
hypervisor: Arc<dyn hypervisor::Hypervisor>,
|
||||||
|
seccomp_action: SeccompAction,
|
||||||
) -> Result<Arc<Mutex<CpuManager>>> {
|
) -> Result<Arc<Mutex<CpuManager>>> {
|
||||||
let guest_memory = memory_manager.lock().unwrap().guest_memory();
|
let guest_memory = memory_manager.lock().unwrap().guest_memory();
|
||||||
let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
|
let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
|
||||||
@ -633,6 +644,7 @@ impl CpuManager {
|
|||||||
reset_evt,
|
reset_evt,
|
||||||
selected_cpu: 0,
|
selected_cpu: 0,
|
||||||
vcpus: Vec::with_capacity(usize::from(config.max_vcpus)),
|
vcpus: Vec::with_capacity(usize::from(config.max_vcpus)),
|
||||||
|
seccomp_action,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(target_arch = "x86_64")]
|
||||||
@ -812,10 +824,22 @@ impl CpuManager {
|
|||||||
|
|
||||||
info!("Starting vCPU: cpu_id = {}", cpu_id);
|
info!("Starting vCPU: cpu_id = {}", cpu_id);
|
||||||
|
|
||||||
|
// Retrieve seccomp filter for vcpu thread
|
||||||
|
let vcpu_seccomp_filter = get_seccomp_filter(&self.seccomp_action, Thread::Vcpu)
|
||||||
|
.map_err(Error::CreateSeccompFilter)?;
|
||||||
|
|
||||||
let handle = Some(
|
let handle = Some(
|
||||||
thread::Builder::new()
|
thread::Builder::new()
|
||||||
.name(format!("vcpu{}", cpu_id))
|
.name(format!("vcpu{}", cpu_id))
|
||||||
.spawn(move || {
|
.spawn(move || {
|
||||||
|
// Apply seccomp filter for vcpu thread.
|
||||||
|
if let Err(e) =
|
||||||
|
SeccompFilter::apply(vcpu_seccomp_filter).map_err(Error::ApplySeccompFilter)
|
||||||
|
{
|
||||||
|
error!("Error applying seccomp filter: {:?}", e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {}
|
extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {}
|
||||||
// This uses an async signal safe handler to kill the vcpu handles.
|
// This uses an async signal safe handler to kill the vcpu handles.
|
||||||
register_signal_handler(SIGRTMIN(), handle_signal)
|
register_signal_handler(SIGRTMIN(), handle_signal)
|
||||||
|
@ -12,8 +12,9 @@ use seccomp::{
|
|||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
|
||||||
pub enum Thread {
|
pub enum Thread {
|
||||||
Vmm,
|
|
||||||
Api,
|
Api,
|
||||||
|
Vcpu,
|
||||||
|
Vmm,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Shorthand for chaining `SeccompCondition`s with the `and` operator in a `SeccompRule`.
|
/// Shorthand for chaining `SeccompCondition`s with the `and` operator in a `SeccompRule`.
|
||||||
@ -79,33 +80,33 @@ const VFIO_IOMMU_MAP_DMA: u64 = 0x3b71;
|
|||||||
const VFIO_IOMMU_UNMAP_DMA: u64 = 0x3b72;
|
const VFIO_IOMMU_UNMAP_DMA: u64 = 0x3b72;
|
||||||
const VFIO_DEVICE_IOEVENTFD: u64 = 0x3b74;
|
const VFIO_DEVICE_IOEVENTFD: u64 = 0x3b74;
|
||||||
|
|
||||||
fn create_vmm_ioctl_seccomp_rule_common() -> Result<Vec<SeccompRule>, Error> {
|
// See include/uapi/linux/kvm.h in the kernel code.
|
||||||
// See include/uapi/linux/kvm.h in the kernel code.
|
const KVM_GET_API_VERSION: u64 = 0xae00;
|
||||||
const KVM_GET_API_VERSION: u64 = 0xae00;
|
const KVM_CREATE_VM: u64 = 0xae01;
|
||||||
const KVM_CREATE_VM: u64 = 0xae01;
|
const KVM_CHECK_EXTENSION: u64 = 0xae03;
|
||||||
const KVM_CHECK_EXTENSION: u64 = 0xae03;
|
const KVM_GET_VCPU_MMAP_SIZE: u64 = 0xae04;
|
||||||
const KVM_GET_VCPU_MMAP_SIZE: u64 = 0xae04;
|
const KVM_CREATE_VCPU: u64 = 0xae41;
|
||||||
const KVM_CREATE_VCPU: u64 = 0xae41;
|
const KVM_CREATE_IRQCHIP: u64 = 0xae60;
|
||||||
const KVM_CREATE_IRQCHIP: u64 = 0xae60;
|
const KVM_RUN: u64 = 0xae80;
|
||||||
const KVM_RUN: u64 = 0xae80;
|
const KVM_SET_MP_STATE: u64 = 0x4004_ae99;
|
||||||
const KVM_SET_MP_STATE: u64 = 0x4004_ae99;
|
const KVM_SET_GSI_ROUTING: u64 = 0x4008_ae6a;
|
||||||
const KVM_SET_GSI_ROUTING: u64 = 0x4008_ae6a;
|
const KVM_SET_DEVICE_ATTR: u64 = 0x4018_aee1;
|
||||||
const KVM_SET_DEVICE_ATTR: u64 = 0x4018_aee1;
|
const KVM_SET_ONE_REG: u64 = 0x4010_aeac;
|
||||||
const KVM_SET_ONE_REG: u64 = 0x4010_aeac;
|
const KVM_SET_USER_MEMORY_REGION: u64 = 0x4020_ae46;
|
||||||
const KVM_SET_USER_MEMORY_REGION: u64 = 0x4020_ae46;
|
const KVM_IRQFD: u64 = 0x4020_ae76;
|
||||||
const KVM_IRQFD: u64 = 0x4020_ae76;
|
const KVM_IOEVENTFD: u64 = 0x4040_ae79;
|
||||||
const KVM_IOEVENTFD: u64 = 0x4040_ae79;
|
const KVM_SET_VCPU_EVENTS: u64 = 0x4040_aea0;
|
||||||
const KVM_SET_VCPU_EVENTS: u64 = 0x4040_aea0;
|
const KVM_ENABLE_CAP: u64 = 0x4068_aea3;
|
||||||
const KVM_ENABLE_CAP: u64 = 0x4068_aea3;
|
const KVM_SET_REGS: u64 = 0x4090_ae82;
|
||||||
const KVM_SET_REGS: u64 = 0x4090_ae82;
|
const KVM_GET_MP_STATE: u64 = 0x8004_ae98;
|
||||||
const KVM_GET_MP_STATE: u64 = 0x8004_ae98;
|
const KVM_GET_DEVICE_ATTR: u64 = 0x4018_aee2;
|
||||||
const KVM_GET_DEVICE_ATTR: u64 = 0x4018_aee2;
|
const KVM_GET_VCPU_EVENTS: u64 = 0x8040_ae9f;
|
||||||
const KVM_GET_VCPU_EVENTS: u64 = 0x8040_ae9f;
|
const KVM_GET_ONE_REG: u64 = 0x4010_aeab;
|
||||||
const KVM_GET_ONE_REG: u64 = 0x4010_aeab;
|
const KVM_GET_REGS: u64 = 0x8090_ae81;
|
||||||
const KVM_GET_REGS: u64 = 0x8090_ae81;
|
const KVM_GET_SUPPORTED_CPUID: u64 = 0xc008_ae05;
|
||||||
const KVM_GET_SUPPORTED_CPUID: u64 = 0xc008_ae05;
|
const KVM_CREATE_DEVICE: u64 = 0xc00c_aee0;
|
||||||
const KVM_CREATE_DEVICE: u64 = 0xc00c_aee0;
|
|
||||||
|
|
||||||
|
fn create_vmm_ioctl_seccomp_rule_common() -> Result<Vec<SeccompRule>, Error> {
|
||||||
Ok(or![
|
Ok(or![
|
||||||
and![Cond::new(1, ArgLen::DWORD, Eq, FIOCLEX)?],
|
and![Cond::new(1, ArgLen::DWORD, Eq, FIOCLEX)?],
|
||||||
and![Cond::new(1, ArgLen::DWORD, Eq, FIONBIO)?],
|
and![Cond::new(1, ArgLen::DWORD, Eq, FIONBIO)?],
|
||||||
@ -349,6 +350,84 @@ fn vmm_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
|
|||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn create_vcpu_ioctl_seccomp_rule() -> Result<Vec<SeccompRule>, Error> {
|
||||||
|
Ok(or![
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, FIONBIO,)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_CHECK_EXTENSION,)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_IOEVENTFD)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_IRQFD,)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_DEVICE_ATTR,)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_GSI_ROUTING,)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_USER_MEMORY_REGION,)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, KVM_RUN,)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_DEVICE_SET_IRQS)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_GROUP_UNSET_CONTAINER)?],
|
||||||
|
and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_UNMAP_DMA)?],
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vcpu_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
|
||||||
|
Ok(vec![
|
||||||
|
allow_syscall(libc::SYS_accept4),
|
||||||
|
allow_syscall(libc::SYS_brk),
|
||||||
|
allow_syscall(libc::SYS_clock_nanosleep),
|
||||||
|
allow_syscall(libc::SYS_clone),
|
||||||
|
allow_syscall(libc::SYS_close),
|
||||||
|
allow_syscall(libc::SYS_dup),
|
||||||
|
allow_syscall(libc::SYS_epoll_create1),
|
||||||
|
allow_syscall(libc::SYS_epoll_ctl),
|
||||||
|
allow_syscall(libc::SYS_epoll_pwait),
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
allow_syscall(libc::SYS_epoll_wait),
|
||||||
|
allow_syscall(libc::SYS_eventfd2),
|
||||||
|
allow_syscall(libc::SYS_exit),
|
||||||
|
allow_syscall(libc::SYS_fallocate),
|
||||||
|
allow_syscall(libc::SYS_fcntl),
|
||||||
|
allow_syscall(libc::SYS_fdatasync),
|
||||||
|
allow_syscall(libc::SYS_fsync),
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
allow_syscall(libc::SYS_ftruncate),
|
||||||
|
#[cfg(target_arch = "aarch64")]
|
||||||
|
// The definition of libc::SYS_ftruncate is missing on AArch64.
|
||||||
|
// Use a hard-code number instead.
|
||||||
|
allow_syscall(46),
|
||||||
|
#[cfg(target_arch = "aarch64")]
|
||||||
|
allow_syscall(libc::SYS_faccessat),
|
||||||
|
#[cfg(target_arch = "aarch64")]
|
||||||
|
allow_syscall(libc::SYS_newfstatat),
|
||||||
|
allow_syscall(libc::SYS_futex),
|
||||||
|
allow_syscall(libc::SYS_getpid),
|
||||||
|
allow_syscall_if(libc::SYS_ioctl, create_vcpu_ioctl_seccomp_rule()?),
|
||||||
|
allow_syscall(libc::SYS_lseek),
|
||||||
|
allow_syscall(libc::SYS_madvise),
|
||||||
|
allow_syscall(libc::SYS_mmap),
|
||||||
|
allow_syscall(libc::SYS_mprotect),
|
||||||
|
allow_syscall(libc::SYS_munmap),
|
||||||
|
allow_syscall(libc::SYS_nanosleep),
|
||||||
|
allow_syscall(libc::SYS_openat),
|
||||||
|
allow_syscall(libc::SYS_prctl),
|
||||||
|
allow_syscall(libc::SYS_pread64),
|
||||||
|
allow_syscall(libc::SYS_pwrite64),
|
||||||
|
allow_syscall(libc::SYS_read),
|
||||||
|
allow_syscall(libc::SYS_recvfrom),
|
||||||
|
allow_syscall(libc::SYS_recvmsg),
|
||||||
|
allow_syscall(libc::SYS_rt_sigaction),
|
||||||
|
allow_syscall(libc::SYS_rt_sigprocmask),
|
||||||
|
allow_syscall(libc::SYS_rt_sigreturn),
|
||||||
|
allow_syscall(libc::SYS_sched_getaffinity),
|
||||||
|
allow_syscall(libc::SYS_sendmsg),
|
||||||
|
allow_syscall(libc::SYS_set_robust_list),
|
||||||
|
allow_syscall(libc::SYS_sigaltstack),
|
||||||
|
allow_syscall(libc::SYS_socketpair),
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
allow_syscall(libc::SYS_stat),
|
||||||
|
allow_syscall(libc::SYS_statx),
|
||||||
|
allow_syscall(libc::SYS_tgkill),
|
||||||
|
allow_syscall(libc::SYS_tkill),
|
||||||
|
allow_syscall(libc::SYS_write),
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
// The filter containing the white listed syscall rules required by the API to
|
// The filter containing the white listed syscall rules required by the API to
|
||||||
// function.
|
// function.
|
||||||
fn api_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
|
fn api_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
|
||||||
@ -380,8 +459,9 @@ fn api_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
|
|||||||
|
|
||||||
fn get_seccomp_filter_trap(thread_type: Thread) -> Result<SeccompFilter, Error> {
|
fn get_seccomp_filter_trap(thread_type: Thread) -> Result<SeccompFilter, Error> {
|
||||||
let rules = match thread_type {
|
let rules = match thread_type {
|
||||||
Thread::Vmm => vmm_thread_rules()?,
|
|
||||||
Thread::Api => api_thread_rules()?,
|
Thread::Api => api_thread_rules()?,
|
||||||
|
Thread::Vcpu => vcpu_thread_rules()?,
|
||||||
|
Thread::Vmm => vmm_thread_rules()?,
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(SeccompFilter::new(
|
Ok(SeccompFilter::new(
|
||||||
@ -392,8 +472,9 @@ fn get_seccomp_filter_trap(thread_type: Thread) -> Result<SeccompFilter, Error>
|
|||||||
|
|
||||||
fn get_seccomp_filter_log(thread_type: Thread) -> Result<SeccompFilter, Error> {
|
fn get_seccomp_filter_log(thread_type: Thread) -> Result<SeccompFilter, Error> {
|
||||||
let rules = match thread_type {
|
let rules = match thread_type {
|
||||||
Thread::Vmm => vmm_thread_rules()?,
|
|
||||||
Thread::Api => api_thread_rules()?,
|
Thread::Api => api_thread_rules()?,
|
||||||
|
Thread::Vcpu => vcpu_thread_rules()?,
|
||||||
|
Thread::Vmm => vmm_thread_rules()?,
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(SeccompFilter::new(
|
Ok(SeccompFilter::new(
|
||||||
|
@ -327,6 +327,7 @@ impl Vm {
|
|||||||
vm.clone(),
|
vm.clone(),
|
||||||
reset_evt,
|
reset_evt,
|
||||||
hypervisor,
|
hypervisor,
|
||||||
|
seccomp_action.clone(),
|
||||||
)
|
)
|
||||||
.map_err(Error::CpuManager)?;
|
.map_err(Error::CpuManager)?;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user