// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // Copyright © 2020 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 use seccomp::{ allow_syscall, allow_syscall_if, BpfProgram, Error, SeccompAction, SeccompCmpArgLen as ArgLen, SeccompCmpOp::Eq, SeccompCondition as Cond, SeccompError, SeccompFilter, SeccompRule, SyscallRuleSet, }; use std::convert::TryInto; pub enum Thread { VirtioBalloon, VirtioBlock, VirtioConsole, VirtioIommu, VirtioMem, VirtioNet, VirtioNetCtl, VirtioPmem, VirtioRng, VirtioVhostFs, VirtioVhostNetCtl, VirtioVsock, VirtioWatchdog, } /// Shorthand for chaining `SeccompCondition`s with the `and` operator in a `SeccompRule`. /// The rule will take the `Allow` action if _all_ the conditions are true. /// /// [`Allow`]: enum.SeccompAction.html /// [`SeccompCondition`]: struct.SeccompCondition.html /// [`SeccompRule`]: struct.SeccompRule.html macro_rules! and { ($($x:expr,)*) => (SeccompRule::new(vec![$($x),*], SeccompAction::Allow)); ($($x:expr),*) => (SeccompRule::new(vec![$($x),*], SeccompAction::Allow)) } /// Shorthand for chaining `SeccompRule`s with the `or` operator in a `SeccompFilter`. /// /// [`SeccompFilter`]: struct.SeccompFilter.html /// [`SeccompRule`]: struct.SeccompRule.html macro_rules! or { ($($x:expr,)*) => (vec![$($x),*]); ($($x:expr),*) => (vec![$($x),*]) } // Define io_uring syscalls as they are not yet part of libc. const SYS_IO_URING_ENTER: i64 = 426; // See include/uapi/asm-generic/ioctls.h in the kernel code. const FIONBIO: u64 = 0x5421; // See include/uapi/linux/vfio.h in the kernel code. const VFIO_IOMMU_MAP_DMA: u64 = 0x3b71; const VFIO_IOMMU_UNMAP_DMA: u64 = 0x3b72; // See include/uapi/linux/if_tun.h in the kernel code. const TUNSETOFFLOAD: u64 = 0x4004_54d0; fn create_virtio_iommu_ioctl_seccomp_rule() -> Vec { or![ and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_MAP_DMA).unwrap()], and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_UNMAP_DMA).unwrap()], ] } fn create_virtio_mem_ioctl_seccomp_rule() -> Vec { or![ and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_MAP_DMA).unwrap()], and![Cond::new(1, ArgLen::DWORD, Eq, VFIO_IOMMU_UNMAP_DMA).unwrap()], ] } fn virtio_balloon_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ] } fn virtio_block_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_fallocate), allow_syscall(libc::SYS_fdatasync), allow_syscall(libc::SYS_fsync), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_ftruncate), #[cfg(target_arch = "aarch64")] // The definition of libc::SYS_ftruncate is missing on AArch64. // Use a hard-code number instead. allow_syscall(46), allow_syscall(libc::SYS_futex), allow_syscall(SYS_IO_URING_ENTER), allow_syscall(libc::SYS_lseek), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_mmap), allow_syscall(libc::SYS_mprotect), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_openat), allow_syscall(libc::SYS_prctl), allow_syscall(libc::SYS_pread64), allow_syscall(libc::SYS_preadv), allow_syscall(libc::SYS_pwritev), allow_syscall(libc::SYS_pwrite64), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sched_getaffinity), allow_syscall(libc::SYS_set_robust_list), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_timerfd_settime), allow_syscall(libc::SYS_write), ] } fn virtio_console_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_mmap), allow_syscall(libc::SYS_mprotect), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_prctl), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sched_getaffinity), allow_syscall(libc::SYS_set_robust_list), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ] } fn virtio_iommu_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_futex), allow_syscall_if(libc::SYS_ioctl, create_virtio_iommu_ioctl_seccomp_rule()), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_mmap), allow_syscall(libc::SYS_mprotect), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ] } fn virtio_mem_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_fallocate), allow_syscall(libc::SYS_futex), allow_syscall_if(libc::SYS_ioctl, create_virtio_mem_ioctl_seccomp_rule()), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ] } fn virtio_net_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_openat), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_readv), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_timerfd_settime), allow_syscall(libc::SYS_write), allow_syscall(libc::SYS_writev), ] } fn create_virtio_net_ctl_ioctl_seccomp_rule() -> Result, Error> { Ok(or![and![Cond::new(1, ArgLen::DWORD, Eq, TUNSETOFFLOAD)?],]) } fn virtio_net_ctl_thread_rules() -> Result, Error> { Ok(vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_futex), allow_syscall_if(libc::SYS_ioctl, create_virtio_net_ctl_ioctl_seccomp_rule()?), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ]) } fn virtio_pmem_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_fsync), allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ] } fn virtio_rng_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_mmap), allow_syscall(libc::SYS_mprotect), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_prctl), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sched_getaffinity), allow_syscall(libc::SYS_set_robust_list), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ] } fn virtio_vhost_fs_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_mmap), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_recvmsg), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sendmsg), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ] } fn virtio_vhost_net_ctl_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ] } fn create_vsock_ioctl_seccomp_rule() -> Vec { or![and![Cond::new(1, ArgLen::DWORD, Eq, FIONBIO,).unwrap()],] } fn virtio_vsock_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_accept4), allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall_if(libc::SYS_ioctl, create_vsock_ioctl_seccomp_rule()), allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_recvfrom), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_write), ] } fn virtio_watchdog_thread_rules() -> Vec { vec![ allow_syscall(libc::SYS_brk), allow_syscall(libc::SYS_close), allow_syscall(libc::SYS_dup), allow_syscall(libc::SYS_epoll_create1), allow_syscall(libc::SYS_epoll_ctl), allow_syscall(libc::SYS_epoll_pwait), #[cfg(target_arch = "x86_64")] allow_syscall(libc::SYS_epoll_wait), allow_syscall(libc::SYS_exit), allow_syscall(libc::SYS_futex), allow_syscall(libc::SYS_madvise), allow_syscall(libc::SYS_mmap), allow_syscall(libc::SYS_mprotect), allow_syscall(libc::SYS_munmap), allow_syscall(libc::SYS_prctl), allow_syscall(libc::SYS_read), allow_syscall(libc::SYS_rt_sigprocmask), allow_syscall(libc::SYS_sched_getaffinity), allow_syscall(libc::SYS_set_robust_list), allow_syscall(libc::SYS_sigaltstack), allow_syscall(libc::SYS_timerfd_settime), allow_syscall(libc::SYS_write), ] } fn get_seccomp_filter_trap(thread_type: Thread) -> Result { let rules = match thread_type { Thread::VirtioBalloon => virtio_balloon_thread_rules(), Thread::VirtioBlock => virtio_block_thread_rules(), Thread::VirtioConsole => virtio_console_thread_rules(), Thread::VirtioIommu => virtio_iommu_thread_rules(), Thread::VirtioMem => virtio_mem_thread_rules(), Thread::VirtioNet => virtio_net_thread_rules(), Thread::VirtioNetCtl => virtio_net_ctl_thread_rules()?, Thread::VirtioPmem => virtio_pmem_thread_rules(), Thread::VirtioRng => virtio_rng_thread_rules(), Thread::VirtioVhostFs => virtio_vhost_fs_thread_rules(), Thread::VirtioVhostNetCtl => virtio_vhost_net_ctl_thread_rules(), Thread::VirtioVsock => virtio_vsock_thread_rules(), Thread::VirtioWatchdog => virtio_watchdog_thread_rules(), }; SeccompFilter::new(rules.into_iter().collect(), SeccompAction::Trap) } fn get_seccomp_filter_log(thread_type: Thread) -> Result { let rules = match thread_type { Thread::VirtioBalloon => virtio_balloon_thread_rules(), Thread::VirtioBlock => virtio_block_thread_rules(), Thread::VirtioConsole => virtio_console_thread_rules(), Thread::VirtioIommu => virtio_iommu_thread_rules(), Thread::VirtioMem => virtio_mem_thread_rules(), Thread::VirtioNet => virtio_net_thread_rules(), Thread::VirtioNetCtl => virtio_net_ctl_thread_rules()?, Thread::VirtioPmem => virtio_pmem_thread_rules(), Thread::VirtioRng => virtio_rng_thread_rules(), Thread::VirtioVhostFs => virtio_vhost_fs_thread_rules(), Thread::VirtioVhostNetCtl => virtio_vhost_net_ctl_thread_rules(), Thread::VirtioVsock => virtio_vsock_thread_rules(), Thread::VirtioWatchdog => virtio_watchdog_thread_rules(), }; SeccompFilter::new(rules.into_iter().collect(), SeccompAction::Log) } /// Generate a BPF program based on the seccomp_action value pub fn get_seccomp_filter( seccomp_action: &SeccompAction, thread_type: Thread, ) -> Result { match seccomp_action { SeccompAction::Allow => Ok(vec![]), SeccompAction::Log => get_seccomp_filter_log(thread_type) .and_then(|filter| filter.try_into()) .map_err(SeccompError::SeccompFilter), _ => get_seccomp_filter_trap(thread_type) .and_then(|filter| filter.try_into()) .map_err(SeccompError::SeccompFilter), } }