vhost_user_fs: Add seccomp

Implement seccomp; we use one filter for all threads.
The syscall list comes from the C daemon with syscalls added
as I hit them.

The default behaviour is to kill the process; this normally gets
audit logged.

--seccomp none  disables seccomp
          log   Just logs violations but doesn't stop them
          trap  causes a signal to be sent that can be trapped.

If you suspect you're hitting a seccomp action then you can
check the audit log;  you could also switch to running with 'log'
to collect a bunch of calls to report.
To see where the syscalls are coming from use 'trap' with a debugger
or coredump to backtrace it.

This can be improved by restricting the parameters of some
syscalls to make the filter more restrictive.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
Dr. David Alan Gilbert 2020-05-01 17:26:07 +01:00 committed by Sebastien Boeuf
parent 6aa29bdb24
commit 4120a7dee9
5 changed files with 157 additions and 0 deletions

1
Cargo.lock generated
View File

@ -1414,6 +1414,7 @@ dependencies = [
"bitflags 1.2.1",
"libc",
"log 0.4.8",
"seccomp",
"tempdir",
"vhost",
"vm-memory",

View File

@ -14,6 +14,7 @@ use epoll;
use futures::executor::{ThreadPool, ThreadPoolBuilder};
use libc::EFD_NONBLOCK;
use log::*;
use seccomp::SeccompAction;
use std::num::Wrapping;
use std::sync::{Arc, Mutex, RwLock};
use std::{convert, error, fmt, io, process};
@ -26,6 +27,7 @@ use vhost_user_fs::descriptor_utils::{Reader, Writer};
use vhost_user_fs::filesystem::FileSystem;
use vhost_user_fs::passthrough::{self, PassthroughFs};
use vhost_user_fs::sandbox::Sandbox;
use vhost_user_fs::seccomp::enable_seccomp;
use vhost_user_fs::server::Server;
use vhost_user_fs::Error as VhostUserFsError;
use virtio_bindings::bindings::virtio_net::*;
@ -316,6 +318,13 @@ fn main() {
.long("disable-sandbox")
.help("Don't set up a sandbox for the daemon"),
)
.arg(
Arg::with_name("seccomp")
.long("seccomp")
.help("Disable/debug seccomp security")
.possible_values(&["kill", "log", "trap", "none"])
.default_value("kill"),
)
.get_matches();
// Retrieve arguments
@ -331,6 +340,13 @@ fn main() {
};
let xattr: bool = !cmd_arguments.is_present("disable-xattr");
let create_sandbox: bool = !cmd_arguments.is_present("disable-sandbox");
let seccomp_mode: SeccompAction = match cmd_arguments.value_of("seccomp").unwrap() {
"none" => SeccompAction::Allow, // i.e. no seccomp
"kill" => SeccompAction::Kill,
"log" => SeccompAction::Log,
"trap" => SeccompAction::Trap,
_ => unreachable!(), // We told Arg possible_values
};
let listener = Listener::new(sock, true).unwrap();
@ -356,6 +372,11 @@ fn main() {
}
};
// Must happen before we start the thread pool
if seccomp_mode != SeccompAction::Allow {
enable_seccomp(seccomp_mode).unwrap();
};
let fs = PassthroughFs::new(fs_cfg).unwrap();
let fs_backend = Arc::new(RwLock::new(
VhostUserFsBackend::new(fs, thread_pool_size).unwrap(),

View File

@ -8,6 +8,8 @@ edition = "2018"
bitflags = "1.1.0"
libc = "0.2.70"
log = "0.4.8"
# Match the version in vmm
seccomp = { git = "https://github.com/firecracker-microvm/firecracker", tag = "v0.21.1" }
tempdir= "0.3.7"
vm-memory = "0.2.0"
vm-virtio = { path = "../vm-virtio" }

View File

@ -13,6 +13,7 @@ pub mod fuse;
pub mod multikey;
pub mod passthrough;
pub mod sandbox;
pub mod seccomp;
pub mod server;
use std::ffi::FromBytesWithNulError;

View File

@ -0,0 +1,132 @@
// Copyright 2020 Red Hat, Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use seccomp::{allow_syscall, BpfProgram, SeccompAction, SeccompFilter};
use std::convert::TryInto;
use std::{convert, fmt};
/// Errors reported while setting up the seccomp filter for the daemon.
#[derive(Debug)]
pub enum Error {
    /// Cannot create seccomp filter
    ///
    /// NOTE(review): no code in this module constructs this variant; errors
    /// from building the filter are currently converted through
    /// `From<seccomp::Error>` and surface as `ApplySeccompFilter` instead.
    CreateSeccompFilter(seccomp::SeccompError),
    /// Cannot apply seccomp filter
    ///
    /// Also the variant produced by the `From<seccomp::Error>` conversion,
    /// so any `seccomp::Error` propagated with `?` ends up here.
    ApplySeccompFilter(seccomp::Error),
}
impl convert::From<seccomp::Error> for Error {
fn from(e: seccomp::Error) -> Self {
Error::ApplySeccompFilter(e)
}
}
/// Human-readable rendering: a fixed prefix followed by the `Debug` form of
/// the error.
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("vhost_user_fs_seccomp_error: {:?}", self))
    }
}
fn vuf_filter(action: SeccompAction) -> Result<SeccompFilter, Error> {
Ok(SeccompFilter::new(
vec![
allow_syscall(libc::SYS_accept4),
allow_syscall(libc::SYS_brk),
allow_syscall(libc::SYS_capget), // For CAP_FSETID
allow_syscall(libc::SYS_capset),
allow_syscall(libc::SYS_clock_gettime),
allow_syscall(libc::SYS_clone),
allow_syscall(libc::SYS_close),
allow_syscall(libc::SYS_copy_file_range),
allow_syscall(libc::SYS_dup),
allow_syscall(libc::SYS_epoll_create),
allow_syscall(libc::SYS_epoll_create1),
allow_syscall(libc::SYS_epoll_ctl),
allow_syscall(libc::SYS_epoll_wait),
allow_syscall(libc::SYS_eventfd2),
allow_syscall(libc::SYS_exit),
allow_syscall(libc::SYS_exit_group),
allow_syscall(libc::SYS_fallocate),
allow_syscall(libc::SYS_fchdir),
allow_syscall(libc::SYS_fchmodat),
allow_syscall(libc::SYS_fchownat),
allow_syscall(libc::SYS_fcntl),
allow_syscall(libc::SYS_fdatasync),
allow_syscall(libc::SYS_fgetxattr),
allow_syscall(libc::SYS_flistxattr),
allow_syscall(libc::SYS_flock),
allow_syscall(libc::SYS_fremovexattr),
allow_syscall(libc::SYS_fsetxattr),
allow_syscall(libc::SYS_fstat),
allow_syscall(libc::SYS_fstatfs),
allow_syscall(libc::SYS_fsync),
allow_syscall(libc::SYS_ftruncate),
allow_syscall(libc::SYS_futex),
allow_syscall(libc::SYS_getdents),
allow_syscall(libc::SYS_getdents64),
allow_syscall(libc::SYS_getegid),
allow_syscall(libc::SYS_geteuid),
allow_syscall(libc::SYS_getpid),
allow_syscall(libc::SYS_gettid),
allow_syscall(libc::SYS_gettimeofday),
allow_syscall(libc::SYS_getxattr),
allow_syscall(libc::SYS_linkat),
allow_syscall(libc::SYS_listxattr),
allow_syscall(libc::SYS_lseek),
allow_syscall(libc::SYS_madvise),
allow_syscall(libc::SYS_mkdirat),
allow_syscall(libc::SYS_mknodat),
allow_syscall(libc::SYS_mmap),
allow_syscall(libc::SYS_mprotect),
allow_syscall(libc::SYS_mremap),
allow_syscall(libc::SYS_munmap),
allow_syscall(libc::SYS_newfstatat),
allow_syscall(libc::SYS_open),
allow_syscall(libc::SYS_openat),
allow_syscall(libc::SYS_prctl), // TODO restrict to just PR_SET_NAME?
allow_syscall(libc::SYS_preadv),
allow_syscall(libc::SYS_pread64),
allow_syscall(libc::SYS_pwritev),
allow_syscall(libc::SYS_pwrite64),
allow_syscall(libc::SYS_read),
allow_syscall(libc::SYS_readlinkat),
allow_syscall(libc::SYS_recvmsg),
allow_syscall(libc::SYS_renameat),
allow_syscall(libc::SYS_renameat2),
allow_syscall(libc::SYS_removexattr),
allow_syscall(libc::SYS_rt_sigaction),
allow_syscall(libc::SYS_rt_sigprocmask),
allow_syscall(libc::SYS_rt_sigreturn),
allow_syscall(libc::SYS_sched_getaffinity), // used by thread_pool
allow_syscall(libc::SYS_sendmsg),
allow_syscall(libc::SYS_setresgid),
allow_syscall(libc::SYS_setresuid),
//allow_syscall(libc::SYS_setresgid32), Needed on some platforms,
//allow_syscall(libc::SYS_setresuid32), Needed on some platforms
allow_syscall(libc::SYS_set_robust_list),
allow_syscall(libc::SYS_setxattr),
allow_syscall(libc::SYS_sigaltstack),
allow_syscall(libc::SYS_statx),
allow_syscall(libc::SYS_symlinkat),
allow_syscall(libc::SYS_time), // Rarely needed, except on static builds
allow_syscall(libc::SYS_tgkill),
allow_syscall(libc::SYS_umask),
allow_syscall(libc::SYS_unlink),
allow_syscall(libc::SYS_unlinkat),
allow_syscall(libc::SYS_unshare),
allow_syscall(libc::SYS_utimensat),
allow_syscall(libc::SYS_write),
allow_syscall(libc::SYS_writev),
]
.into_iter()
.collect(),
action,
)?)
}
pub fn enable_seccomp(action: SeccompAction) -> Result<(), Error> {
let scfilter = vuf_filter(action)?;
let bpfprog: BpfProgram = scfilter.try_into()?;
SeccompFilter::apply(bpfprog.try_into().unwrap()).unwrap();
Ok(())
}