build: Remove vhost_user_fs

This has been superseded by virtiofsd-rs.

Fixes: #2013

Signed-off-by: Rob Bradford <robert.bradford@intel.com>
Authored by Rob Bradford on 2020-11-27 15:15:33 +00:00; committed by Samuel Ortiz
parent c4ebe83576
commit ead8453120
15 changed files with 0 additions and 8346 deletions

Cargo.lock (generated, 177 lines changed)

@@ -374,102 +374,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "futures"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b3b0c040a1fe6529d30b3c5944b280c7f0dcb2930d2c3062bca967b602583d0"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b7109687aa4e177ef6fe84553af6280ef2778bdb7783ba44c9dc3399110fe64"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "847ce131b72ffb13b6109a221da9ad97a64cbe48feb1028356b836b47b8f1748"
[[package]]
name = "futures-executor"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4caa2b2b68b880003057c1dd49f1ed937e38f22fcf6c212188a121f08cf40a65"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
"num_cpus",
]
[[package]]
name = "futures-io"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "611834ce18aaa1bd13c4b374f5d653e1027cf99b6b502584ff8c9a64413b30bb"
[[package]]
name = "futures-macro"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77408a692f1f97bcc61dc001d752e00643408fbc922e4d634c655df50d595556"
dependencies = [
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f878195a49cee50e006b02b93cf7e0a95a38ac7b776b4c4d9cc1207cd20fcb3d"
[[package]]
name = "futures-task"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c554eb5bf48b2426c4771ab68c6b14468b6e76cc90996f528c3338d761a4d0d"
dependencies = [
"once_cell",
]
[[package]]
name = "futures-util"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d304cff4a7b99cfb7986f7d43fbe93d175e72e704a8860787cc95e9ffd85cbd2"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project",
"pin-utils",
"proc-macro-hack",
"proc-macro-nested",
"slab",
]
[[package]]
name = "getrandom"
version = "0.1.15"
@@ -734,28 +638,12 @@ dependencies = [
"vmm-sys-util",
]
[[package]]
name = "num_cpus"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "object"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d3b63360ec3cb337817c2dbd47ab4a0f170d285d8e5a2064600f3def1402397"
[[package]]
name = "once_cell"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
[[package]]
name = "openssl-sys"
version = "0.9.58"
@@ -824,32 +712,6 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]]
name = "pin-project"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ccc2237c2c489783abd8c4c80e5450fc0e98644555b1364da68cc29aa151ca7"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8e8d2bf0b23038a4424865103a4df472855692821aab4e4f5c3312d461d9e5f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
version = "0.3.19"
@@ -950,18 +812,6 @@ version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro-nested"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eba180dafb9038b050a4c280019bbedf9f2467b61e5d892dcad585bb57aadc5a"
[[package]]
name = "proc-macro2"
version = "1.0.24"
@@ -1240,12 +1090,6 @@ dependencies = [
"libc",
]
[[package]]
name = "slab"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
[[package]]
name = "smallvec"
version = "1.5.0"
@@ -1580,27 +1424,6 @@ dependencies = [
"vmm-sys-util",
]
[[package]]
name = "vhost_user_fs"
version = "0.1.0"
dependencies = [
"bitflags 1.2.1",
"clap",
"epoll",
"futures",
"libc",
"log 0.4.11",
"seccomp",
"tempdir",
"vhost",
"vhost_user_backend",
"virtio-bindings",
"virtio-devices",
"vm-memory",
"vm-virtio",
"vmm-sys-util",
]
[[package]]
name = "vhost_user_net"
version = "0.1.0"

Cargo.toml

@@ -77,7 +77,6 @@ members = [
"qcow",
"vhost_user_backend",
"vhost_user_block",
"vhost_user_fs",
"vhost_user_net",
"virtio-devices",
"vmm",

vhost_user_fs/Cargo.toml

@@ -1,23 +0,0 @@
[package]
name = "vhost_user_fs"
version = "0.1.0"
authors = ["The Cloud Hypervisor Authors"]
edition = "2018"
[dependencies]
bitflags = "1.1.0"
clap = { version = "2.33.3", features=["wrap_help"] }
epoll = ">=4.0.1"
futures = { version = "0.3.8", features = ["thread-pool"] }
libc = "0.2.80"
log = "0.4.11"
# Match the version in vmm
seccomp = { git = "https://github.com/firecracker-microvm/firecracker", tag = "v0.22.0" }
tempdir = "0.3.7"
virtio-bindings = { version = "0.1", features = ["virtio-v5_0_0"]}
virtio-devices = { path = "../virtio-devices" }
vhost_rs = { git = "https://github.com/rust-vmm/vhost", branch = "master", package = "vhost", features = ["vhost-user-slave"] }
vhost_user_backend = { path = "../vhost_user_backend"}
vm-memory = "0.4.0"
vm-virtio = { path = "../vm-virtio" }
vmm-sys-util = "0.7.0"

File diff suppressed because it is too large.

vhost_user_fs/src/file_traits.rs

@@ -1,409 +0,0 @@
// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use std::fs::File;
use std::io::{Error, ErrorKind, Result};
use std::os::unix::io::AsRawFd;
use vm_memory::VolatileSlice;
use libc::{
c_int, c_void, off64_t, pread64, preadv64, pwrite64, pwritev64, read, readv, size_t, write,
writev,
};
/// A trait for setting the size of a file.
/// This is equivalent to File's `set_len` method, but
/// wrapped in a trait so that it can be implemented for
/// other types.
pub trait FileSetLen {
// Set the size of this file.
// This is the moral equivalent of `ftruncate()`.
fn set_len(&self, _len: u64) -> Result<()>;
}
impl FileSetLen for File {
fn set_len(&self, len: u64) -> Result<()> {
File::set_len(self, len)
}
}
/// A trait similar to `Read` and `Write`, but uses volatile memory as buffers.
pub trait FileReadWriteVolatile {
/// Read bytes from this file into the given slice, returning the number of bytes read on
/// success.
fn read_volatile(&mut self, slice: VolatileSlice) -> Result<usize>;
/// Like `read_volatile`, except it reads to a slice of buffers. Data is copied to fill each
/// buffer in order, with the final buffer written to possibly being only partially filled. This
/// method must behave as a single call to `read_volatile` with the buffers concatenated would.
/// The default implementation calls `read_volatile` with either the first nonempty buffer
/// provided, or returns `Ok(0)` if none exists.
fn read_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result<usize> {
bufs.iter()
.find(|b| !b.is_empty())
.map(|&b| self.read_volatile(b))
.unwrap_or(Ok(0))
}
/// Reads bytes from this into the given slice until all bytes in the slice are written, or an
/// error is returned.
fn read_exact_volatile(&mut self, mut slice: VolatileSlice) -> Result<()> {
while !slice.is_empty() {
let bytes_read = self.read_volatile(slice)?;
if bytes_read == 0 {
return Err(Error::from(ErrorKind::UnexpectedEof));
}
// Will panic if read_volatile read more bytes than we gave it, which would be worthy of
// a panic.
slice = slice.offset(bytes_read).unwrap();
}
Ok(())
}
/// Write bytes from the slice to the given file, returning the number of bytes written on
/// success.
fn write_volatile(&mut self, slice: VolatileSlice) -> Result<usize>;
/// Like `write_volatile`, except that it writes from a slice of buffers. Data is copied from
/// each buffer in order, with the final buffer read from possibly being only partially
/// consumed. This method must behave as a call to `write_volatile` with the buffers
/// concatenated would. The default implementation calls `write_volatile` with either the first
/// nonempty buffer provided, or returns `Ok(0)` if none exists.
fn write_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result<usize> {
bufs.iter()
.find(|b| !b.is_empty())
.map(|&b| self.write_volatile(b))
.unwrap_or(Ok(0))
}
/// Write bytes from the slice to the given file until all the bytes from the slice have been
/// written, or an error is returned.
fn write_all_volatile(&mut self, mut slice: VolatileSlice) -> Result<()> {
while !slice.is_empty() {
let bytes_written = self.write_volatile(slice)?;
if bytes_written == 0 {
return Err(Error::from(ErrorKind::WriteZero));
}
// Will panic if write_volatile wrote more bytes than we gave it, which would be worthy of
// a panic.
slice = slice.offset(bytes_written).unwrap();
}
Ok(())
}
}
impl<'a, T: FileReadWriteVolatile + ?Sized> FileReadWriteVolatile for &'a mut T {
fn read_volatile(&mut self, slice: VolatileSlice) -> Result<usize> {
(**self).read_volatile(slice)
}
fn read_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result<usize> {
(**self).read_vectored_volatile(bufs)
}
fn read_exact_volatile(&mut self, slice: VolatileSlice) -> Result<()> {
(**self).read_exact_volatile(slice)
}
fn write_volatile(&mut self, slice: VolatileSlice) -> Result<usize> {
(**self).write_volatile(slice)
}
fn write_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result<usize> {
(**self).write_vectored_volatile(bufs)
}
fn write_all_volatile(&mut self, slice: VolatileSlice) -> Result<()> {
(**self).write_all_volatile(slice)
}
}
/// A trait similar to the unix `ReadExt` and `WriteExt` traits, but for volatile memory.
pub trait FileReadWriteAtVolatile {
/// Reads bytes from this file at `offset` into the given slice, returning the number of bytes
/// read on success.
fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<usize>;
/// Like `read_at_volatile`, except it reads to a slice of buffers. Data is copied to fill each
/// buffer in order, with the final buffer written to possibly being only partially filled. This
/// method must behave as a single call to `read_at_volatile` with the buffers concatenated
/// would. The default implementation calls `read_at_volatile` with either the first nonempty
/// buffer provided, or returns `Ok(0)` if none exists.
fn read_vectored_at_volatile(&mut self, bufs: &[VolatileSlice], offset: u64) -> Result<usize> {
if let Some(&slice) = bufs.first() {
self.read_at_volatile(slice, offset)
} else {
Ok(0)
}
}
/// Reads bytes from this file at `offset` into the given slice until all bytes in the slice are
/// read, or an error is returned.
fn read_exact_at_volatile(&mut self, mut slice: VolatileSlice, mut offset: u64) -> Result<()> {
while !slice.is_empty() {
match self.read_at_volatile(slice, offset) {
Ok(0) => return Err(Error::from(ErrorKind::UnexpectedEof)),
Ok(n) => {
slice = slice.offset(n).unwrap();
offset = offset.checked_add(n as u64).unwrap();
}
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
Err(e) => return Err(e),
}
}
Ok(())
}
/// Writes bytes from this file at `offset` into the given slice, returning the number of bytes
/// written on success.
fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<usize>;
/// Like `write_at_volatile`, except that it writes from a slice of buffers. Data is copied
/// from each buffer in order, with the final buffer read from possibly being only partially
/// consumed. This method must behave as a call to `write_at_volatile` with the buffers
/// concatenated would. The default implementation calls `write_at_volatile` with either the
/// first nonempty buffer provided, or returns `Ok(0)` if none exists.
fn write_vectored_at_volatile(&mut self, bufs: &[VolatileSlice], offset: u64) -> Result<usize> {
if let Some(&slice) = bufs.first() {
self.write_at_volatile(slice, offset)
} else {
Ok(0)
}
}
/// Writes bytes from this file at `offset` into the given slice until all bytes in the slice
/// are written, or an error is returned.
fn write_all_at_volatile(&mut self, mut slice: VolatileSlice, mut offset: u64) -> Result<()> {
while !slice.is_empty() {
match self.write_at_volatile(slice, offset) {
Ok(0) => return Err(Error::from(ErrorKind::WriteZero)),
Ok(n) => {
slice = slice.offset(n).unwrap();
offset = offset.checked_add(n as u64).unwrap();
}
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
Err(e) => return Err(e),
}
}
Ok(())
}
}
impl<'a, T: FileReadWriteAtVolatile + ?Sized> FileReadWriteAtVolatile for &'a mut T {
fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<usize> {
(**self).read_at_volatile(slice, offset)
}
fn read_vectored_at_volatile(&mut self, bufs: &[VolatileSlice], offset: u64) -> Result<usize> {
(**self).read_vectored_at_volatile(bufs, offset)
}
fn read_exact_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<()> {
(**self).read_exact_at_volatile(slice, offset)
}
fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<usize> {
(**self).write_at_volatile(slice, offset)
}
fn write_vectored_at_volatile(&mut self, bufs: &[VolatileSlice], offset: u64) -> Result<usize> {
(**self).write_vectored_at_volatile(bufs, offset)
}
fn write_all_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<()> {
(**self).write_all_at_volatile(slice, offset)
}
}
macro_rules! volatile_impl {
($ty:ty) => {
impl FileReadWriteVolatile for $ty {
fn read_volatile(&mut self, slice: VolatileSlice) -> Result<usize> {
// Safe because only bytes inside the slice are accessed and the kernel is expected
// to handle arbitrary memory for I/O.
let ret =
unsafe { read(self.as_raw_fd(), slice.as_ptr() as *mut c_void, slice.len()) };
if ret >= 0 {
Ok(ret as usize)
} else {
Err(Error::last_os_error())
}
}
fn read_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result<usize> {
let iovecs: Vec<libc::iovec> = bufs
.iter()
.map(|s| libc::iovec {
iov_base: s.as_ptr() as *mut c_void,
iov_len: s.len() as size_t,
})
.collect();
if iovecs.is_empty() {
return Ok(0);
}
// Safe because only bytes inside the buffers are accessed and the kernel is
// expected to handle arbitrary memory for I/O.
let ret = unsafe { readv(self.as_raw_fd(), &iovecs[0], iovecs.len() as c_int) };
if ret >= 0 {
Ok(ret as usize)
} else {
Err(Error::last_os_error())
}
}
fn write_volatile(&mut self, slice: VolatileSlice) -> Result<usize> {
// Safe because only bytes inside the slice are accessed and the kernel is expected
// to handle arbitrary memory for I/O.
let ret = unsafe {
write(
self.as_raw_fd(),
slice.as_ptr() as *const c_void,
slice.len(),
)
};
if ret >= 0 {
Ok(ret as usize)
} else {
Err(Error::last_os_error())
}
}
fn write_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result<usize> {
let iovecs: Vec<libc::iovec> = bufs
.iter()
.map(|s| libc::iovec {
iov_base: s.as_ptr() as *mut c_void,
iov_len: s.len() as size_t,
})
.collect();
if iovecs.is_empty() {
return Ok(0);
}
// Safe because only bytes inside the buffers are accessed and the kernel is
// expected to handle arbitrary memory for I/O.
let ret = unsafe { writev(self.as_raw_fd(), &iovecs[0], iovecs.len() as c_int) };
if ret >= 0 {
Ok(ret as usize)
} else {
Err(Error::last_os_error())
}
}
}
impl FileReadWriteAtVolatile for $ty {
fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<usize> {
// Safe because only bytes inside the slice are accessed and the kernel is expected
// to handle arbitrary memory for I/O.
let ret = unsafe {
pread64(
self.as_raw_fd(),
slice.as_ptr() as *mut c_void,
slice.len(),
offset as off64_t,
)
};
if ret >= 0 {
Ok(ret as usize)
} else {
Err(Error::last_os_error())
}
}
fn read_vectored_at_volatile(
&mut self,
bufs: &[VolatileSlice],
offset: u64,
) -> Result<usize> {
let iovecs: Vec<libc::iovec> = bufs
.iter()
.map(|s| libc::iovec {
iov_base: s.as_ptr() as *mut c_void,
iov_len: s.len() as size_t,
})
.collect();
if iovecs.is_empty() {
return Ok(0);
}
// Safe because only bytes inside the buffers are accessed and the kernel is
// expected to handle arbitrary memory for I/O.
let ret = unsafe {
preadv64(
self.as_raw_fd(),
&iovecs[0],
iovecs.len() as c_int,
offset as off64_t,
)
};
if ret >= 0 {
Ok(ret as usize)
} else {
Err(Error::last_os_error())
}
}
fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<usize> {
// Safe because only bytes inside the slice are accessed and the kernel is expected
// to handle arbitrary memory for I/O.
let ret = unsafe {
pwrite64(
self.as_raw_fd(),
slice.as_ptr() as *const c_void,
slice.len(),
offset as off64_t,
)
};
if ret >= 0 {
Ok(ret as usize)
} else {
Err(Error::last_os_error())
}
}
fn write_vectored_at_volatile(
&mut self,
bufs: &[VolatileSlice],
offset: u64,
) -> Result<usize> {
let iovecs: Vec<libc::iovec> = bufs
.iter()
.map(|s| libc::iovec {
iov_base: s.as_ptr() as *mut c_void,
iov_len: s.len() as size_t,
})
.collect();
if iovecs.is_empty() {
return Ok(0);
}
// Safe because only bytes inside the buffers are accessed and the kernel is
// expected to handle arbitrary memory for I/O.
let ret = unsafe {
pwritev64(
self.as_raw_fd(),
&iovecs[0],
iovecs.len() as c_int,
offset as off64_t,
)
};
if ret >= 0 {
Ok(ret as usize)
} else {
Err(Error::last_os_error())
}
}
}
};
}
volatile_impl!(File);

File diff suppressed because it is too large.

vhost_user_fs/src/fs_cache_req_handler.rs

@@ -1,62 +0,0 @@
use crate::fuse;
use std::io;
use std::os::unix::io::RawFd;
use vhost_rs::vhost_user::message::{
VhostUserFSSlaveMsg, VhostUserFSSlaveMsgFlags, VHOST_USER_FS_SLAVE_ENTRIES,
};
use vhost_rs::vhost_user::{SlaveFsCacheReq, VhostUserMasterReqHandler};
/// Trait for virtio-fs cache request operations. This is mainly used to hide
/// vhost-user details from virtio-fs's fuse part.
pub trait FsCacheReqHandler: Send + Sync + 'static {
/// Set up a dedicated mapping so that the guest can access file data in DAX style.
fn map(
&mut self,
foffset: u64,
moffset: u64,
len: u64,
flags: u64,
fd: RawFd,
) -> io::Result<()>;
/// Remove the mappings that provide access to file data.
fn unmap(&mut self, requests: Vec<fuse::RemovemappingOne>) -> io::Result<()>;
}
impl FsCacheReqHandler for SlaveFsCacheReq {
fn map(
&mut self,
foffset: u64,
moffset: u64,
len: u64,
flags: u64,
fd: RawFd,
) -> io::Result<()> {
let mut msg: VhostUserFSSlaveMsg = Default::default();
msg.fd_offset[0] = foffset;
msg.cache_offset[0] = moffset;
msg.len[0] = len;
msg.flags[0] = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 {
VhostUserFSSlaveMsgFlags::MAP_W | VhostUserFSSlaveMsgFlags::MAP_R
} else {
VhostUserFSSlaveMsgFlags::MAP_R
};
self.fs_slave_map(&msg, fd)?;
Ok(())
}
fn unmap(&mut self, requests: Vec<fuse::RemovemappingOne>) -> io::Result<()> {
for chunk in requests.chunks(VHOST_USER_FS_SLAVE_ENTRIES) {
let mut msg: VhostUserFSSlaveMsg = Default::default();
for (ind, req) in chunk.iter().enumerate() {
msg.len[ind] = req.len;
msg.cache_offset[ind] = req.moffset;
}
self.fs_slave_unmap(&msg)?;
}
Ok(())
}
}
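
Note: a rough, hypothetical sketch of how the fuse server side could drive this trait for DAX window management (the real callers lived in the suppressed fuse/server files). It assumes the helper sits alongside this module so fuse, io and RawFd are in scope, and that RemovemappingOne exposes the public moffset/len fields used by the unmap() impl above:

// Hypothetical helper: map 2 MiB of a backing file read/write at cache offset 0,
// then remove that mapping again via a single RemovemappingOne entry.
fn remap_dax_window<H: FsCacheReqHandler>(handler: &mut H, fd: RawFd) -> io::Result<()> {
    handler.map(
        0,                                     // foffset: start of the backing file
        0,                                     // moffset: start of the DAX cache window
        2 << 20,                               // len: 2 MiB
        fuse::SetupmappingFlags::WRITE.bits(), // ask for a writable mapping
        fd,
    )?;
    handler.unmap(vec![fuse::RemovemappingOne {
        moffset: 0,
        len: 2 << 20,
    }])
}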

File diff suppressed because it is too large.

vhost_user_fs/src/lib.rs

@@ -1,60 +0,0 @@
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
#[macro_use]
extern crate log;
pub mod descriptor_utils;
pub mod file_traits;
pub mod filesystem;
pub mod fs_cache_req_handler;
pub mod fuse;
pub mod multikey;
pub mod passthrough;
pub mod sandbox;
pub mod seccomp;
pub mod server;
use std::ffi::FromBytesWithNulError;
use std::{error, fmt, io};
#[derive(Debug)]
pub enum Error {
/// Failed to decode protocol messages.
DecodeMessage(io::Error),
/// Failed to encode protocol messages.
EncodeMessage(io::Error),
/// One or more parameters are missing.
MissingParameter,
/// A C string parameter is invalid.
InvalidCString(FromBytesWithNulError),
/// The `len` field of the header is too small.
InvalidHeaderLength,
/// The `size` field of the `SetxattrIn` message does not match the length
/// of the decoded value.
InvalidXattrSize((u32, usize)),
}
impl error::Error for Error {}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use Error::*;
match self {
DecodeMessage(err) => write!(f, "failed to decode fuse message: {}", err),
EncodeMessage(err) => write!(f, "failed to encode fuse message: {}", err),
MissingParameter => write!(f, "one or more parameters are missing"),
InvalidHeaderLength => write!(f, "the `len` field of the header is too small"),
InvalidCString(err) => write!(f, "a c string parameter is invalid: {}", err),
InvalidXattrSize((size, len)) => write!(
f,
"The `size` field of the `SetxattrIn` message does not match the length of the\
decoded value: size = {}, value.len() = {}",
size, len
),
}
}
}
pub type Result<T> = ::std::result::Result<T, Error>;
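
Note: a hedged illustration of how the crate-level Error/Result pair above was meant to be used by the server code. check_header_len is a hypothetical helper, not part of the removed crate:

// Hypothetical helper: reject a request whose header advertises fewer bytes than
// the fixed-size header itself, mapping to Error::InvalidHeaderLength.
fn check_header_len(advertised: u32, header_size: u32) -> Result<()> {
    if advertised < header_size {
        return Err(Error::InvalidHeaderLength);
    }
    Ok(())
}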

vhost_user_fs/src/main.rs

@@ -1,423 +0,0 @@
// Copyright 2019 Intel Corporation. All Rights Reserved.
//
// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause)
#[macro_use(crate_version, crate_authors)]
extern crate clap;
extern crate log;
extern crate vhost_rs;
extern crate vhost_user_backend;
extern crate virtio_devices;
use clap::{App, Arg};
use futures::executor::{ThreadPool, ThreadPoolBuilder};
use libc::EFD_NONBLOCK;
use log::*;
use seccomp::SeccompAction;
use std::num::Wrapping;
use std::sync::{Arc, Mutex, RwLock};
use std::{convert, error, fmt, io, process};
use vhost_rs::vhost_user::message::*;
use vhost_rs::vhost_user::{Listener, SlaveFsCacheReq};
use vhost_user_backend::{VhostUserBackend, VhostUserDaemon, Vring};
use vhost_user_fs::descriptor_utils::Error as VufDescriptorError;
use vhost_user_fs::descriptor_utils::{Reader, Writer};
use vhost_user_fs::filesystem::FileSystem;
use vhost_user_fs::passthrough::{self, PassthroughFs};
use vhost_user_fs::sandbox::Sandbox;
use vhost_user_fs::seccomp::enable_seccomp;
use vhost_user_fs::server::Server;
use vhost_user_fs::Error as VhostUserFsError;
use virtio_bindings::bindings::virtio_net::*;
use virtio_bindings::bindings::virtio_ring::{
VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC,
};
use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap};
use vm_virtio::queue::DescriptorChain;
use vmm_sys_util::eventfd::EventFd;
const QUEUE_SIZE: usize = 1024;
const NUM_QUEUES: usize = 2;
const THREAD_POOL_SIZE: usize = 64;
// The guest queued an available buffer for the high priority queue.
const HIPRIO_QUEUE_EVENT: u16 = 0;
// The guest queued an available buffer for the request queue.
const REQ_QUEUE_EVENT: u16 = 1;
// The device has been dropped.
const KILL_EVENT: u16 = 2;
type Result<T> = std::result::Result<T, Error>;
type VhostUserBackendResult<T> = std::result::Result<T, std::io::Error>;
#[derive(Debug)]
enum Error {
/// Failed to create kill eventfd.
CreateKillEventFd(io::Error),
/// Failed to create thread pool.
CreateThreadPool(io::Error),
/// Failed to handle event other than input event.
HandleEventNotEpollIn,
/// Failed to handle unknown event.
HandleEventUnknownEvent,
/// No memory configured.
NoMemoryConfigured,
/// Processing queue failed.
ProcessQueue(VhostUserFsError),
/// Creating a queue reader failed.
QueueReader(VufDescriptorError),
/// Creating a queue writer failed.
QueueWriter(VufDescriptorError),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "vhost_user_fs_error: {:?}", self)
}
}
impl error::Error for Error {}
impl convert::From<Error> for io::Error {
fn from(e: Error) -> Self {
io::Error::new(io::ErrorKind::Other, e)
}
}
struct VhostUserFsThread<F: FileSystem + Send + Sync + 'static> {
mem: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
kill_evt: EventFd,
server: Arc<Server<F>>,
// handle request from slave to master
vu_req: Option<SlaveFsCacheReq>,
event_idx: bool,
pool: ThreadPool,
}
impl<F: FileSystem + Send + Sync + 'static> Clone for VhostUserFsThread<F> {
fn clone(&self) -> Self {
VhostUserFsThread {
mem: self.mem.clone(),
kill_evt: self.kill_evt.try_clone().unwrap(),
server: self.server.clone(),
vu_req: self.vu_req.clone(),
event_idx: self.event_idx,
pool: self.pool.clone(),
}
}
}
impl<F: FileSystem + Send + Sync + 'static> VhostUserFsThread<F> {
fn new(fs: F, thread_pool_size: usize) -> Result<Self> {
Ok(VhostUserFsThread {
mem: None,
kill_evt: EventFd::new(EFD_NONBLOCK).map_err(Error::CreateKillEventFd)?,
server: Arc::new(Server::new(fs)),
vu_req: None,
event_idx: false,
pool: ThreadPoolBuilder::new()
.pool_size(thread_pool_size)
.create()
.map_err(Error::CreateThreadPool)?,
})
}
fn process_queue(&mut self, vring_lock: Arc<RwLock<Vring>>) -> Result<bool> {
let mut used_any = false;
let (atomic_mem, mem) = match &self.mem {
Some(m) => (m, m.memory()),
None => return Err(Error::NoMemoryConfigured),
};
let mut vring = vring_lock.write().unwrap();
while let Some(avail_desc) = vring.mut_queue().iter(&mem).next() {
used_any = true;
// Prepare a set of objects that can be moved to the worker thread.
let desc_head = avail_desc.get_head();
let atomic_mem = atomic_mem.clone();
let server = self.server.clone();
let mut vu_req = self.vu_req.clone();
let event_idx = self.event_idx;
let vring_lock = vring_lock.clone();
self.pool.spawn_ok(async move {
let mem = atomic_mem.memory();
let desc = DescriptorChain::new_from_head(&mem, desc_head).unwrap();
let head_index = desc.index;
let reader = Reader::new(&mem, desc.clone())
.map_err(Error::QueueReader)
.unwrap();
let writer = Writer::new(&mem, desc.clone())
.map_err(Error::QueueWriter)
.unwrap();
server
.handle_message(reader, writer, vu_req.as_mut())
.map_err(Error::ProcessQueue)
.unwrap();
let mut vring = vring_lock.write().unwrap();
if event_idx {
let queue = vring.mut_queue();
if let Some(used_idx) = queue.add_used(&mem, head_index, 0) {
if queue.needs_notification(&mem, Wrapping(used_idx)) {
vring.signal_used_queue().unwrap();
}
}
} else {
vring.mut_queue().add_used(&mem, head_index, 0);
vring.signal_used_queue().unwrap();
}
});
}
Ok(used_any)
}
}
struct VhostUserFsBackend<F: FileSystem + Send + Sync + 'static> {
thread: Mutex<VhostUserFsThread<F>>,
}
impl<F: FileSystem + Send + Sync + 'static> VhostUserFsBackend<F> {
fn new(fs: F, thread_pool_size: usize) -> Result<Self> {
let thread = Mutex::new(VhostUserFsThread::new(fs, thread_pool_size)?);
Ok(VhostUserFsBackend { thread })
}
}
impl<F: FileSystem + Send + Sync + 'static> VhostUserBackend for VhostUserFsBackend<F> {
fn num_queues(&self) -> usize {
NUM_QUEUES
}
fn max_queue_size(&self) -> usize {
QUEUE_SIZE
}
fn features(&self) -> u64 {
1 << VIRTIO_F_VERSION_1
| 1 << VIRTIO_RING_F_INDIRECT_DESC
| 1 << VIRTIO_RING_F_EVENT_IDX
| VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
}
fn protocol_features(&self) -> VhostUserProtocolFeatures {
VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::SLAVE_REQ
}
fn set_event_idx(&mut self, enabled: bool) {
self.thread.lock().unwrap().event_idx = enabled;
}
fn update_memory(&mut self, mem: GuestMemoryMmap) -> VhostUserBackendResult<()> {
self.thread.lock().unwrap().mem = Some(GuestMemoryAtomic::new(mem));
Ok(())
}
fn handle_event(
&self,
device_event: u16,
evset: epoll::Events,
vrings: &[Arc<RwLock<Vring>>],
_thread_id: usize,
) -> VhostUserBackendResult<bool> {
if evset != epoll::Events::EPOLLIN {
return Err(Error::HandleEventNotEpollIn.into());
}
let mut thread = self.thread.lock().unwrap();
let mem = match &thread.mem {
Some(m) => m.memory(),
None => return Err(Error::NoMemoryConfigured.into()),
};
let vring_lock = match device_event {
HIPRIO_QUEUE_EVENT => {
debug!("HIPRIO_QUEUE_EVENT");
vrings[0].clone()
}
REQ_QUEUE_EVENT => {
debug!("QUEUE_EVENT");
vrings[1].clone()
}
_ => return Err(Error::HandleEventUnknownEvent.into()),
};
if thread.event_idx {
// vm-virtio's Queue implementation only checks avail_index
// once, so to properly support EVENT_IDX we need to keep
// calling process_queue() until it stops finding new
// requests on the queue.
loop {
{
let mut vring = vring_lock.write().unwrap();
vring.mut_queue().update_avail_event(&mem);
}
if !thread.process_queue(vring_lock.clone())? {
break;
}
}
} else {
// Without EVENT_IDX, a single call is enough.
thread.process_queue(vring_lock)?;
}
Ok(false)
}
fn exit_event(&self, _thread_index: usize) -> Option<(EventFd, Option<u16>)> {
Some((
self.thread.lock().unwrap().kill_evt.try_clone().unwrap(),
Some(KILL_EVENT),
))
}
fn set_slave_req_fd(&mut self, vu_req: SlaveFsCacheReq) {
self.thread.lock().unwrap().vu_req = Some(vu_req);
}
}
fn main() {
let cmd_arguments = App::new("vhost-user-fs backend")
.version(crate_version!())
.author(crate_authors!())
.about("Launch a vhost-user-fs backend.")
.arg(
Arg::with_name("shared-dir")
.long("shared-dir")
.help("Shared directory path")
.takes_value(true)
.min_values(1),
)
.arg(
Arg::with_name("sock")
.long("sock")
.help("vhost-user socket path (deprecated)")
.takes_value(true)
.min_values(1),
)
.arg(
Arg::with_name("socket")
.long("socket")
.help("vhost-user socket path")
.takes_value(true)
.min_values(1),
)
.arg(
Arg::with_name("thread-pool-size")
.long("thread-pool-size")
.help("thread pool size (default 64)")
.takes_value(true)
.min_values(1),
)
.arg(
Arg::with_name("disable-xattr")
.long("disable-xattr")
.help("Disable support for extended attributes"),
)
.arg(
Arg::with_name("disable-sandbox")
.long("disable-sandbox")
.help("Don't set up a sandbox for the daemon"),
)
.arg(
Arg::with_name("seccomp")
.long("seccomp")
.help("Disable/debug seccomp security")
.possible_values(&["kill", "log", "trap", "none"])
.default_value("kill"),
)
.get_matches();
// Retrieve arguments
let shared_dir = cmd_arguments
.value_of("shared-dir")
.expect("Failed to retrieve shared directory path");
let socket = match cmd_arguments.value_of("socket") {
Some(path) => path,
None => {
println!("warning: use of deprecated parameter '--sock': Please use the '--socket' option instead.");
cmd_arguments
.value_of("sock")
.expect("Failed to retrieve vhost-user socket path")
}
};
let thread_pool_size: usize = match cmd_arguments.value_of("thread-pool-size") {
Some(size) => size.parse().expect("Invalid argument for thread-pool-size"),
None => THREAD_POOL_SIZE,
};
let xattr: bool = !cmd_arguments.is_present("disable-xattr");
let create_sandbox: bool = !cmd_arguments.is_present("disable-sandbox");
let seccomp_mode: SeccompAction = match cmd_arguments.value_of("seccomp").unwrap() {
"none" => SeccompAction::Allow, // i.e. no seccomp
"kill" => SeccompAction::Kill,
"log" => SeccompAction::Log,
"trap" => SeccompAction::Trap,
_ => unreachable!(), // We told Arg possible_values
};
let listener = Listener::new(socket, true).unwrap();
let fs_cfg = if create_sandbox {
let mut sandbox = Sandbox::new(shared_dir.to_string());
match sandbox.enter().unwrap() {
Some(child_pid) => {
unsafe { libc::waitpid(child_pid, std::ptr::null_mut(), 0) };
return;
}
None => passthrough::Config {
root_dir: "/".to_string(),
xattr,
proc_sfd_rawfd: sandbox.get_proc_self_fd(),
..Default::default()
},
}
} else {
passthrough::Config {
root_dir: shared_dir.to_string(),
xattr,
..Default::default()
}
};
// Must happen before we start the thread pool
if seccomp_mode != SeccompAction::Allow {
enable_seccomp(seccomp_mode).unwrap();
};
let fs = PassthroughFs::new(fs_cfg).unwrap();
let fs_backend = Arc::new(RwLock::new(
VhostUserFsBackend::new(fs, thread_pool_size).unwrap(),
));
let mut daemon =
VhostUserDaemon::new(String::from("vhost-user-fs-backend"), fs_backend.clone()).unwrap();
if let Err(e) = daemon.start(listener) {
error!("Failed to start daemon: {:?}", e);
process::exit(1);
}
if let Err(e) = daemon.wait() {
error!("Waiting for daemon failed: {:?}", e);
}
let kill_evt = fs_backend
.read()
.unwrap()
.thread
.lock()
.unwrap()
.kill_evt
.try_clone()
.unwrap();
if let Err(e) = kill_evt.write(1) {
error!("Error shutting down worker thread: {:?}", e)
}
}

vhost_user_fs/src/multikey.rs

@@ -1,274 +0,0 @@
// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use std::borrow::Borrow;
use std::collections::BTreeMap;
/// A BTreeMap that supports 2 types of keys per value. All the usual restrictions and warnings for
/// `std::collections::BTreeMap` also apply to this struct. Additionally, there is a 1:1
/// relationship between the 2 key types. In other words, for each `K1` in the map, there is exactly
/// one `K2` in the map and vice versa.
#[derive(Default)]
pub struct MultikeyBTreeMap<K1, K2, V>
where
K1: Ord,
K2: Ord,
{
// We need to keep a copy of the second key in the main map so that we can remove entries using
// just the main key. Otherwise we would require the caller to provide both keys when calling
// `remove`.
main: BTreeMap<K1, (K2, V)>,
alt: BTreeMap<K2, K1>,
}
impl<K1, K2, V> MultikeyBTreeMap<K1, K2, V>
where
K1: Clone + Ord,
K2: Clone + Ord,
{
/// Create a new empty MultikeyBTreeMap.
pub fn new() -> Self {
MultikeyBTreeMap {
main: BTreeMap::default(),
alt: BTreeMap::default(),
}
}
/// Returns a reference to the value corresponding to the key.
///
/// The key may be any borrowed form of `K1`, but the ordering on the borrowed form must match
/// the ordering on `K1`.
pub fn get<Q>(&self, key: &Q) -> Option<&V>
where
K1: Borrow<Q>,
Q: Ord + ?Sized,
{
self.main.get(key).map(|(_, v)| v)
}
/// Returns a reference to the value corresponding to the alternate key.
///
/// The key may be any borrowed form of `K2`, but the ordering on the borrowed form must
/// match the ordering on `K2`.
///
/// Note that this method performs 2 lookups: one to get the main key and another to get the
/// value associated with that key. For best performance callers should prefer the `get` method
/// over this method whenever possible as `get` only needs to perform one lookup.
pub fn get_alt<Q2>(&self, key: &Q2) -> Option<&V>
where
K2: Borrow<Q2>,
Q2: Ord + ?Sized,
{
if let Some(k) = self.alt.get(key) {
self.get(k)
} else {
None
}
}
/// Inserts a new entry into the map with the given keys and value.
///
/// Returns `None` if the map did not have an entry with `k1` or `k2` present. If exactly one
/// key was present, then the value associated with that key is updated, the other key is
/// removed, and the old value is returned. If **both** keys were present then the value
/// associated with the main key is updated, the value associated with the alternate key is
/// removed, and the old value associated with the main key is returned.
pub fn insert(&mut self, k1: K1, k2: K2, v: V) -> Option<V> {
let oldval = if let Some(oldkey) = self.alt.insert(k2.clone(), k1.clone()) {
self.main.remove(&oldkey)
} else {
None
};
self.main
.insert(k1, (k2.clone(), v))
.or(oldval)
.map(|(oldk2, v)| {
if oldk2 != k2 {
self.alt.remove(&oldk2);
}
v
})
}
/// Remove a key from the map, returning the value associated with that key if it was previously
/// in the map.
///
/// The key may be any borrowed form of `K1`, but the ordering on the borrowed form must match
/// the ordering on `K1`.
pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
where
K1: Borrow<Q>,
Q: Ord + ?Sized,
{
self.main.remove(key).map(|(k2, v)| {
self.alt.remove(&k2);
v
})
}
/// Clears the map, removing all values.
pub fn clear(&mut self) {
self.alt.clear();
self.main.clear()
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn get() {
let mut m = MultikeyBTreeMap::<u64, i64, u32>::new();
let k1 = 0xc6c8_f5e0_b13e_ed40;
let k2 = 0x1a04_ce4b_8329_14fe;
let val = 0xf4e3_c360;
assert!(m.insert(k1, k2, val).is_none());
assert_eq!(*m.get(&k1).expect("failed to look up main key"), val);
assert_eq!(*m.get_alt(&k2).expect("failed to look up alt key"), val);
}
#[test]
fn update_main_key() {
let mut m = MultikeyBTreeMap::<u64, i64, u32>::new();
let k1 = 0xc6c8_f5e0_b13e_ed40;
let k2 = 0x1a04_ce4b_8329_14fe;
let val = 0xf4e3_c360;
assert!(m.insert(k1, k2, val).is_none());
let new_k1 = 0x3add_f8f8_c7c5_df5e;
let val2 = 0x7389_f8a7;
assert_eq!(
m.insert(new_k1, k2, val2)
.expect("failed to update main key"),
val
);
assert!(m.get(&k1).is_none());
assert_eq!(*m.get(&new_k1).expect("failed to look up main key"), val2);
assert_eq!(*m.get_alt(&k2).expect("failed to look up alt key"), val2);
}
#[test]
fn update_alt_key() {
let mut m = MultikeyBTreeMap::<u64, i64, u32>::new();
let k1 = 0xc6c8_f5e0_b13e_ed40;
let k2 = 0x1a04_ce4b_8329_14fe;
let val = 0xf4e3_c360;
assert!(m.insert(k1, k2, val).is_none());
let new_k2 = 0x6825_a60b_61ac_b333;
let val2 = 0xbb14_8f2c;
assert_eq!(
m.insert(k1, new_k2, val2)
.expect("failed to update alt key"),
val
);
assert!(m.get_alt(&k2).is_none());
assert_eq!(*m.get(&k1).expect("failed to look up main key"), val2);
assert_eq!(
*m.get_alt(&new_k2).expect("failed to look up alt key"),
val2
);
}
#[test]
fn update_value() {
let mut m = MultikeyBTreeMap::<u64, i64, u32>::new();
let k1 = 0xc6c8_f5e0_b13e_ed40;
let k2 = 0x1a04_ce4b_8329_14fe;
let val = 0xf4e3_c360;
assert!(m.insert(k1, k2, val).is_none());
let val2 = 0xe42d_79ba;
assert_eq!(
m.insert(k1, k2, val2).expect("failed to update alt key"),
val
);
assert_eq!(*m.get(&k1).expect("failed to look up main key"), val2);
assert_eq!(*m.get_alt(&k2).expect("failed to look up alt key"), val2);
}
#[test]
fn update_both_keys_main() {
let mut m = MultikeyBTreeMap::<u64, i64, u32>::new();
let k1 = 0xc6c8_f5e0_b13e_ed40;
let k2 = 0x1a04_ce4b_8329_14fe;
let val = 0xf4e3_c360;
assert!(m.insert(k1, k2, val).is_none());
let new_k1 = 0xc980_587a_24b3_ae30;
let new_k2 = 0x2773_c5ee_8239_45a2;
let val2 = 0x31f4_33f9;
assert!(m.insert(new_k1, new_k2, val2).is_none());
let val3 = 0x8da1_9cf7;
assert_eq!(
m.insert(k1, new_k2, val3)
.expect("failed to update main key"),
val
);
// Both new_k1 and k2 should now be gone from the map.
assert!(m.get(&new_k1).is_none());
assert!(m.get_alt(&k2).is_none());
assert_eq!(*m.get(&k1).expect("failed to look up main key"), val3);
assert_eq!(
*m.get_alt(&new_k2).expect("failed to look up alt key"),
val3
);
}
#[test]
fn update_both_keys_alt() {
let mut m = MultikeyBTreeMap::<u64, i64, u32>::new();
let k1 = 0xc6c8_f5e0_b13e_ed40;
let k2 = 0x1a04_ce4b_8329_14fe;
let val = 0xf4e3_c360;
assert!(m.insert(k1, k2, val).is_none());
let new_k1 = 0xc980_587a_24b3_ae30;
let new_k2 = 0x2773_c5ee_8239_45a2;
let val2 = 0x31f4_33f9;
assert!(m.insert(new_k1, new_k2, val2).is_none());
let val3 = 0x8da1_9cf7;
assert_eq!(
m.insert(new_k1, k2, val3)
.expect("failed to update main key"),
val2
);
// Both k1 and new_k2 should now be gone from the map.
assert!(m.get(&k1).is_none());
assert!(m.get_alt(&new_k2).is_none());
assert_eq!(*m.get(&new_k1).expect("failed to look up main key"), val3);
assert_eq!(*m.get_alt(&k2).expect("failed to look up alt key"), val3);
}
#[test]
fn remove() {
let mut m = MultikeyBTreeMap::<u64, i64, u32>::new();
let k1 = 0xc6c8_f5e0_b13e_ed40;
let k2 = 0x1a04_ce4b_8329_14fe;
let val = 0xf4e3_c360;
assert!(m.insert(k1, k2, val).is_none());
assert_eq!(m.remove(&k1).expect("failed to remove entry"), val);
assert!(m.get(&k1).is_none());
assert!(m.get_alt(&k2).is_none());
}
}
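
Note: a small, hypothetical sketch of the intended usage pattern, resembling how a passthrough filesystem might index entries by both a synthetic inode number and a host (device, inode) pair. It assumes the example lives in this module so MultikeyBTreeMap is in scope:

// Hypothetical example: the synthetic inode number is the main key, the host
// (device, inode) pair is the alternate key, and both resolve to the same value.
fn example_usage() {
    let mut inodes = MultikeyBTreeMap::<u64, (u64, u64), String>::new();
    inodes.insert(1, (0xfd00, 42), String::from("/shared/file.txt"));

    // Preferred: look up by the main key (a single BTreeMap lookup).
    assert_eq!(inodes.get(&1).map(String::as_str), Some("/shared/file.txt"));

    // Also possible: look up by the alternate key (two lookups, see get_alt above).
    assert_eq!(
        inodes.get_alt(&(0xfd00, 42)).map(String::as_str),
        Some("/shared/file.txt")
    );
}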

File diff suppressed because it is too large.

vhost_user_fs/src/sandbox.rs

@@ -1,319 +0,0 @@
// Copyright 2020 Red Hat, Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use std::ffi::CString;
use std::os::unix::io::RawFd;
use std::{fmt, fs, io};
use tempdir::TempDir;
#[derive(Debug)]
pub enum Error {
/// Failed to bind mount `/proc/self/fd` into a temporary directory.
BindMountProcSelfFd(io::Error),
/// Failed to bind mount shared directory.
BindMountSharedDir(io::Error),
/// Failed to change to the old root directory.
ChdirOldRoot(io::Error),
/// Failed to change to the new root directory.
ChdirNewRoot(io::Error),
/// Failed to clean the properties of the mount point.
CleanMount(io::Error),
/// Failed to create a temporary directory.
CreateTempDir(io::Error),
/// Call to libc::fork returned an error.
Fork(io::Error),
/// Error bind-mounting a directory.
MountBind(io::Error),
/// Failed to mount old root.
MountOldRoot(io::Error),
/// Error mounting proc.
MountProc(io::Error),
/// Failed to mount new root.
MountNewRoot(io::Error),
/// Error mounting target directory.
MountTarget(io::Error),
/// Failed to open new root.
OpenNewRoot(io::Error),
/// Failed to open old root.
OpenOldRoot(io::Error),
/// Failed to open `/proc/self/fd`.
OpenProcSelfFd(io::Error),
/// Error switching root directory.
PivotRoot(io::Error),
/// Failed to remove temporary directory.
RmdirTempDir(io::Error),
/// Failed to lazily unmount old root.
UmountOldRoot(io::Error),
/// Failed to lazily unmount temporary directory.
UmountTempDir(io::Error),
/// Call to libc::unshare returned an error.
Unshare(io::Error),
/// Failed to read from procfs.
ReadProc(io::Error),
/// Failed to parse `/proc/sys/fs/nr_open`.
InvalidNrOpen(std::num::ParseIntError),
/// Failed to set rlimit.
SetRlimit(io::Error),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "vhost_user_fs_sandbox_error: {:?}", self)
}
}
/// A helper for creating a sandbox for isolating the service.
pub struct Sandbox {
/// The directory that is going to be shared with the VM. The sandbox will be constructed on top
/// of this directory.
shared_dir: String,
/// A file descriptor for `/proc/self/fd` obtained from the sandboxed context.
proc_self_fd: Option<RawFd>,
}
impl Sandbox {
pub fn new(shared_dir: String) -> Self {
Sandbox {
shared_dir,
proc_self_fd: None,
}
}
// Make `self.shared_dir` our root directory, and get an isolated file descriptor for
// `/proc/self/fd`.
//
// This is based on virtiofsd's setup_namespaces() and setup_mounts(), and it's very similar to
// the strategy used in containers. It consists of a careful sequence of mounts and bind-mounts
// that ensures it is not possible to escape the sandbox through `self.shared_dir` or the file
// descriptor obtained for `/proc/self/fd`.
//
// It's ugly, but it's the only way until Linux implements a proper containerization API.
fn setup_mounts(&mut self) -> Result<(), Error> {
// Ensure our mount changes don't affect the parent mount namespace.
let c_root_dir = CString::new("/").unwrap();
let ret = unsafe {
libc::mount(
std::ptr::null(),
c_root_dir.as_ptr(),
std::ptr::null(),
libc::MS_SLAVE | libc::MS_REC,
std::ptr::null(),
)
};
if ret != 0 {
return Err(Error::CleanMount(std::io::Error::last_os_error()));
}
// Mount `/proc` in this context.
let c_proc_dir = CString::new("/proc").unwrap();
let c_proc_fs = CString::new("proc").unwrap();
let ret = unsafe {
libc::mount(
c_proc_fs.as_ptr(),
c_proc_dir.as_ptr(),
c_proc_fs.as_ptr(),
libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID | libc::MS_RELATIME,
std::ptr::null(),
)
};
if ret != 0 {
return Err(Error::MountProc(std::io::Error::last_os_error()));
}
// Bind-mount `/proc/self/fd` into a temporary directory, preventing access to ancestor
// directories.
let c_proc_self_fd = CString::new("/proc/self/fd").unwrap();
let tmp_dir = TempDir::new("vhostuserfs-")
.map_err(|_| Error::CreateTempDir(std::io::Error::last_os_error()))?;
let c_tmp_dir = CString::new(tmp_dir.into_path().to_str().unwrap()).unwrap();
let ret = unsafe {
libc::mount(
c_proc_self_fd.as_ptr(),
c_tmp_dir.as_ptr(),
std::ptr::null(),
libc::MS_BIND,
std::ptr::null(),
)
};
if ret < 0 {
return Err(Error::BindMountProcSelfFd(std::io::Error::last_os_error()));
}
// Obtain a file descriptor for `/proc/self/fd` through the bind-mounted temporary directory.
let proc_self_fd = unsafe { libc::open(c_tmp_dir.as_ptr(), libc::O_PATH) };
if proc_self_fd < 0 {
return Err(Error::OpenProcSelfFd(std::io::Error::last_os_error()));
}
self.proc_self_fd = Some(proc_self_fd);
// Now that we have a file descriptor for `/proc/self/fd`, we no longer need the bind-mount.
// Unmount it and remove the temporary directory.
let ret = unsafe { libc::umount2(c_tmp_dir.as_ptr(), libc::MNT_DETACH) };
if ret < 0 {
return Err(Error::UmountTempDir(std::io::Error::last_os_error()));
}
let ret = unsafe { libc::rmdir(c_tmp_dir.as_ptr()) };
if ret < 0 {
return Err(Error::RmdirTempDir(std::io::Error::last_os_error()));
}
// Bind-mount `self.shared_dir` onto itself so we can use it as the new root for the `pivot_root` syscall.
let c_shared_dir = CString::new(self.shared_dir.clone()).unwrap();
let ret = unsafe {
libc::mount(
c_shared_dir.as_ptr(),
c_shared_dir.as_ptr(),
std::ptr::null(),
libc::MS_BIND | libc::MS_REC,
std::ptr::null(),
)
};
if ret < 0 {
return Err(Error::BindMountSharedDir(std::io::Error::last_os_error()));
}
// Get a file descriptor to our old root so we can reference it after switching root.
let oldroot_fd = unsafe {
libc::open(
c_root_dir.as_ptr(),
libc::O_DIRECTORY | libc::O_RDONLY | libc::O_CLOEXEC,
)
};
if oldroot_fd < 0 {
return Err(Error::OpenOldRoot(std::io::Error::last_os_error()));
}
// Get a file descriptor to the new root so we can reference it after switching root.
let newroot_fd = unsafe {
libc::open(
c_shared_dir.as_ptr(),
libc::O_DIRECTORY | libc::O_RDONLY | libc::O_CLOEXEC,
)
};
if newroot_fd < 0 {
return Err(Error::OpenNewRoot(std::io::Error::last_os_error()));
}
// Change to new root directory to prepare for `pivot_root` syscall.
let ret = unsafe { libc::fchdir(newroot_fd) };
if ret < 0 {
return Err(Error::ChdirNewRoot(std::io::Error::last_os_error()));
}
// Call to `pivot_root` using `.` as both new and old root.
let c_current_dir = CString::new(".").unwrap();
let ret = unsafe {
libc::syscall(
libc::SYS_pivot_root,
c_current_dir.as_ptr(),
c_current_dir.as_ptr(),
)
};
if ret < 0 {
return Err(Error::PivotRoot(std::io::Error::last_os_error()));
}
// Change to old root directory to prepare for cleaning up and unmounting it.
let ret = unsafe { libc::fchdir(oldroot_fd) };
if ret < 0 {
return Err(Error::ChdirOldRoot(std::io::Error::last_os_error()));
}
// Clean up old root to avoid mount namespace propagation.
let c_empty = CString::new("").unwrap();
let ret = unsafe {
libc::mount(
c_empty.as_ptr(),
c_current_dir.as_ptr(),
c_empty.as_ptr(),
libc::MS_SLAVE | libc::MS_REC,
std::ptr::null(),
)
};
if ret != 0 {
return Err(Error::CleanMount(std::io::Error::last_os_error()));
}
// Lazily unmount old root.
let ret = unsafe { libc::umount2(c_current_dir.as_ptr(), libc::MNT_DETACH) };
if ret < 0 {
return Err(Error::UmountOldRoot(std::io::Error::last_os_error()));
}
// Change to new root.
let ret = unsafe { libc::fchdir(newroot_fd) };
if ret < 0 {
return Err(Error::ChdirNewRoot(std::io::Error::last_os_error()));
}
// We no longer need these file descriptors, so close them.
unsafe { libc::close(newroot_fd) };
unsafe { libc::close(oldroot_fd) };
Ok(())
}
/// Sets the limit of open files to the max possible.
fn setup_nofile_rlimit(&self) -> Result<(), Error> {
// /proc/sys/fs/nr_open is a sysctl file that shows the maximum number
// of file-handles a process can allocate.
let path = "/proc/sys/fs/nr_open";
let max_str = fs::read_to_string(path).map_err(Error::ReadProc)?;
let max = max_str.trim().parse().map_err(Error::InvalidNrOpen)?;
let limit = libc::rlimit {
rlim_cur: max,
rlim_max: max,
};
let ret = unsafe { libc::setrlimit(libc::RLIMIT_NOFILE, &limit) };
if ret < 0 {
Err(Error::SetRlimit(std::io::Error::last_os_error()))
} else {
Ok(())
}
}
/// Set up sandbox, fork and jump into it.
///
/// On success, the returned value will be the PID of the child for the parent and `None` for
/// the child itself, with the latter running isolated in `self.shared_dir`.
pub fn enter(&mut self) -> Result<Option<i32>, Error> {
let uid = unsafe { libc::geteuid() };
let flags = if uid == 0 {
libc::CLONE_NEWPID | libc::CLONE_NEWNS | libc::CLONE_NEWNET
} else {
// If running as an unprivileged user, rely on user_namespaces(7) for isolation. The
// main limitation of this strategy is that only the current uid/gid are mapped into
// the new namespace, so most operations on permissions will fail.
libc::CLONE_NEWPID | libc::CLONE_NEWNS | libc::CLONE_NEWNET | libc::CLONE_NEWUSER
};
let ret = unsafe { libc::unshare(flags) };
if ret != 0 {
return Err(Error::Unshare(std::io::Error::last_os_error()));
}
let child = unsafe { libc::fork() };
match child {
0 => {
// This is the child. Request to receive SIGTERM on parent's death.
unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) };
self.setup_nofile_rlimit()?;
self.setup_mounts()?;
Ok(None)
}
x if x > 0 => {
// This is the parent.
Ok(Some(child))
}
_ => Err(Error::Fork(std::io::Error::last_os_error())),
}
}
pub fn get_proc_self_fd(&self) -> Option<RawFd> {
self.proc_self_fd
}
}
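
Note: a hedged sketch of how a caller is expected to drive Sandbox::enter(), mirroring the call in main.rs above; run_sandboxed is a hypothetical wrapper placed in this module so libc and Error are in scope:

// Hypothetical caller: fork into the sandbox and let the parent wait for the child.
fn run_sandboxed(shared_dir: String) -> Result<(), Error> {
    let mut sandbox = Sandbox::new(shared_dir);
    match sandbox.enter()? {
        Some(child_pid) => {
            // Parent: wait for the sandboxed child to exit.
            unsafe { libc::waitpid(child_pid, std::ptr::null_mut(), 0) };
        }
        None => {
            // Child: now running with shared_dir as "/" and an isolated /proc/self/fd.
        }
    }
    Ok(())
}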

vhost_user_fs/src/seccomp.rs

@@ -1,141 +0,0 @@
// Copyright 2020 Red Hat, Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use seccomp::{allow_syscall, BpfProgram, SeccompAction, SeccompFilter};
use std::convert::TryInto;
use std::{convert, fmt};
#[derive(Debug)]
pub enum Error {
/// Cannot create seccomp filter
CreateSeccompFilter(seccomp::SeccompError),
/// Cannot apply seccomp filter
ApplySeccompFilter(seccomp::Error),
}
impl convert::From<seccomp::Error> for Error {
fn from(e: seccomp::Error) -> Self {
Error::ApplySeccompFilter(e)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "vhost_user_fs_seccomp_error: {:?}", self)
}
}
fn vuf_filter(action: SeccompAction) -> Result<SeccompFilter, Error> {
Ok(SeccompFilter::new(
vec![
allow_syscall(libc::SYS_accept4),
allow_syscall(libc::SYS_brk),
allow_syscall(libc::SYS_capget), // For CAP_FSETID
allow_syscall(libc::SYS_capset),
allow_syscall(libc::SYS_clock_gettime),
allow_syscall(libc::SYS_clone),
allow_syscall(libc::SYS_close),
allow_syscall(libc::SYS_copy_file_range),
allow_syscall(libc::SYS_dup),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_epoll_create),
allow_syscall(libc::SYS_epoll_create1),
allow_syscall(libc::SYS_epoll_ctl),
allow_syscall(libc::SYS_epoll_pwait),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_epoll_wait),
allow_syscall(libc::SYS_eventfd2),
allow_syscall(libc::SYS_exit),
allow_syscall(libc::SYS_exit_group),
allow_syscall(libc::SYS_fallocate),
allow_syscall(libc::SYS_fchdir),
allow_syscall(libc::SYS_fchmod),
allow_syscall(libc::SYS_fchmodat),
allow_syscall(libc::SYS_fchownat),
allow_syscall(libc::SYS_fcntl),
allow_syscall(libc::SYS_fdatasync),
allow_syscall(libc::SYS_fgetxattr),
allow_syscall(libc::SYS_flistxattr),
allow_syscall(libc::SYS_flock),
allow_syscall(libc::SYS_fremovexattr),
allow_syscall(libc::SYS_fsetxattr),
allow_syscall(libc::SYS_fstat),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_fstatfs),
allow_syscall(libc::SYS_fsync),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_ftruncate),
allow_syscall(libc::SYS_futex),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_getdents),
allow_syscall(libc::SYS_getdents64),
allow_syscall(libc::SYS_getegid),
allow_syscall(libc::SYS_geteuid),
allow_syscall(libc::SYS_getpid),
allow_syscall(libc::SYS_gettid),
allow_syscall(libc::SYS_gettimeofday),
allow_syscall(libc::SYS_getxattr),
allow_syscall(libc::SYS_linkat),
allow_syscall(libc::SYS_listxattr),
allow_syscall(libc::SYS_lseek),
allow_syscall(libc::SYS_madvise),
allow_syscall(libc::SYS_mkdirat),
allow_syscall(libc::SYS_mknodat),
allow_syscall(libc::SYS_mmap),
allow_syscall(libc::SYS_mprotect),
allow_syscall(libc::SYS_mremap),
allow_syscall(libc::SYS_munmap),
allow_syscall(libc::SYS_newfstatat),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_open),
allow_syscall(libc::SYS_openat),
allow_syscall(libc::SYS_prctl), // TODO restrict to just PR_SET_NAME?
allow_syscall(libc::SYS_preadv),
allow_syscall(libc::SYS_pread64),
allow_syscall(libc::SYS_pwritev),
allow_syscall(libc::SYS_pwrite64),
allow_syscall(libc::SYS_read),
allow_syscall(libc::SYS_readlinkat),
allow_syscall(libc::SYS_recvmsg),
allow_syscall(libc::SYS_renameat),
allow_syscall(libc::SYS_renameat2),
allow_syscall(libc::SYS_removexattr),
allow_syscall(libc::SYS_rt_sigaction),
allow_syscall(libc::SYS_rt_sigprocmask),
allow_syscall(libc::SYS_rt_sigreturn),
allow_syscall(libc::SYS_sched_getaffinity), // used by thread_pool
allow_syscall(libc::SYS_sendmsg),
allow_syscall(libc::SYS_setresgid),
allow_syscall(libc::SYS_setresuid),
//allow_syscall(libc::SYS_setresgid32), Needed on some platforms,
//allow_syscall(libc::SYS_setresuid32), Needed on some platforms
allow_syscall(libc::SYS_set_robust_list),
allow_syscall(libc::SYS_setxattr),
allow_syscall(libc::SYS_sigaltstack),
allow_syscall(libc::SYS_statx),
allow_syscall(libc::SYS_symlinkat),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_time), // Rarely needed, except on static builds
allow_syscall(libc::SYS_tgkill),
allow_syscall(libc::SYS_umask),
#[cfg(target_arch = "x86_64")]
allow_syscall(libc::SYS_unlink),
allow_syscall(libc::SYS_unlinkat),
allow_syscall(libc::SYS_unshare),
allow_syscall(libc::SYS_utimensat),
allow_syscall(libc::SYS_write),
allow_syscall(libc::SYS_writev),
]
.into_iter()
.collect(),
action,
)?)
}
pub fn enable_seccomp(action: SeccompAction) -> Result<(), Error> {
let scfilter = vuf_filter(action)?;
let bpfprog: BpfProgram = scfilter.try_into()?;
SeccompFilter::apply(bpfprog).map_err(Error::ApplySeccompFilter)
}
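
Note: a hedged usage sketch showing that the same filter can be installed with a non-fatal action while debugging, mirroring the "--seccomp log" option handled in main.rs; enable_seccomp_logging is a hypothetical wrapper in this module:

// Hypothetical wrapper: install the filter in "log" mode so violations are only
// reported instead of killing the process.
pub fn enable_seccomp_logging() -> Result<(), Error> {
    enable_seccomp(SeccompAction::Log)
}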

File diff suppressed because it is too large.