diff --git a/virtio-devices/src/console.rs b/virtio-devices/src/console.rs index f50f422fe..8899c45f9 100644 --- a/virtio-devices/src/console.rs +++ b/virtio-devices/src/console.rs @@ -42,6 +42,8 @@ const INPUT_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3; const CONFIG_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4; // File written to (input ready) const FILE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5; +// Console resized +const RESIZE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 6; //Console size feature bit const VIRTIO_CONSOLE_F_SIZE: u64 = 0; @@ -74,11 +76,13 @@ struct ConsoleEpollHandler { mem: GuestMemoryAtomic, interrupt_cb: Arc, in_buffer: Arc>>, + resizer: Arc, endpoint: Endpoint, input_queue_evt: EventFd, output_queue_evt: EventFd, input_evt: EventFd, config_evt: EventFd, + resize_pipe: Option, kill_evt: EventFd, pause_evt: EventFd, } @@ -210,6 +214,9 @@ impl ConsoleEpollHandler { helper.add_event(self.output_queue_evt.as_raw_fd(), OUTPUT_QUEUE_EVENT)?; helper.add_event(self.input_evt.as_raw_fd(), INPUT_EVENT)?; helper.add_event(self.config_evt.as_raw_fd(), CONFIG_EVENT)?; + if let Some(resize_pipe) = self.resize_pipe.as_ref() { + helper.add_event(resize_pipe.as_raw_fd(), RESIZE_EVENT)?; + } if let Some(in_file) = self.endpoint.in_file() { helper.add_event(in_file.as_raw_fd(), FILE_EVENT)?; } @@ -265,6 +272,14 @@ impl EpollHelperHandler for ConsoleEpollHandler { return true; } } + RESIZE_EVENT => { + if let Err(e) = self.resize_pipe.as_ref().unwrap().read_exact(&mut [0]) { + error!("Failed to get resize event: {:?}", e); + return true; + } + + self.resizer.update_console_size(); + } FILE_EVENT => { let mut input = [0u8; 64]; if let Some(ref mut in_file) = self.endpoint.in_file() { @@ -328,6 +343,7 @@ pub struct Console { id: String, config: Arc>, resizer: Arc, + resize_pipe: Option, endpoint: Endpoint, seccomp_action: SeccompAction, in_buffer: Arc>>, @@ -367,6 +383,7 @@ impl Console { pub fn new( id: String, endpoint: Endpoint, + resize_pipe: Option, iommu: bool, 
seccomp_action: SeccompAction, exit_evt: EventFd, @@ -401,6 +418,7 @@ impl Console { id, config: console_config, resizer: resizer.clone(), + resize_pipe, endpoint, seccomp_action, in_buffer: Arc::new(Mutex::new(VecDeque::new())), @@ -488,6 +506,8 @@ impl VirtioDevice for Console { output_queue_evt: queue_evts.remove(0), input_evt, config_evt: self.resizer.config_evt.try_clone().unwrap(), + resize_pipe: self.resize_pipe.as_ref().map(|p| p.try_clone().unwrap()), + resizer: Arc::clone(&self.resizer), kill_evt, pause_evt, }; diff --git a/vmm/src/clone3.rs b/vmm/src/clone3.rs new file mode 100644 index 000000000..f08e5ad31 --- /dev/null +++ b/vmm/src/clone3.rs @@ -0,0 +1,27 @@ +// Copyright 2021 Alyssa Ross +// SPDX-License-Identifier: Apache-2.0 + +use libc::{c_long, size_t, syscall, SYS_clone3}; + +pub const CLONE_CLEAR_SIGHAND: u64 = 0x100000000; + +#[repr(C)] +#[derive(Default)] +#[allow(non_camel_case_types)] +pub struct clone_args { + pub flags: u64, + pub pidfd: u64, + pub child_tid: u64, + pub parent_tid: u64, + pub exit_signal: u64, + pub stack: u64, + pub stack_size: u64, + pub tls: u64, + pub set_tid: u64, + pub set_tid_size: u64, + pub cgroup: u64, +} + +pub unsafe fn clone3(args: &mut clone_args, size: size_t) -> c_long { + syscall(SYS_clone3, args, size) +} diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 316b662fd..8e9e9232a 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -22,7 +22,9 @@ use crate::interrupt::LegacyUserspaceInterruptManager; #[cfg(feature = "acpi")] use crate::memory_manager::MEMORY_MANAGER_ACPI_SIZE; use crate::memory_manager::{Error as MemoryManagerError, MemoryManager}; +use crate::seccomp_filters::{get_seccomp_filter, Thread}; use crate::serial_buffer::SerialBuffer; +use crate::sigwinch_listener::start_sigwinch_listener; use crate::GuestRegionMmap; use crate::PciDeviceInfo; use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; @@ -791,6 +793,9 @@ pub struct DeviceManager { // serial PTY 
serial_pty: Option>>, + // Read end of the pipe that delivers console resize notifications + console_resize_pipe: Option>, + // Interrupt controller #[cfg(target_arch = "x86_64")] interrupt_controller: Option>>, @@ -977,6 +982,7 @@ impl DeviceManager { acpi_address, serial_pty: None, console_pty: None, + console_resize_pipe: None, virtio_mem_devices: Vec::new(), #[cfg(target_arch = "aarch64")] gpio_device: None, @@ -1011,10 +1017,15 @@ impl DeviceManager { .map(|pty| pty.lock().unwrap().clone()) } + pub fn console_resize_pipe(&self) -> Option> { + self.console_resize_pipe.as_ref().map(Arc::clone) + } + pub fn create_devices( &mut self, serial_pty: Option, console_pty: Option, + console_resize_pipe: Option, ) -> DeviceManagerResult<()> { let mut virtio_devices: Vec<(VirtioDeviceArc, bool, String)> = Vec::new(); @@ -1069,6 +1080,7 @@ impl DeviceManager { &mut virtio_devices, serial_pty, console_pty, + console_resize_pipe, )?; // Reserve some IRQs for PCI devices in case they need to support INTx. @@ -1653,10 +1665,22 @@ impl DeviceManager { self.modify_mode(f.as_raw_fd(), |t| t.c_lflag &= !(ICANON | ECHO | ISIG)) } + fn listen_for_sigwinch_on_tty(&mut self, pty: &File) -> std::io::Result<()> { + let seccomp_filter = + get_seccomp_filter(&self.seccomp_action, Thread::PtyForeground).unwrap(); + + let pipe = start_sigwinch_listener(seccomp_filter, pty)?; + + self.console_resize_pipe = Some(Arc::new(pipe)); + + Ok(()) + } + fn add_virtio_console_device( &mut self, virtio_devices: &mut Vec<(VirtioDeviceArc, bool, String)>, console_pty: Option, + resize_pipe: Option, ) -> DeviceManagerResult>> { let console_config = self.config.lock().unwrap().console.clone(); let endpoint = match console_config.mode { @@ -1670,6 +1694,7 @@ impl DeviceManager { self.config.lock().unwrap().console.file = Some(pty.path.clone()); let file = pty.main.try_clone().unwrap(); self.console_pty = Some(Arc::new(Mutex::new(pty))); + self.console_resize_pipe = Some(Arc::new(resize_pipe.unwrap())); Endpoint::FilePair(file.try_clone().unwrap(), 
file) } else { let (main, mut sub, path) = @@ -1678,6 +1703,8 @@ impl DeviceManager { .map_err(DeviceManagerError::SetPtyRaw)?; self.config.lock().unwrap().console.file = Some(path.clone()); let file = main.try_clone().unwrap(); + assert!(resize_pipe.is_none()); + self.listen_for_sigwinch_on_tty(&sub).unwrap(); self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path }))); Endpoint::FilePair(file.try_clone().unwrap(), file) } @@ -1703,6 +1730,9 @@ impl DeviceManager { let (virtio_console_device, console_resizer) = virtio_devices::Console::new( id.clone(), endpoint, + self.console_resize_pipe + .as_ref() + .map(|p| p.try_clone().unwrap()), self.force_iommu | console_config.iommu, self.seccomp_action.clone(), self.exit_evt @@ -1739,6 +1769,7 @@ impl DeviceManager { virtio_devices: &mut Vec<(VirtioDeviceArc, bool, String)>, serial_pty: Option, console_pty: Option, + console_resize_pipe: Option, ) -> DeviceManagerResult> { let serial_config = self.config.lock().unwrap().serial.clone(); let serial_writer: Option> = match serial_config.mode { @@ -1774,7 +1805,8 @@ impl DeviceManager { None }; - let console_resizer = self.add_virtio_console_device(virtio_devices, console_pty)?; + let console_resizer = + self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?; Ok(Arc::new(Console { serial, @@ -4243,7 +4275,7 @@ impl Snapshottable for DeviceManager { // Now that DeviceManager is updated with the right states, it's time // to create the devices based on the configuration. 
- self.create_devices(None, None) + self.create_devices(None, None, None) .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?; Ok(()) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index c40e35771..1939dc175 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -47,6 +47,7 @@ use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transport use vmm_sys_util::eventfd::EventFd; pub mod api; +mod clone3; pub mod config; pub mod cpu; pub mod device_manager; @@ -55,6 +56,7 @@ pub mod interrupt; pub mod memory_manager; pub mod migration; pub mod seccomp_filters; +mod sigwinch_listener; pub mod vm; #[cfg(feature = "acpi")] @@ -405,6 +407,7 @@ impl Vmm { activate_evt, None, None, + None, )?; if let Some(serial_pty) = vm.serial_pty() { self.epoll @@ -532,6 +535,10 @@ impl Vmm { let config = vm.get_config(); let serial_pty = vm.serial_pty(); let console_pty = vm.console_pty(); + let console_resize_pipe = vm + .console_resize_pipe() + .as_ref() + .map(|pipe| pipe.try_clone().unwrap()); self.vm_shutdown()?; let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; @@ -556,6 +563,7 @@ impl Vmm { activate_evt, serial_pty, console_pty, + console_resize_pipe, )?); } diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 621904d6a..7119d03d7 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -15,6 +15,7 @@ pub enum Thread { SignalHandler, Vcpu, Vmm, + PtyForeground, } /// Shorthand for chaining `SeccompCondition`s with the `and` operator in a `SeccompRule`. @@ -39,6 +40,8 @@ macro_rules! or { // See include/uapi/asm-generic/ioctls.h in the kernel code. 
const TCGETS: u64 = 0x5401; const TCSETS: u64 = 0x5402; +const TIOCSCTTY: u64 = 0x540E; +const TIOCSPGRP: u64 = 0x5410; const TIOCGWINSZ: u64 = 0x5413; const TIOCSPTLCK: u64 = 0x4004_5431; const TIOCGTPEER: u64 = 0x5441; @@ -217,9 +220,11 @@ fn create_vmm_ioctl_seccomp_rule_common() -> Result, BackendErr and![Cond::new(1, ArgLen::Dword, Eq, SIOCSIFNETMASK)?], and![Cond::new(1, ArgLen::Dword, Eq, TCSETS)?], and![Cond::new(1, ArgLen::Dword, Eq, TCGETS)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCGWINSZ)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPTLCK)?], and![Cond::new(1, ArgLen::Dword, Eq, TIOCGTPEER)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCGWINSZ)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSCTTY)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPGRP)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPTLCK)?], and![Cond::new(1, ArgLen::Dword, Eq, TUNGETFEATURES)?], and![Cond::new(1, ArgLen::Dword, Eq, TUNGETIFF)?], and![Cond::new(1, ArgLen::Dword, Eq, TUNSETIFF)?], @@ -367,6 +372,35 @@ fn signal_handler_thread_rules() -> Result)>, Backend ]) } +fn create_pty_foreground_ioctl_seccomp_rule() -> Result, BackendError> { + Ok(or![ + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSCTTY)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPGRP)?], + ]) +} + +fn pty_foreground_thread_rules() -> Result)>, BackendError> { + Ok(vec![ + (libc::SYS_close, vec![]), + (libc::SYS_exit_group, vec![]), + (libc::SYS_getpgid, vec![]), + #[cfg(target_arch = "x86_64")] + (libc::SYS_getpgrp, vec![]), + (libc::SYS_ioctl, create_pty_foreground_ioctl_seccomp_rule()?), + (libc::SYS_munmap, vec![]), + #[cfg(target_arch = "x86_64")] + (libc::SYS_poll, vec![]), + #[cfg(target_arch = "aarch64")] + (libc::SYS_ppoll, vec![]), + (libc::SYS_read, vec![]), + (libc::SYS_rt_sigaction, vec![]), + (libc::SYS_rt_sigreturn, vec![]), + (libc::SYS_setsid, vec![]), + (libc::SYS_sigaltstack, vec![]), + (libc::SYS_write, vec![]), + ]) +} + // The filter containing the white listed syscall rules required by the 
VMM to // function. fn vmm_thread_rules() -> Result)>, BackendError> { @@ -381,6 +415,7 @@ fn vmm_thread_rules() -> Result)>, BackendError> { (libc::SYS_clock_gettime, vec![]), (libc::SYS_clock_nanosleep, vec![]), (libc::SYS_clone, vec![]), + (libc::SYS_clone3, vec![]), (libc::SYS_close, vec![]), (libc::SYS_connect, vec![]), (libc::SYS_dup, vec![]), @@ -406,6 +441,9 @@ fn vmm_thread_rules() -> Result)>, BackendError> { #[cfg(target_arch = "aarch64")] (libc::SYS_newfstatat, vec![]), (libc::SYS_futex, vec![]), + (libc::SYS_getpgid, vec![]), + #[cfg(target_arch = "x86_64")] + (libc::SYS_getpgrp, vec![]), (libc::SYS_getpid, vec![]), (libc::SYS_getrandom, vec![]), (libc::SYS_gettid, vec![]), @@ -431,6 +469,10 @@ fn vmm_thread_rules() -> Result)>, BackendError> { (libc::SYS_open, vec![]), (libc::SYS_openat, vec![]), (libc::SYS_pipe2, vec![]), + #[cfg(target_arch = "x86_64")] + (libc::SYS_poll, vec![]), + #[cfg(target_arch = "aarch64")] + (libc::SYS_ppoll, vec![]), (libc::SYS_prctl, vec![]), (libc::SYS_pread64, vec![]), (libc::SYS_preadv, vec![]), @@ -454,6 +496,7 @@ fn vmm_thread_rules() -> Result)>, BackendError> { (libc::SYS_sendto, vec![]), (libc::SYS_set_robust_list, vec![]), (libc::SYS_set_tid_address, vec![]), + (libc::SYS_setsid, vec![]), (libc::SYS_sigaltstack, vec![]), ( libc::SYS_socket, @@ -615,6 +658,7 @@ fn get_seccomp_rules(thread_type: Thread) -> Result)> Thread::SignalHandler => Ok(signal_handler_thread_rules()?), Thread::Vcpu => Ok(vcpu_thread_rules()?), Thread::Vmm => Ok(vmm_thread_rules()?), + Thread::PtyForeground => Ok(pty_foreground_thread_rules()?), } } diff --git a/vmm/src/sigwinch_listener.rs b/vmm/src/sigwinch_listener.rs new file mode 100644 index 000000000..13fd3464c --- /dev/null +++ b/vmm/src/sigwinch_listener.rs @@ -0,0 +1,135 @@ +// Copyright 2021 Alyssa Ross +// SPDX-License-Identifier: Apache-2.0 + +use crate::clone3::{clone3, clone_args, CLONE_CLEAR_SIGHAND}; +use libc::{ + c_int, c_void, close, getpgrp, ioctl, pipe2, poll, pollfd, 
setsid, sigemptyset, siginfo_t, + sigprocmask, tcsetpgrp, O_CLOEXEC, POLLERR, SIGWINCH, SIG_SETMASK, STDIN_FILENO, STDOUT_FILENO, + TIOCSCTTY, +}; +use seccompiler::{apply_filter, BpfProgram}; +use std::cell::RefCell; +use std::fs::File; +use std::io::{self, ErrorKind, Read, Write}; +use std::mem::size_of; +use std::mem::MaybeUninit; +use std::os::unix::prelude::*; +use std::process::exit; +use std::ptr::null_mut; +use vmm_sys_util::signal::register_signal_handler; + +thread_local! { + // The write end of the notification pipe is stored in a thread-local + // variable so it can be accessed by the signal handler. + static TX: RefCell> = RefCell::new(None); +} + +fn with_tx R>(f: F) -> R { + TX.with(|tx| f(tx.borrow().as_ref().unwrap())) +} + +// This function has to be safe to call from a signal handler, and +// therefore must not panic. +fn notify() { + if let Err(e) = with_tx(|mut tx| tx.write_all(b"\n")) { + if e.kind() == ErrorKind::BrokenPipe { + exit(0); + } + exit(1); + } +} + +extern "C" fn sigwinch_handler(_signo: c_int, _info: *mut siginfo_t, _unused: *mut c_void) { + notify(); +} + +fn unblock_all_signals() -> io::Result<()> { + let mut set = MaybeUninit::uninit(); + if unsafe { sigemptyset(set.as_mut_ptr()) } == -1 { + return Err(io::Error::last_os_error()); + } + let set = unsafe { set.assume_init() }; + + if unsafe { sigprocmask(SIG_SETMASK, &set, null_mut()) } == -1 { + return Err(io::Error::last_os_error()); + } + + Ok(()) +} + +fn sigwinch_listener_main(seccomp_filter: BpfProgram, tx: File, tty: &File) -> ! { + TX.with(|opt| opt.replace(Some(tx))); + + unsafe { + close(STDIN_FILENO); + close(STDOUT_FILENO); + } + + unblock_all_signals().unwrap(); + + apply_filter(&seccomp_filter).unwrap(); + + register_signal_handler(SIGWINCH, sigwinch_handler).unwrap(); + + unsafe { + // Create a new session (and therefore a new process group). + assert_ne!(setsid(), -1); + + // Set the tty to be this process's controlling terminal. 
+ assert_ne!(ioctl(tty.as_raw_fd(), TIOCSCTTY, 0), -1); + + // Become the foreground process group of the tty. + assert_ne!(tcsetpgrp(tty.as_raw_fd(), getpgrp()), -1); + } + + notify(); + + // Wait for the pipe to close, indicating the parent has exited. + with_tx(|tx| { + let mut pollfd = pollfd { + fd: tx.as_raw_fd(), + events: 0, + revents: 0, + }; + + while unsafe { poll(&mut pollfd, 1, -1) } == -1 { + let e = io::Error::last_os_error(); + if !matches!(e.kind(), ErrorKind::Interrupted | ErrorKind::WouldBlock) { + panic!("poll: {}", e); + } + } + + assert_eq!(pollfd.revents, POLLERR); + }); + + exit(0); +} + +pub fn start_sigwinch_listener(seccomp_filter: BpfProgram, pty: &File) -> io::Result { + let mut pipe = [-1; 2]; + if unsafe { pipe2(pipe.as_mut_ptr(), O_CLOEXEC) } == -1 { + return Err(io::Error::last_os_error()); + } + + let mut rx = unsafe { File::from_raw_fd(pipe[0]) }; + let tx = unsafe { File::from_raw_fd(pipe[1]) }; + + let mut args = clone_args::default(); + args.flags |= CLONE_CLEAR_SIGHAND; + + match unsafe { clone3(&mut args, size_of::()) } { + -1 => return Err(io::Error::last_os_error()), + 0 => { + drop(rx); + sigwinch_listener_main(seccomp_filter, tx, pty); + } + _ => (), + } + + drop(tx); + + // Wait for a notification indicating readiness. 
+ rx.read_exact(&mut [0])?; + + Ok(rx) +} diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 79ead510e..d0550cd96 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -708,6 +708,7 @@ impl Vm { activate_evt: EventFd, serial_pty: Option, console_pty: Option, + console_resize_pipe: Option, ) -> Result { #[cfg(feature = "tdx")] let tdx_enabled = config.lock().unwrap().tdx.is_some(); @@ -771,7 +772,7 @@ impl Vm { .device_manager .lock() .unwrap() - .create_devices(serial_pty, console_pty) + .create_devices(serial_pty, console_pty, console_resize_pipe) .map_err(Error::DeviceManager)?; Ok(new_vm) } @@ -1140,6 +1141,10 @@ impl Vm { self.device_manager.lock().unwrap().console_pty() } + pub fn console_resize_pipe(&self) -> Option> { + self.device_manager.lock().unwrap().console_resize_pipe() + } + pub fn shutdown(&mut self) -> Result<()> { let mut state = self.state.try_write().map_err(|_| Error::PoisonedState)?; let new_state = VmState::Shutdown;