mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2024-10-28 07:33:09 +00:00
a636411522
Introducing a new function to factorize a small part of the initialization that is shared between a full reinitialization and a restoration. Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
573 lines
20 KiB
Rust
573 lines
20 KiB
Rust
// Copyright 2019 Intel Corporation. All Rights Reserved.
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
use super::super::{Descriptor, Queue};
|
|
use super::{Error, Result};
|
|
use crate::vhost_user::Inflight;
|
|
use crate::{
|
|
get_host_address_range, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VirtioInterrupt,
|
|
VirtioInterruptType,
|
|
};
|
|
use std::convert::TryInto;
|
|
use std::ffi;
|
|
use std::fs::File;
|
|
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
|
|
use std::os::unix::net::UnixListener;
|
|
use std::sync::Arc;
|
|
use std::thread::sleep;
|
|
use std::time::{Duration, Instant};
|
|
use std::vec::Vec;
|
|
use vhost::vhost_kern::vhost_binding::{VHOST_F_LOG_ALL, VHOST_VRING_F_LOG};
|
|
use vhost::vhost_user::message::{
|
|
VhostUserHeaderFlag, VhostUserInflight, VhostUserProtocolFeatures, VhostUserVirtioFeatures,
|
|
};
|
|
use vhost::vhost_user::{Master, MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler};
|
|
use vhost::{VhostBackend, VhostUserDirtyLogRegion, VhostUserMemoryRegionInfo, VringConfigData};
|
|
use vm_memory::{Address, Error as MmapError, FileOffset, GuestMemory, GuestMemoryRegion};
|
|
use vm_migration::protocol::MemoryRangeTable;
|
|
use vmm_sys_util::eventfd::EventFd;
|
|
|
|
/// Size, in bytes, of a dirty page for vhost-user (4 KiB).
const VHOST_LOG_PAGE: u64 = 0x1000;
|
|
|
|
/// Parameters describing a vhost-user backend connection.
#[derive(Debug, Clone)]
pub struct VhostUserConfig {
    /// Socket used to reach the backend (presumably a filesystem path to a
    /// Unix socket -- confirm against callers).
    pub socket: String,
    /// Number of queues.
    pub num_queues: usize,
    /// Size of each queue.
    pub queue_size: u16,
}
|
|
|
|
#[derive(Clone)]
|
|
struct VringInfo {
|
|
config_data: VringConfigData,
|
|
used_guest_addr: u64,
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct VhostUserHandle {
|
|
vu: Master,
|
|
ready: bool,
|
|
supports_migration: bool,
|
|
shm_log: Option<Arc<MmapRegion>>,
|
|
acked_features: u64,
|
|
vrings_info: Option<Vec<VringInfo>>,
|
|
}
|
|
|
|
impl VhostUserHandle {
|
|
pub fn update_mem_table(&mut self, mem: &GuestMemoryMmap) -> Result<()> {
|
|
let mut regions: Vec<VhostUserMemoryRegionInfo> = Vec::new();
|
|
for region in mem.iter() {
|
|
let (mmap_handle, mmap_offset) = match region.file_offset() {
|
|
Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
|
|
None => return Err(Error::VhostUserMemoryRegion(MmapError::NoMemoryRegion)),
|
|
};
|
|
|
|
let vhost_user_net_reg = VhostUserMemoryRegionInfo {
|
|
guest_phys_addr: region.start_addr().raw_value(),
|
|
memory_size: region.len() as u64,
|
|
userspace_addr: region.as_ptr() as u64,
|
|
mmap_offset,
|
|
mmap_handle,
|
|
};
|
|
|
|
regions.push(vhost_user_net_reg);
|
|
}
|
|
|
|
self.vu
|
|
.set_mem_table(regions.as_slice())
|
|
.map_err(Error::VhostUserSetMemTable)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub fn add_memory_region(&mut self, region: &Arc<GuestRegionMmap>) -> Result<()> {
|
|
let (mmap_handle, mmap_offset) = match region.file_offset() {
|
|
Some(file_offset) => (file_offset.file().as_raw_fd(), file_offset.start()),
|
|
None => return Err(Error::MissingRegionFd),
|
|
};
|
|
|
|
let region = VhostUserMemoryRegionInfo {
|
|
guest_phys_addr: region.start_addr().raw_value(),
|
|
memory_size: region.len() as u64,
|
|
userspace_addr: region.as_ptr() as u64,
|
|
mmap_offset,
|
|
mmap_handle,
|
|
};
|
|
|
|
self.vu
|
|
.add_mem_region(®ion)
|
|
.map_err(Error::VhostUserAddMemReg)
|
|
}
|
|
|
|
pub fn negotiate_features_vhost_user(
|
|
&mut self,
|
|
avail_features: u64,
|
|
avail_protocol_features: VhostUserProtocolFeatures,
|
|
) -> Result<(u64, u64)> {
|
|
// Set vhost-user owner.
|
|
self.vu.set_owner().map_err(Error::VhostUserSetOwner)?;
|
|
|
|
// Get features from backend, do negotiation to get a feature collection which
|
|
// both VMM and backend support.
|
|
let backend_features = self
|
|
.vu
|
|
.get_features()
|
|
.map_err(Error::VhostUserGetFeatures)?;
|
|
let acked_features = avail_features & backend_features;
|
|
|
|
let acked_protocol_features =
|
|
if acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() != 0 {
|
|
let backend_protocol_features = self
|
|
.vu
|
|
.get_protocol_features()
|
|
.map_err(Error::VhostUserGetProtocolFeatures)?;
|
|
|
|
let acked_protocol_features = avail_protocol_features & backend_protocol_features;
|
|
|
|
self.vu
|
|
.set_protocol_features(acked_protocol_features)
|
|
.map_err(Error::VhostUserSetProtocolFeatures)?;
|
|
|
|
acked_protocol_features
|
|
} else {
|
|
VhostUserProtocolFeatures::empty()
|
|
};
|
|
|
|
if avail_protocol_features.contains(VhostUserProtocolFeatures::REPLY_ACK)
|
|
&& acked_protocol_features.contains(VhostUserProtocolFeatures::REPLY_ACK)
|
|
{
|
|
self.vu.set_hdr_flags(VhostUserHeaderFlag::NEED_REPLY);
|
|
}
|
|
|
|
self.update_supports_migration(acked_features, acked_protocol_features.bits());
|
|
|
|
Ok((acked_features, acked_protocol_features.bits()))
|
|
}
|
|
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn setup_vhost_user<S: VhostUserMasterReqHandler>(
|
|
&mut self,
|
|
mem: &GuestMemoryMmap,
|
|
queues: Vec<Queue>,
|
|
queue_evts: Vec<EventFd>,
|
|
virtio_interrupt: &Arc<dyn VirtioInterrupt>,
|
|
acked_features: u64,
|
|
slave_req_handler: &Option<MasterReqHandler<S>>,
|
|
inflight: Option<&mut Inflight>,
|
|
) -> Result<()> {
|
|
self.vu
|
|
.set_features(acked_features)
|
|
.map_err(Error::VhostUserSetFeatures)?;
|
|
|
|
// Update internal value after it's been sent to the backend.
|
|
self.acked_features = acked_features;
|
|
|
|
// Let's first provide the memory table to the backend.
|
|
self.update_mem_table(mem)?;
|
|
|
|
// Send set_vring_num here, since it could tell backends, like SPDK,
|
|
// how many virt queues to be handled, which backend required to know
|
|
// at early stage.
|
|
for (queue_index, queue) in queues.iter().enumerate() {
|
|
self.vu
|
|
.set_vring_num(queue_index, queue.actual_size())
|
|
.map_err(Error::VhostUserSetVringNum)?;
|
|
}
|
|
|
|
// Setup for inflight I/O tracking shared memory.
|
|
if let Some(inflight) = inflight {
|
|
if inflight.fd.is_none() {
|
|
let inflight_req_info = VhostUserInflight {
|
|
mmap_size: 0,
|
|
mmap_offset: 0,
|
|
num_queues: queues.len() as u16,
|
|
queue_size: queues[0].actual_size(),
|
|
};
|
|
let (info, fd) = self
|
|
.vu
|
|
.get_inflight_fd(&inflight_req_info)
|
|
.map_err(Error::VhostUserGetInflight)?;
|
|
inflight.info = info;
|
|
inflight.fd = Some(fd);
|
|
}
|
|
// Unwrapping the inflight fd is safe here since we know it can't be None.
|
|
self.vu
|
|
.set_inflight_fd(&inflight.info, inflight.fd.as_ref().unwrap().as_raw_fd())
|
|
.map_err(Error::VhostUserSetInflight)?;
|
|
}
|
|
|
|
let num_queues = queues.len() as usize;
|
|
|
|
let mut vrings_info = Vec::new();
|
|
for (queue_index, queue) in queues.into_iter().enumerate() {
|
|
let actual_size: usize = queue.actual_size().try_into().unwrap();
|
|
|
|
let config_data = VringConfigData {
|
|
queue_max_size: queue.get_max_size(),
|
|
queue_size: queue.actual_size(),
|
|
flags: 0u32,
|
|
desc_table_addr: get_host_address_range(
|
|
mem,
|
|
queue.desc_table,
|
|
actual_size * std::mem::size_of::<Descriptor>(),
|
|
)
|
|
.ok_or(Error::DescriptorTableAddress)? as u64,
|
|
// The used ring is {flags: u16; idx: u16; virtq_used_elem [{id: u16, len: u16}; actual_size]},
|
|
// i.e. 4 + (4 + 4) * actual_size.
|
|
used_ring_addr: get_host_address_range(mem, queue.used_ring, 4 + actual_size * 8)
|
|
.ok_or(Error::UsedAddress)? as u64,
|
|
// The used ring is {flags: u16; idx: u16; elem [u16; actual_size]},
|
|
// i.e. 4 + (2) * actual_size.
|
|
avail_ring_addr: get_host_address_range(mem, queue.avail_ring, 4 + actual_size * 2)
|
|
.ok_or(Error::AvailAddress)? as u64,
|
|
log_addr: None,
|
|
};
|
|
|
|
vrings_info.push(VringInfo {
|
|
config_data,
|
|
used_guest_addr: queue.used_ring.raw_value(),
|
|
});
|
|
|
|
self.vu
|
|
.set_vring_addr(queue_index, &config_data)
|
|
.map_err(Error::VhostUserSetVringAddr)?;
|
|
self.vu
|
|
.set_vring_base(
|
|
queue_index,
|
|
queue
|
|
.used_index_from_memory(mem)
|
|
.map_err(Error::GetAvailableIndex)?,
|
|
)
|
|
.map_err(Error::VhostUserSetVringBase)?;
|
|
|
|
if let Some(eventfd) =
|
|
virtio_interrupt.notifier(&VirtioInterruptType::Queue, Some(&queue))
|
|
{
|
|
self.vu
|
|
.set_vring_call(queue_index, &eventfd)
|
|
.map_err(Error::VhostUserSetVringCall)?;
|
|
}
|
|
|
|
self.vu
|
|
.set_vring_kick(queue_index, &queue_evts[queue_index])
|
|
.map_err(Error::VhostUserSetVringKick)?;
|
|
}
|
|
|
|
self.enable_vhost_user_vrings(num_queues, true)?;
|
|
|
|
if let Some(slave_req_handler) = slave_req_handler {
|
|
self.vu
|
|
.set_slave_request_fd(&slave_req_handler.get_tx_raw_fd())
|
|
.map_err(Error::VhostUserSetSlaveRequestFd)?;
|
|
}
|
|
|
|
self.vrings_info = Some(vrings_info);
|
|
self.ready = true;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn enable_vhost_user_vrings(&mut self, num_queues: usize, enable: bool) -> Result<()> {
|
|
for queue_index in 0..num_queues {
|
|
self.vu
|
|
.set_vring_enable(queue_index, enable)
|
|
.map_err(Error::VhostUserSetVringEnable)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub fn reset_vhost_user(&mut self, num_queues: usize) -> Result<()> {
|
|
self.enable_vhost_user_vrings(num_queues, false)?;
|
|
|
|
// Reset the owner.
|
|
self.vu.reset_owner().map_err(Error::VhostUserResetOwner)
|
|
}
|
|
|
|
pub fn set_protocol_features_vhost_user(
|
|
&mut self,
|
|
acked_features: u64,
|
|
acked_protocol_features: u64,
|
|
) -> Result<()> {
|
|
self.vu.set_owner().map_err(Error::VhostUserSetOwner)?;
|
|
self.vu
|
|
.get_features()
|
|
.map_err(Error::VhostUserGetFeatures)?;
|
|
|
|
if acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() != 0 {
|
|
if let Some(acked_protocol_features) =
|
|
VhostUserProtocolFeatures::from_bits(acked_protocol_features)
|
|
{
|
|
self.vu
|
|
.set_protocol_features(acked_protocol_features)
|
|
.map_err(Error::VhostUserSetProtocolFeatures)?;
|
|
|
|
if acked_protocol_features.contains(VhostUserProtocolFeatures::REPLY_ACK) {
|
|
self.vu.set_hdr_flags(VhostUserHeaderFlag::NEED_REPLY);
|
|
}
|
|
}
|
|
}
|
|
|
|
self.update_supports_migration(acked_features, acked_protocol_features);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn reinitialize_vhost_user<S: VhostUserMasterReqHandler>(
|
|
&mut self,
|
|
mem: &GuestMemoryMmap,
|
|
queues: Vec<Queue>,
|
|
queue_evts: Vec<EventFd>,
|
|
virtio_interrupt: &Arc<dyn VirtioInterrupt>,
|
|
acked_features: u64,
|
|
acked_protocol_features: u64,
|
|
slave_req_handler: &Option<MasterReqHandler<S>>,
|
|
inflight: Option<&mut Inflight>,
|
|
) -> Result<()> {
|
|
self.set_protocol_features_vhost_user(acked_features, acked_protocol_features)?;
|
|
|
|
self.setup_vhost_user(
|
|
mem,
|
|
queues,
|
|
queue_evts,
|
|
virtio_interrupt,
|
|
acked_features,
|
|
slave_req_handler,
|
|
inflight,
|
|
)
|
|
}
|
|
|
|
pub fn connect_vhost_user(
|
|
server: bool,
|
|
socket_path: &str,
|
|
num_queues: u64,
|
|
unlink_socket: bool,
|
|
) -> Result<Self> {
|
|
if server {
|
|
if unlink_socket {
|
|
std::fs::remove_file(socket_path).map_err(Error::RemoveSocketPath)?;
|
|
}
|
|
|
|
info!("Binding vhost-user listener...");
|
|
let listener = UnixListener::bind(socket_path).map_err(Error::BindSocket)?;
|
|
info!("Waiting for incoming vhost-user connection...");
|
|
let (stream, _) = listener.accept().map_err(Error::AcceptConnection)?;
|
|
|
|
Ok(VhostUserHandle {
|
|
vu: Master::from_stream(stream, num_queues),
|
|
ready: false,
|
|
supports_migration: false,
|
|
shm_log: None,
|
|
acked_features: 0,
|
|
vrings_info: None,
|
|
})
|
|
} else {
|
|
let now = Instant::now();
|
|
|
|
// Retry connecting for a full minute
|
|
let err = loop {
|
|
let err = match Master::connect(socket_path, num_queues) {
|
|
Ok(m) => {
|
|
return Ok(VhostUserHandle {
|
|
vu: m,
|
|
ready: false,
|
|
supports_migration: false,
|
|
shm_log: None,
|
|
acked_features: 0,
|
|
vrings_info: None,
|
|
})
|
|
}
|
|
Err(e) => e,
|
|
};
|
|
sleep(Duration::from_millis(100));
|
|
|
|
if now.elapsed().as_secs() >= 60 {
|
|
break err;
|
|
}
|
|
};
|
|
|
|
error!(
|
|
"Failed connecting the backend after trying for 1 minute: {:?}",
|
|
err
|
|
);
|
|
Err(Error::VhostUserConnect)
|
|
}
|
|
}
|
|
|
|
/// Gives mutable access to the underlying vhost-user master endpoint.
pub fn socket_handle(&mut self) -> &mut Master {
    let master = &mut self.vu;
    master
}
|
|
|
|
/// Disables the first `num_queues` vrings, but only once the backend has been
/// set up; otherwise this is a no-op.
pub fn pause_vhost_user(&mut self, num_queues: usize) -> Result<()> {
    if !self.ready {
        return Ok(());
    }

    self.enable_vhost_user_vrings(num_queues, false)
}
|
|
|
|
/// Re-enables the first `num_queues` vrings, but only once the backend has
/// been set up; otherwise this is a no-op.
pub fn resume_vhost_user(&mut self, num_queues: usize) -> Result<()> {
    if !self.ready {
        return Ok(());
    }

    self.enable_vhost_user_vrings(num_queues, true)
}
|
|
|
|
fn update_supports_migration(&mut self, acked_features: u64, acked_protocol_features: u64) {
|
|
if (acked_features & u64::from(vhost::vhost_kern::vhost_binding::VHOST_F_LOG_ALL) != 0)
|
|
&& (acked_protocol_features & VhostUserProtocolFeatures::LOG_SHMFD.bits() != 0)
|
|
{
|
|
self.supports_migration = true;
|
|
}
|
|
}
|
|
|
|
fn update_log_base(&mut self, last_ram_addr: u64) -> Result<Option<Arc<MmapRegion>>> {
|
|
// Create the memfd
|
|
let fd = memfd_create(
|
|
&ffi::CString::new("vhost_user_dirty_log").unwrap(),
|
|
libc::MFD_CLOEXEC | libc::MFD_ALLOW_SEALING,
|
|
)
|
|
.map_err(Error::MemfdCreate)?;
|
|
|
|
// Safe because we checked the file descriptor is valid
|
|
let file = unsafe { File::from_raw_fd(fd) };
|
|
// The size of the memory mapping corresponds to the size of a bitmap
|
|
// covering all guest pages for addresses from 0 to the last physical
|
|
// address in guest RAM.
|
|
// A page is always 4kiB from a vhost-user perspective, and each bit is
|
|
// a page. That's how we can compute mmap_size from the last address.
|
|
let mmap_size = (last_ram_addr / (VHOST_LOG_PAGE * 8)) + 1;
|
|
let mmap_handle = file.as_raw_fd();
|
|
|
|
// Set shm_log region size
|
|
file.set_len(mmap_size).map_err(Error::SetFileSize)?;
|
|
|
|
// Set the seals
|
|
let res = unsafe {
|
|
libc::fcntl(
|
|
file.as_raw_fd(),
|
|
libc::F_ADD_SEALS,
|
|
libc::F_SEAL_GROW | libc::F_SEAL_SHRINK | libc::F_SEAL_SEAL,
|
|
)
|
|
};
|
|
if res < 0 {
|
|
return Err(Error::SetSeals(std::io::Error::last_os_error()));
|
|
}
|
|
|
|
// Mmap shm_log region
|
|
let region = MmapRegion::build(
|
|
Some(FileOffset::new(file, 0)),
|
|
mmap_size as usize,
|
|
libc::PROT_READ | libc::PROT_WRITE,
|
|
libc::MAP_SHARED,
|
|
)
|
|
.map_err(Error::NewMmapRegion)?;
|
|
|
|
// Make sure we hold onto the region to prevent the mapping from being
|
|
// released.
|
|
let old_region = self.shm_log.replace(Arc::new(region));
|
|
|
|
// Send the shm_log fd over to the backend
|
|
let log = VhostUserDirtyLogRegion {
|
|
mmap_size,
|
|
mmap_offset: 0,
|
|
mmap_handle,
|
|
};
|
|
self.vu
|
|
.set_log_base(0, Some(log))
|
|
.map_err(Error::VhostUserSetLogBase)?;
|
|
|
|
Ok(old_region)
|
|
}
|
|
|
|
fn set_vring_logging(&mut self, enable: bool) -> Result<()> {
|
|
if let Some(vrings_info) = &self.vrings_info {
|
|
for (i, vring_info) in vrings_info.iter().enumerate() {
|
|
let mut config_data = vring_info.config_data;
|
|
config_data.flags = if enable { 1 << VHOST_VRING_F_LOG } else { 0 };
|
|
config_data.log_addr = if enable {
|
|
Some(vring_info.used_guest_addr)
|
|
} else {
|
|
None
|
|
};
|
|
|
|
self.vu
|
|
.set_vring_addr(i, &config_data)
|
|
.map_err(Error::VhostUserSetVringAddr)?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub fn start_dirty_log(&mut self, last_ram_addr: u64) -> Result<()> {
|
|
if !self.supports_migration {
|
|
return Err(Error::MigrationNotSupported);
|
|
}
|
|
|
|
// Set the shm log region
|
|
self.update_log_base(last_ram_addr)?;
|
|
|
|
// Enable VHOST_F_LOG_ALL feature
|
|
let features = self.acked_features | (1 << VHOST_F_LOG_ALL);
|
|
self.vu
|
|
.set_features(features)
|
|
.map_err(Error::VhostUserSetFeatures)?;
|
|
|
|
// Enable dirty page logging of used ring for all queues
|
|
self.set_vring_logging(true)
|
|
}
|
|
|
|
pub fn stop_dirty_log(&mut self) -> Result<()> {
|
|
if !self.supports_migration {
|
|
return Err(Error::MigrationNotSupported);
|
|
}
|
|
|
|
// Disable dirty page logging of used ring for all queues
|
|
self.set_vring_logging(false)?;
|
|
|
|
// Disable VHOST_F_LOG_ALL feature
|
|
self.vu
|
|
.set_features(self.acked_features)
|
|
.map_err(Error::VhostUserSetFeatures)?;
|
|
|
|
// This is important here since the log region goes out of scope,
|
|
// invoking the Drop trait, hence unmapping the memory.
|
|
self.shm_log = None;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub fn dirty_log(&mut self, last_ram_addr: u64) -> Result<MemoryRangeTable> {
|
|
// The log region is updated by creating a new region that is sent to
|
|
// the backend. This ensures the backend stops logging to the previous
|
|
// region. The previous region is returned and processed to create the
|
|
// bitmap representing the dirty pages.
|
|
if let Some(region) = self.update_log_base(last_ram_addr)? {
|
|
// Be careful with the size, as it was based on u8, meaning we must
|
|
// divide it by 8.
|
|
let len = region.size() / 8;
|
|
let bitmap = unsafe {
|
|
// Cast the pointer to u64
|
|
let ptr = region.as_ptr() as *const u64;
|
|
std::slice::from_raw_parts(ptr, len).to_vec()
|
|
};
|
|
Ok(MemoryRangeTable::from_bitmap(bitmap, 0))
|
|
} else {
|
|
Err(Error::MissingShmLogRegion)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn memfd_create(name: &ffi::CStr, flags: u32) -> std::result::Result<RawFd, std::io::Error> {
|
|
let res = unsafe { libc::syscall(libc::SYS_memfd_create, name.as_ptr(), flags) };
|
|
|
|
if res < 0 {
|
|
Err(std::io::Error::last_os_error())
|
|
} else {
|
|
Ok(res as RawFd)
|
|
}
|
|
}
|