vm-virtio: Add vhost-user-net implementation

The vhost-user framework can provide good performance in data-intensive
scenarios thanks to its memory-sharing mechanism. Implement a vhost-user-net
device so that Rust-based VMMs can benefit from it for networking.

Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
This commit is contained in:
Cathy Zhang 2019-08-28 17:50:48 +08:00 committed by Rob Bradford
parent 51306555e7
commit 633f51af9c
5 changed files with 558 additions and 1 deletions

View File

@ -13,6 +13,7 @@ extern crate epoll;
#[macro_use]
extern crate log;
extern crate pci;
extern crate vhost_rs;
extern crate virtio_bindings;
extern crate vm_memory;
@ -29,6 +30,7 @@ mod queue;
mod rng;
pub mod transport;
pub mod vhost_user;
pub use self::block::*;
pub use self::console::*;
@ -117,9 +119,16 @@ const INTERRUPT_STATUS_CONFIG_CHANGED: u32 = 0x2;
/// Errors reported through `ActivateResult` when a virtio device is activated.
pub enum ActivateError {
// NOTE(review): presumably an epoll control failure during activation; the
// code producing this variant is not visible here — confirm.
EpollCtl(std::io::Error),
// NOTE(review): generic activation failure; producers are not visible here.
BadActivate,
/// Queue number is not correct
BadQueueNum,
/// Failed to clone Kill event
CloneKillEventFd,
/// Failed to create Vhost-user interrupt eventfd
VhostIrqCreate,
/// Failed to setup vhost-user daemon.
VhostUserSetup(fs::Error),
/// Failed to setup the vhost-user-net device; wraps the `vhost_user::Error`
/// returned by the vhost-user setup code.
VhostUserNetSetup(vhost_user::Error),
}
pub type ActivateResult = std::result::Result<(), ActivateError>;

View File

@ -0,0 +1,125 @@
// Copyright (c) 2019 Intel Corporation. All rights reserved.
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
use super::super::{Queue, VirtioInterruptType};
use super::{Error, Result};
use epoll;
use vmm_sys_util::eventfd::EventFd;
use crate::VirtioInterrupt;
use std::io;
use std::os::unix::io::AsRawFd;
use std::sync::Arc;
/// Collection of common parameters required by vhost-user devices when
/// running the epoll handler.
///
/// # Fields
/// * `interrupt_cb` - callback invoked to signal the guest that a virtqueue
///   has been used.
/// * `kill_evt` - EventFd used to kill the vhost-user device; the handler
///   leaves its poll loop when this EventFd fires.
/// * `vu_interrupt_list` - pairs of (EventFd, virtqueue); each EventFd is
///   polled and, when it fires, the paired queue is signalled as used.
pub struct VhostUserEpollConfig {
pub interrupt_cb: Arc<VirtioInterrupt>,
pub kill_evt: EventFd,
pub vu_interrupt_list: Vec<(EventFd, Queue)>,
}
/// Epoll-based event handler for vhost-user devices.
///
/// It owns the configuration it polls on; the polling itself is done by
/// `run`.
pub struct VhostUserEpollHandler {
/// Interrupt callback, kill EventFd and per-queue interrupt EventFds.
pub vu_epoll_cfg: VhostUserEpollConfig,
}
impl VhostUserEpollHandler {
/// Construct a new event handler for vhost-user based devices.
///
/// # Arguments
/// * `vu_epoll_cfg` - collection of common parameters for vhost-user devices
///
/// # Return
/// * `VhostUserEpollHandler` - epoll handler for vhost-user based devices
pub fn new(vu_epoll_cfg: VhostUserEpollConfig) -> VhostUserEpollHandler {
VhostUserEpollHandler { vu_epoll_cfg }
}
fn signal_used_queue(&self, queue: &Queue) -> Result<()> {
(self.vu_epoll_cfg.interrupt_cb)(&VirtioInterruptType::Queue, Some(queue))
.map_err(Error::FailedSignalingUsedQueue)?;
Ok(())
}
pub fn run(&mut self) -> Result<()> {
let epoll_fd = epoll::create(true).map_err(Error::EpollCreateFd)?;
for (index, vhost_user_interrupt) in self.vu_epoll_cfg.vu_interrupt_list.iter().enumerate()
{
epoll::ctl(
epoll_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
vhost_user_interrupt.0.as_raw_fd(),
epoll::Event::new(epoll::Events::EPOLLIN, index as u64),
)
.map_err(Error::EpollCtl)?;
}
let kill_evt_index = self.vu_epoll_cfg.vu_interrupt_list.len();
epoll::ctl(
epoll_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
self.vu_epoll_cfg.kill_evt.as_raw_fd(),
epoll::Event::new(epoll::Events::EPOLLIN, kill_evt_index as u64),
)
.map_err(Error::EpollCtl)?;
let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); kill_evt_index + 1];
'poll: loop {
let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
Ok(res) => res,
Err(e) => {
if e.kind() == io::ErrorKind::Interrupted {
// It's well defined from the epoll_wait() syscall
// documentation that the epoll loop can be interrupted
// before any of the requested events occurred or the
// timeout expired. In both those cases, epoll_wait()
// returns an error of type EINTR, but this should not
// be considered as a regular error. Instead it is more
// appropriate to retry, by calling into epoll_wait().
continue;
}
return Err(Error::EpollWait(e));
}
};
for event in events.iter().take(num_events) {
let ev_type = event.data as usize;
match ev_type {
x if x < kill_evt_index => {
let vhost_user_interrupt = &self.vu_epoll_cfg.vu_interrupt_list[x].0;
vhost_user_interrupt
.read()
.map_err(Error::FailedReadingQueue)?;
let result =
self.signal_used_queue(&self.vu_epoll_cfg.vu_interrupt_list[x].1);
if let Err(_e) = result {
error!("failed to signal used queue");
}
}
x if kill_evt_index == x => {
break 'poll;
}
_ => {
error!("Unknown event for vhost-user-net");
}
}
}
}
Ok(())
}
}

View File

@ -0,0 +1,87 @@
// Copyright 2019 Intel Corporation. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
extern crate epoll;
extern crate net_util;
extern crate vhost_rs;
extern crate virtio_bindings;
extern crate vm_memory;
use std;
use std::io;
use vhost_rs::Error as VhostError;
use vm_memory::Error as MmapError;
mod handler;
pub mod net;
pub mod vu_common_ctrl;
pub use self::net::Net;
pub use self::vu_common_ctrl::VhostUserConfig;
/// Errors that can occur while setting up or running a vhost-user device.
#[derive(Debug)]
pub enum Error {
/// Invalid available address.
AvailAddress,
/// Queue number is not correct
BadQueueNum,
/// Creating kill eventfd failed.
CreateKillEventFd(io::Error),
/// Cloning kill eventfd failed.
CloneKillEventFd(io::Error),
/// Invalid descriptor table address.
DescriptorTableAddress,
/// Creating the epoll file descriptor failed.
EpollCreateFd(io::Error),
/// Epoll ctl error
EpollCtl(io::Error),
/// Epoll wait error
EpollWait(io::Error),
/// Read queue failed.
FailedReadingQueue(io::Error),
/// Signal used queue failed.
FailedSignalingUsedQueue(io::Error),
/// Guest memory regions error.
// NOTE(review): the previous comment duplicated the `VhostIrqRead` text; this
// variant is not produced by any code visible in this change — confirm its
// intended use.
MemoryRegions(MmapError),
/// Failed to create master.
VhostUserCreateMaster(VhostError),
/// Failed to open vhost device.
VhostUserOpen(VhostError),
/// Get features failed.
VhostUserGetFeatures(VhostError),
/// Get protocol features failed.
VhostUserGetProtocolFeatures(VhostError),
/// The vhost-user backend does not support the vhost-user protocol features.
VhostUserProtocolNotSupport,
/// Set owner failed.
VhostUserSetOwner(VhostError),
/// Set features failed.
VhostUserSetFeatures(VhostError),
/// Set protocol features failed.
VhostUserSetProtocolFeatures(VhostError),
/// Set mem table failed.
VhostUserSetMemTable(VhostError),
/// Set vring num failed.
VhostUserSetVringNum(VhostError),
/// Set vring addr failed.
VhostUserSetVringAddr(VhostError),
/// Set vring base failed.
VhostUserSetVringBase(VhostError),
/// Set vring call failed.
VhostUserSetVringCall(VhostError),
/// Set vring kick failed.
VhostUserSetVringKick(VhostError),
/// Set vring enable failed.
VhostUserSetVringEnable(VhostError),
/// Vhost-user setup vring failed.
VhostUserSetupVringFailed,
/// Failed to create vhost eventfd.
VhostIrqCreate(io::Error),
/// Failed to read vhost eventfd.
VhostIrqRead(io::Error),
/// Failed to walk the guest memory regions while building the vhost-user
/// memory table.
VhostUserMemoryRegion(MmapError),
/// Invalid used address.
UsedAddress,
}
/// Result type shared by the vhost-user submodules, with `Error` as the error type.
type Result<T> = std::result::Result<T, Error>;

View File

@ -0,0 +1,223 @@
// Copyright 2019 Intel Corporation. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
use libc;
use libc::EFD_NONBLOCK;
use std::cmp;
use std::io::Write;
use std::sync::{Arc, RwLock};
use std::thread;
use std::vec::Vec;
use crate::VirtioInterrupt;
use net_util::{MacAddr, MAC_ADDR_LEN};
use vm_memory::GuestMemoryMmap;
use vmm_sys_util::eventfd::EventFd;
use super::super::{ActivateError, ActivateResult, Queue, VirtioDevice, VirtioDeviceType};
use super::handler::*;
use super::vu_common_ctrl::*;
use super::{Error, Result};
use vhost_rs::vhost_user::message::VhostUserVirtioFeatures;
use vhost_rs::vhost_user::{Master, VhostUserMaster};
use vhost_rs::VhostBackend;
use virtio_bindings::virtio_net;
use virtio_bindings::virtio_ring;
/// vhost-user-net virtio device.
pub struct Net {
// vhost-user master connection to the backend.
vhost_user_net: Master,
// EventFd written on drop; a clone is handed to the worker thread in `activate`.
kill_evt: EventFd,
// Feature bits offered to the guest.
avail_features: u64,
// Feature bits acknowledged so far (by the guest through `ack_features`,
// plus the protocol-features bit set in `new`).
acked_features: u64,
// Device configuration space; filled with the MAC address in `new`.
config_space: Vec<u8>,
// Maximum size of each virtqueue, one entry per queue.
queue_sizes: Vec<u16>,
}
impl<'a> Net {
/// Create a new vhost-user-net device
pub fn new(mac_addr: MacAddr, vu_cfg: VhostUserConfig<'a>) -> Result<Net> {
let mut vhost_user_net = Master::connect(vu_cfg.sock, vu_cfg.num_queues as u64)
.map_err(Error::VhostUserCreateMaster)?;
let kill_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::CreateKillEventFd)?;
// Filling device and vring features VMM supports.
let mut avail_features = 1 << virtio_net::VIRTIO_NET_F_GUEST_CSUM
| 1 << virtio_net::VIRTIO_NET_F_CSUM
| 1 << virtio_net::VIRTIO_NET_F_GUEST_TSO4
| 1 << virtio_net::VIRTIO_NET_F_GUEST_TSO6
| 1 << virtio_net::VIRTIO_NET_F_GUEST_ECN
| 1 << virtio_net::VIRTIO_NET_F_GUEST_UFO
| 1 << virtio_net::VIRTIO_NET_F_HOST_TSO4
| 1 << virtio_net::VIRTIO_NET_F_HOST_TSO6
| 1 << virtio_net::VIRTIO_NET_F_HOST_ECN
| 1 << virtio_net::VIRTIO_NET_F_HOST_UFO
| 1 << virtio_net::VIRTIO_NET_F_MRG_RXBUF
| 1 << virtio_net::VIRTIO_F_NOTIFY_ON_EMPTY
| 1 << virtio_net::VIRTIO_F_VERSION_1
| 1 << virtio_ring::VIRTIO_RING_F_EVENT_IDX
| VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
vhost_user_net
.set_owner()
.map_err(Error::VhostUserSetOwner)?;
// Get features from backend, do negotiation to get a feature collection which
// both VMM and backend support.
let backend_features = vhost_user_net.get_features().unwrap();
avail_features &= backend_features;
// Set features back is required by the vhost crate mechanism, since the
// later vhost call will check if features is filled in master before execution.
vhost_user_net
.set_features(backend_features)
.map_err(Error::VhostUserSetFeatures)?;
let mut acked_features = 0;
if backend_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() != 0 {
acked_features |= VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
vhost_user_net
.get_protocol_features()
.map_err(Error::VhostUserGetProtocolFeatures)?;
} else {
return Err(Error::VhostUserProtocolNotSupport);
}
let mut config_space = Vec::with_capacity(MAC_ADDR_LEN);
unsafe { config_space.set_len(MAC_ADDR_LEN) }
config_space[..].copy_from_slice(mac_addr.get_bytes());
avail_features |= 1 << virtio_net::VIRTIO_NET_F_MAC;
// Send set_vring_base here, since it could tell backends, like OVS + DPDK,
// how many virt queues to be handled, which backend required to know at early stage.
for i in 0..vu_cfg.num_queues {
vhost_user_net
.set_vring_base(i, 0)
.map_err(Error::VhostUserSetVringBase)?;
}
Ok(Net {
vhost_user_net,
kill_evt,
avail_features,
acked_features,
config_space,
queue_sizes: vec![vu_cfg.queue_size; vu_cfg.num_queues],
})
}
}
impl Drop for Net {
fn drop(&mut self) {
if let Err(_e) = self.kill_evt.write(1) {
error!("failed to kill vhost-user-net with error {}", _e);
}
}
}
impl VirtioDevice for Net {
fn device_type(&self) -> u32 {
VirtioDeviceType::TYPE_NET as u32
}
fn queue_max_sizes(&self) -> &[u16] {
&self.queue_sizes
}
fn features(&self, page: u32) -> u32 {
match page {
0 => self.avail_features as u32,
1 => (self.avail_features >> 32) as u32,
_ => {
warn!("Received request for unknown features page: {}", page);
0u32
}
}
}
fn ack_features(&mut self, page: u32, value: u32) {
let mut v = match page {
0 => u64::from(value),
1 => u64::from(value) << 32,
_ => {
warn!("Cannot acknowledge unknown features page: {}", page);
0u64
}
};
// Check if the guest is ACK'ing a feature that we didn't claim to have.
let unrequested_features = v & !self.avail_features;
if unrequested_features != 0 {
warn!("Received acknowledge request for unknown feature: {:x}", v);
// Don't count these features as acked.
v &= !unrequested_features;
}
self.acked_features |= v;
}
fn read_config(&self, offset: u64, mut data: &mut [u8]) {
let config_len = self.config_space.len() as u64;
if offset >= config_len {
error!("Failed to read config space");
return;
}
if let Some(end) = offset.checked_add(data.len() as u64) {
// This write can't fail, offset and end are checked against config_len.
data.write_all(&self.config_space[offset as usize..cmp::min(end, config_len) as usize])
.unwrap();
}
}
fn write_config(&mut self, offset: u64, data: &[u8]) {
let data_len = data.len() as u64;
let config_len = self.config_space.len() as u64;
if offset + data_len > config_len {
error!("Failed to write config space");
return;
}
let (_, right) = self.config_space.split_at_mut(offset as usize);
right.copy_from_slice(&data[..]);
}
fn activate(
&mut self,
mem: Arc<RwLock<GuestMemoryMmap>>,
interrupt_cb: Arc<VirtioInterrupt>,
queues: Vec<Queue>,
queue_evts: Vec<EventFd>,
) -> ActivateResult {
let handler_kill_evt = self
.kill_evt
.try_clone()
.map_err(|_| ActivateError::CloneKillEventFd)?;
let vu_interrupt_list = setup_vhost_user(
&mut self.vhost_user_net,
&mem.read().unwrap(),
queues,
queue_evts,
self.acked_features,
)
.map_err(ActivateError::VhostUserNetSetup)?;
let vu_epoll_cfg = VhostUserEpollConfig {
interrupt_cb,
kill_evt: handler_kill_evt,
vu_interrupt_list,
};
let _handler_result = thread::Builder::new()
.name("vhost_user_net".to_string())
.spawn(move || {
let mut handler = VhostUserEpollHandler::new(vu_epoll_cfg);
let result = handler.run();
if let Err(_e) = result {
error!("net worker thread exited with error {:?}!", _e);
}
});
if let Err(_e) = _handler_result {
error!("vhost-user net thread create failed with error {:?}", _e);
}
Ok(())
}
}

View File

@ -0,0 +1,113 @@
// Copyright 2019 Intel Corporation. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
use libc;
use libc::EFD_NONBLOCK;
use std::os::unix::io::AsRawFd;
use std::vec::Vec;
use vm_memory::{Address, Error as MmapError, GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
use vmm_sys_util::eventfd::EventFd;
use super::super::Queue;
use super::{Error, Result};
use vhost_rs::vhost_user::{Master, VhostUserMaster};
use vhost_rs::{VhostBackend, VhostUserMemoryRegionInfo, VringConfigData};
/// Parameters describing a vhost-user backend connection.
#[derive(Debug, Copy, Clone)]
pub struct VhostUserConfig<'a> {
/// Socket the vhost-user master connects to.
pub sock: &'a str,
/// Number of virtqueues handled by the backend.
pub num_queues: usize,
/// Maximum size of each virtqueue.
pub queue_size: u16,
}
pub fn setup_vhost_user_vring(
vu: &mut Master,
mem: &GuestMemoryMmap,
queues: Vec<Queue>,
queue_evts: Vec<EventFd>,
) -> Result<Vec<(EventFd, Queue)>> {
let mut regions: Vec<VhostUserMemoryRegionInfo> = Vec::new();
mem.with_regions_mut(|_, region| {
let (mmap_handle, mmap_offset) = match region.file_offset() {
Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
None => return Err(MmapError::NoMemoryRegion),
};
let vhost_user_net_reg = VhostUserMemoryRegionInfo {
guest_phys_addr: region.start_addr().raw_value(),
memory_size: region.len() as u64,
userspace_addr: region.as_ptr() as u64,
mmap_offset,
mmap_handle,
};
regions.push(vhost_user_net_reg);
Ok(())
})
.map_err(Error::VhostUserMemoryRegion)?;
vu.set_mem_table(regions.as_slice())
.map_err(Error::VhostUserSetMemTable)?;
let mut vu_interrupt_list = Vec::new();
for (queue_index, queue) in queues.into_iter().enumerate() {
vu.set_vring_num(queue_index, queue.get_max_size())
.map_err(Error::VhostUserSetVringNum)?;
let config_data = VringConfigData {
queue_max_size: queue.get_max_size(),
queue_size: queue.actual_size(),
flags: 0u32,
desc_table_addr: mem
.get_host_address(queue.desc_table)
.ok_or_else(|| Error::DescriptorTableAddress)? as u64,
used_ring_addr: mem
.get_host_address(queue.used_ring)
.ok_or_else(|| Error::UsedAddress)? as u64,
avail_ring_addr: mem
.get_host_address(queue.avail_ring)
.ok_or_else(|| Error::AvailAddress)? as u64,
log_addr: None,
};
vu.set_vring_addr(queue_index, &config_data)
.map_err(Error::VhostUserSetVringAddr)?;
vu.set_vring_base(queue_index, 0u16)
.map_err(Error::VhostUserSetVringBase)?;
let vhost_user_interrupt = EventFd::new(EFD_NONBLOCK).map_err(Error::VhostIrqCreate)?;
vu.set_vring_call(queue_index, &vhost_user_interrupt)
.map_err(Error::VhostUserSetVringCall)?;
vu_interrupt_list.push((vhost_user_interrupt, queue));
vu.set_vring_kick(queue_index, &queue_evts[queue_index])
.map_err(Error::VhostUserSetVringKick)?;
}
Ok(vu_interrupt_list)
}
pub fn setup_vhost_user(
vu: &mut Master,
mem: &GuestMemoryMmap,
queues: Vec<Queue>,
queue_evts: Vec<EventFd>,
acked_features: u64,
) -> Result<Vec<(EventFd, Queue)>> {
for i in 0..queues.len() {
vu.set_vring_enable(i, true)
.map_err(Error::VhostUserSetVringEnable)?;
}
let backend_features = vu.get_features().unwrap();
vu.set_features(acked_features & backend_features)
.map_err(Error::VhostUserSetFeatures)?;
match setup_vhost_user_vring(vu, mem, queues, queue_evts) {
Ok(vu_interrupt_list) => Ok(vu_interrupt_list),
Err(_) => Err(Error::VhostUserSetupVringFailed),
}
}