mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2025-02-22 19:32:20 +00:00
vm-virtio: Implement refactor for net devices and backend
Now that the common parts have been moved into net_util.rs under vm-virtio, refactor the code of the virtio-net device, the vhost-user-net device and its backend to reduce code size and improve readability. Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
This commit is contained in:
parent
6ae2597d19
commit
1ae7deb393
@ -18,44 +18,29 @@ use clap::{App, Arg};
|
||||
use epoll;
|
||||
use libc::{self, EAGAIN, EFD_NONBLOCK};
|
||||
use log::*;
|
||||
use std::cmp;
|
||||
use net_util::Tap;
|
||||
use std::fmt;
|
||||
use std::io::Read;
|
||||
use std::io::{self, Write};
|
||||
use std::mem;
|
||||
use std::io::{self};
|
||||
use std::net::Ipv4Addr;
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use std::process;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::vec::Vec;
|
||||
|
||||
use vhost_rs::vhost_user::message::*;
|
||||
use vhost_rs::vhost_user::Error as VhostUserError;
|
||||
use vhost_user_backend::{VhostUserBackend, VhostUserDaemon, Vring, VringWorker};
|
||||
|
||||
use net_gen;
|
||||
|
||||
use net_util::{Tap, TapError};
|
||||
use virtio_bindings::bindings::virtio_net::*;
|
||||
use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap};
|
||||
use vm_memory::GuestMemoryMmap;
|
||||
use vm_virtio::net_util::{
|
||||
open_tap, RxVirtio, TxVirtio, KILL_EVENT, RX_QUEUE_EVENT, RX_TAP_EVENT, TX_QUEUE_EVENT,
|
||||
};
|
||||
use vm_virtio::Queue;
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
/// The maximum buffer size when segmentation offload is enabled. This
|
||||
/// includes the 12-byte virtio net header.
|
||||
/// http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html#x1-1740003
|
||||
const MAX_BUFFER_SIZE: usize = 65562;
|
||||
const QUEUE_SIZE: usize = 1024;
|
||||
const NUM_QUEUES: usize = 2;
|
||||
|
||||
// The guest has made a buffer available to receive a frame into.
|
||||
const RX_QUEUE_EVENT: u16 = 0;
|
||||
// The transmit queue has a frame that is ready to send from the guest.
|
||||
const TX_QUEUE_EVENT: u16 = 1;
|
||||
// A frame is available for reading from the tap device to receive in the guest.
|
||||
const RX_TAP_EVENT: u16 = 2;
|
||||
// The device has been dropped.
|
||||
const KILL_EVENT: u16 = 3;
|
||||
|
||||
pub type VhostUserResult<T> = std::result::Result<T, VhostUserError>;
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type VhostUserBackendResult<T> = std::result::Result<T, std::io::Error>;
|
||||
@ -93,17 +78,7 @@ pub enum Error {
|
||||
/// Failed to parse mask parameter.
|
||||
ParseMaskParam,
|
||||
/// Open tap device failed.
|
||||
TapOpen(TapError),
|
||||
/// Setting tap IP failed.
|
||||
TapSetIp(TapError),
|
||||
/// Setting tap netmask failed.
|
||||
TapSetNetmask(TapError),
|
||||
/// Setting tap interface offload flags failed.
|
||||
TapSetOffload(TapError),
|
||||
/// Setting vnet header size failed.
|
||||
TapSetVnetHdrSize(TapError),
|
||||
/// Enabling tap interface failed.
|
||||
TapEnable(TapError),
|
||||
OpenTap(vm_virtio::net_util::Error),
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
@ -120,44 +95,6 @@ impl std::convert::From<Error> for std::io::Error {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct TxVirtio {
|
||||
iovec: Vec<(GuestAddress, usize)>,
|
||||
frame_buf: [u8; MAX_BUFFER_SIZE],
|
||||
}
|
||||
|
||||
impl TxVirtio {
|
||||
fn new() -> Self {
|
||||
TxVirtio {
|
||||
iovec: Vec::new(),
|
||||
frame_buf: [0u8; MAX_BUFFER_SIZE],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct RxVirtio {
|
||||
deferred_frame: bool,
|
||||
deferred_irqs: bool,
|
||||
bytes_read: usize,
|
||||
frame_buf: [u8; MAX_BUFFER_SIZE],
|
||||
}
|
||||
|
||||
impl RxVirtio {
|
||||
fn new() -> Self {
|
||||
RxVirtio {
|
||||
deferred_frame: false,
|
||||
deferred_irqs: false,
|
||||
bytes_read: 0,
|
||||
frame_buf: [0u8; MAX_BUFFER_SIZE],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn vnet_hdr_len() -> usize {
|
||||
mem::size_of::<virtio_net_hdr_v1>()
|
||||
}
|
||||
|
||||
struct VhostUserNetBackend {
|
||||
mem: Option<GuestMemoryMmap>,
|
||||
vring_worker: Option<Arc<VringWorker>>,
|
||||
@ -185,16 +122,6 @@ impl std::clone::Clone for VhostUserNetBackend {
|
||||
impl VhostUserNetBackend {
|
||||
/// Create a new virtio network device with the given TAP interface.
|
||||
pub fn new_with_tap(tap: Tap) -> Result<Self> {
|
||||
// Set offload flags to match the virtio features below.
|
||||
tap.set_offload(
|
||||
net_gen::TUN_F_CSUM | net_gen::TUN_F_UFO | net_gen::TUN_F_TSO4 | net_gen::TUN_F_TSO6,
|
||||
)
|
||||
.map_err(Error::TapSetOffload)?;
|
||||
|
||||
let vnet_hdr_size = vnet_hdr_len() as i32;
|
||||
tap.set_vnet_hdr_size(vnet_hdr_size)
|
||||
.map_err(Error::TapSetVnetHdrSize)?;
|
||||
|
||||
let rx = RxVirtio::new();
|
||||
let tx = TxVirtio::new();
|
||||
|
||||
@ -212,10 +139,7 @@ impl VhostUserNetBackend {
|
||||
/// Create a new virtio network device with the given IP address and
|
||||
/// netmask.
|
||||
pub fn new(ip_addr: Ipv4Addr, netmask: Ipv4Addr) -> Result<Self> {
|
||||
let tap = Tap::new().map_err(Error::TapOpen)?;
|
||||
tap.set_ip_addr(ip_addr).map_err(Error::TapSetIp)?;
|
||||
tap.set_netmask(netmask).map_err(Error::TapSetNetmask)?;
|
||||
tap.enable().map_err(Error::TapEnable)?;
|
||||
let tap = open_tap(ip_addr, netmask).map_err(Error::OpenTap)?;
|
||||
|
||||
Self::new_with_tap(tap)
|
||||
}
|
||||
@ -223,10 +147,10 @@ impl VhostUserNetBackend {
|
||||
// Copies a single frame from `self.rx.frame_buf` into the guest. Returns true
|
||||
// if a buffer was used, and false if the frame must be deferred until a buffer
|
||||
// is made available by the driver.
|
||||
fn rx_single_frame(&mut self, vring: &mut Vring) -> Result<bool> {
|
||||
fn rx_single_frame(&mut self, mut queue: &mut Queue) -> Result<bool> {
|
||||
let mem = self.mem.as_ref().ok_or(Error::NoMemoryConfigured)?;
|
||||
|
||||
let mut next_desc = vring.mut_queue().iter(&mem).next();
|
||||
let next_desc = queue.iter(&mem).next();
|
||||
|
||||
if next_desc.is_none() {
|
||||
// Queue has no available descriptors
|
||||
@ -244,52 +168,9 @@ impl VhostUserNetBackend {
|
||||
}
|
||||
return Ok(false);
|
||||
}
|
||||
let write_complete = self.rx.process_desc_chain(&mem, next_desc, &mut queue);
|
||||
|
||||
// We just checked that the head descriptor exists.
|
||||
let head_index = next_desc.as_ref().unwrap().index;
|
||||
let mut write_count = 0;
|
||||
|
||||
// Copy from frame into buffer, which may span multiple descriptors.
|
||||
loop {
|
||||
match next_desc {
|
||||
Some(desc) => {
|
||||
if !desc.is_write_only() {
|
||||
break;
|
||||
}
|
||||
let limit = cmp::min(write_count + desc.len as usize, self.rx.bytes_read);
|
||||
let source_slice = &self.rx.frame_buf[write_count..limit];
|
||||
let write_result = mem.write_slice(source_slice, desc.addr);
|
||||
|
||||
match write_result {
|
||||
Ok(_) => {
|
||||
write_count = limit;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to write slice: {:?}", e);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if write_count >= self.rx.bytes_read {
|
||||
break;
|
||||
}
|
||||
next_desc = desc.next_descriptor();
|
||||
}
|
||||
None => {
|
||||
warn!("Receiving buffer is too small to hold frame of current size");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vring
|
||||
.mut_queue()
|
||||
.add_used(&mem, head_index, write_count as u32);
|
||||
|
||||
// Mark that we have at least one pending packet and we need to interrupt the guest.
|
||||
self.rx.deferred_irqs = true;
|
||||
|
||||
Ok(write_count >= self.rx.bytes_read)
|
||||
Ok(write_complete)
|
||||
}
|
||||
|
||||
fn process_rx(&mut self, vring: &mut Vring) -> Result<()> {
|
||||
@ -298,7 +179,7 @@ impl VhostUserNetBackend {
|
||||
match self.read_tap() {
|
||||
Ok(count) => {
|
||||
self.rx.bytes_read = count;
|
||||
if !self.rx_single_frame(vring)? {
|
||||
if !self.rx_single_frame(&mut vring.mut_queue())? {
|
||||
self.rx.deferred_frame = true;
|
||||
break;
|
||||
}
|
||||
@ -328,7 +209,7 @@ impl VhostUserNetBackend {
|
||||
|
||||
fn resume_rx(&mut self, vring: &mut Vring) -> Result<()> {
|
||||
if self.rx.deferred_frame {
|
||||
if self.rx_single_frame(vring)? {
|
||||
if self.rx_single_frame(&mut vring.mut_queue())? {
|
||||
self.rx.deferred_frame = false;
|
||||
// process_rx() was interrupted possibly before consuming all
|
||||
// packets in the tap; try continuing now.
|
||||
@ -345,65 +226,10 @@ impl VhostUserNetBackend {
|
||||
}
|
||||
}
|
||||
|
||||
fn process_tx(&mut self, vring: &mut Vring) -> Result<()> {
|
||||
fn process_tx(&mut self, mut queue: &mut Queue) -> Result<()> {
|
||||
let mem = self.mem.as_ref().ok_or(Error::NoMemoryConfigured)?;
|
||||
|
||||
let mut used_desc_heads = [(0, 0); QUEUE_SIZE];
|
||||
let mut used_count = 0;
|
||||
while let Some(avail_desc) = vring.mut_queue().iter(&mem).next() {
|
||||
let head_index = avail_desc.index;
|
||||
let mut read_count = 0;
|
||||
let mut next_desc = Some(avail_desc);
|
||||
|
||||
self.tx.iovec.clear();
|
||||
while let Some(desc) = next_desc {
|
||||
if desc.is_write_only() {
|
||||
break;
|
||||
}
|
||||
self.tx.iovec.push((desc.addr, desc.len as usize));
|
||||
read_count += desc.len as usize;
|
||||
next_desc = desc.next_descriptor();
|
||||
}
|
||||
used_desc_heads[used_count] = (head_index, read_count);
|
||||
used_count += 1;
|
||||
read_count = 0;
|
||||
// Copy buffer from across multiple descriptors.
|
||||
// TODO(performance - Issue #420): change this to use `writev()` instead of `write()`
|
||||
// and get rid of the intermediate buffer.
|
||||
for (desc_addr, desc_len) in self.tx.iovec.drain(..) {
|
||||
let limit = cmp::min((read_count + desc_len) as usize, self.tx.frame_buf.len());
|
||||
|
||||
let read_result = mem.read_slice(
|
||||
&mut self.tx.frame_buf[read_count..limit as usize],
|
||||
desc_addr,
|
||||
);
|
||||
match read_result {
|
||||
Ok(_) => {
|
||||
// Increment by number of bytes actually read
|
||||
read_count += limit - read_count;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to read slice: {:?}", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let write_result = self.tap.write(&self.tx.frame_buf[..read_count as usize]);
|
||||
match write_result {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
error!("net: tx: error failed to write to tap: {}", e);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if used_count > 0 {
|
||||
for &(desc_index, _) in &used_desc_heads[..used_count] {
|
||||
vring.mut_queue().add_used(&mem, desc_index, 0);
|
||||
}
|
||||
vring.signal_used_queue().unwrap();
|
||||
}
|
||||
self.tx.process_desc_chain(&mem, &mut self.tap, &mut queue);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -468,7 +294,7 @@ impl VhostUserBackend for VhostUserNetBackend {
|
||||
}
|
||||
TX_QUEUE_EVENT => {
|
||||
let mut vring = vrings[1].write().unwrap();
|
||||
self.process_tx(&mut vring)?;
|
||||
self.process_tx(&mut vring.mut_queue())?;
|
||||
}
|
||||
RX_TAP_EVENT => {
|
||||
let mut vring = vrings[0].write().unwrap();
|
||||
@ -476,7 +302,7 @@ impl VhostUserBackend for VhostUserNetBackend {
|
||||
// Process a deferred frame first if available. Don't read from tap again
|
||||
// until we manage to receive this deferred frame.
|
||||
{
|
||||
if self.rx_single_frame(&mut vring)? {
|
||||
if self.rx_single_frame(&mut vring.mut_queue())? {
|
||||
self.rx.deferred_frame = false;
|
||||
self.process_rx(&mut vring)?;
|
||||
} else if self.rx.deferred_irqs {
|
||||
|
@ -5,22 +5,23 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the THIRD-PARTY file.
|
||||
|
||||
use super::net_util::{
|
||||
build_net_config_space, open_tap, RxVirtio, TxVirtio, KILL_EVENT, NET_EVENTS_COUNT,
|
||||
PAUSE_EVENT, RX_QUEUE_EVENT, RX_TAP_EVENT, TX_QUEUE_EVENT,
|
||||
};
|
||||
use super::Error as DeviceError;
|
||||
use super::{
|
||||
ActivateError, ActivateResult, DeviceEventT, Queue, VirtioDevice, VirtioDeviceType,
|
||||
VirtioInterruptType,
|
||||
ActivateError, ActivateResult, Queue, VirtioDevice, VirtioDeviceType, VirtioInterruptType,
|
||||
};
|
||||
use crate::VirtioInterrupt;
|
||||
use arc_swap::ArcSwap;
|
||||
use epoll;
|
||||
use libc::EAGAIN;
|
||||
use libc::EFD_NONBLOCK;
|
||||
use net_gen;
|
||||
use net_util::{MacAddr, Tap, TapError, MAC_ADDR_LEN};
|
||||
use net_util::{MacAddr, Tap};
|
||||
use std::cmp;
|
||||
use std::io::Read;
|
||||
use std::io::{self, Write};
|
||||
use std::mem;
|
||||
use std::net::Ipv4Addr;
|
||||
use std::os::unix::io::{AsRawFd, RawFd};
|
||||
use std::result;
|
||||
@ -30,93 +31,21 @@ use std::thread;
|
||||
use std::vec::Vec;
|
||||
use virtio_bindings::bindings::virtio_net::*;
|
||||
use vm_device::{Migratable, MigratableError, Pausable, Snapshotable};
|
||||
use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap};
|
||||
use vm_memory::GuestMemoryMmap;
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
/// The maximum buffer size when segmentation offload is enabled. This
|
||||
/// includes the 12-byte virtio net header.
|
||||
/// http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html#x1-1740003
|
||||
const MAX_BUFFER_SIZE: usize = 65562;
|
||||
const QUEUE_SIZE: u16 = 256;
|
||||
const NUM_QUEUES: usize = 2;
|
||||
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE; NUM_QUEUES];
|
||||
|
||||
// A frame is available for reading from the tap device to receive in the guest.
|
||||
const RX_TAP_EVENT: DeviceEventT = 0;
|
||||
// The guest has made a buffer available to receive a frame into.
|
||||
const RX_QUEUE_EVENT: DeviceEventT = 1;
|
||||
// The transmit queue has a frame that is ready to send from the guest.
|
||||
const TX_QUEUE_EVENT: DeviceEventT = 2;
|
||||
// The device has been dropped.
|
||||
pub const KILL_EVENT: DeviceEventT = 3;
|
||||
// Number of DeviceEventT events supported by this implementation.
|
||||
pub const NET_EVENTS_COUNT: usize = 4;
|
||||
// The device should be paused.
|
||||
const PAUSE_EVENT: DeviceEventT = 5;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// Open tap device failed.
|
||||
TapOpen(TapError),
|
||||
/// Setting tap IP failed.
|
||||
TapSetIp(TapError),
|
||||
/// Setting tap netmask failed.
|
||||
TapSetNetmask(TapError),
|
||||
/// Setting tap interface offload flags failed.
|
||||
TapSetOffload(TapError),
|
||||
/// Setting vnet header size failed.
|
||||
TapSetVnetHdrSize(TapError),
|
||||
/// Enabling tap interface failed.
|
||||
TapEnable(TapError),
|
||||
/// Failed to open taps.
|
||||
OpenTap(super::net_util::Error),
|
||||
}
|
||||
|
||||
pub type Result<T> = result::Result<T, Error>;
|
||||
|
||||
struct TxVirtio {
|
||||
queue_evt: EventFd,
|
||||
queue: Queue,
|
||||
iovec: Vec<(GuestAddress, usize)>,
|
||||
frame_buf: [u8; MAX_BUFFER_SIZE],
|
||||
}
|
||||
|
||||
impl TxVirtio {
|
||||
fn new(queue: Queue, queue_evt: EventFd) -> Self {
|
||||
let tx_queue_max_size = queue.get_max_size() as usize;
|
||||
TxVirtio {
|
||||
queue_evt,
|
||||
queue,
|
||||
iovec: Vec::with_capacity(tx_queue_max_size),
|
||||
frame_buf: [0u8; MAX_BUFFER_SIZE],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct RxVirtio {
|
||||
queue_evt: EventFd,
|
||||
deferred_frame: bool,
|
||||
deferred_irqs: bool,
|
||||
queue: Queue,
|
||||
bytes_read: usize,
|
||||
frame_buf: [u8; MAX_BUFFER_SIZE],
|
||||
}
|
||||
|
||||
impl RxVirtio {
|
||||
fn new(queue: Queue, queue_evt: EventFd) -> Self {
|
||||
RxVirtio {
|
||||
queue_evt,
|
||||
deferred_frame: false,
|
||||
deferred_irqs: false,
|
||||
queue,
|
||||
bytes_read: 0,
|
||||
frame_buf: [0u8; MAX_BUFFER_SIZE],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn vnet_hdr_len() -> usize {
|
||||
mem::size_of::<virtio_net_hdr_v1>()
|
||||
}
|
||||
|
||||
struct NetEpollHandler {
|
||||
mem: Arc<ArcSwap<GuestMemoryMmap>>,
|
||||
tap: Tap,
|
||||
@ -140,71 +69,35 @@ impl NetEpollHandler {
|
||||
// Copies a single frame from `self.rx.frame_buf` into the guest. Returns true
|
||||
// if a buffer was used, and false if the frame must be deferred until a buffer
|
||||
// is made available by the driver.
|
||||
fn rx_single_frame(&mut self) -> bool {
|
||||
fn rx_single_frame(&mut self, mut queue: &mut Queue) -> bool {
|
||||
let mem = self.mem.load();
|
||||
let mut next_desc = self.rx.queue.iter(&mem).next();
|
||||
let next_desc = queue.iter(&mem).next();
|
||||
|
||||
if next_desc.is_none() {
|
||||
// Queue has no available descriptors
|
||||
if self.rx_tap_listening {
|
||||
self.unregister_tap_rx_listener().unwrap();
|
||||
unregister_listener(
|
||||
self.epoll_fd,
|
||||
self.tap.as_raw_fd(),
|
||||
epoll::Events::EPOLLIN,
|
||||
u64::from(RX_TAP_EVENT),
|
||||
)
|
||||
.unwrap();
|
||||
self.rx_tap_listening = false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// We just checked that the head descriptor exists.
|
||||
let head_index = next_desc.as_ref().unwrap().index;
|
||||
let mut write_count = 0;
|
||||
|
||||
// Copy from frame into buffer, which may span multiple descriptors.
|
||||
loop {
|
||||
match next_desc {
|
||||
Some(desc) => {
|
||||
if !desc.is_write_only() {
|
||||
break;
|
||||
}
|
||||
let limit = cmp::min(write_count + desc.len as usize, self.rx.bytes_read);
|
||||
let source_slice = &self.rx.frame_buf[write_count..limit];
|
||||
let write_result = mem.write_slice(source_slice, desc.addr);
|
||||
|
||||
match write_result {
|
||||
Ok(_) => {
|
||||
write_count = limit;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to write slice: {:?}", e);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if write_count >= self.rx.bytes_read {
|
||||
break;
|
||||
}
|
||||
next_desc = desc.next_descriptor();
|
||||
}
|
||||
None => {
|
||||
warn!("Receiving buffer is too small to hold frame of current size");
|
||||
break;
|
||||
}
|
||||
}
|
||||
self.rx.process_desc_chain(&mem, next_desc, &mut queue)
|
||||
}
|
||||
|
||||
self.rx.queue.add_used(&mem, head_index, write_count as u32);
|
||||
|
||||
// Mark that we have at least one pending packet and we need to interrupt the guest.
|
||||
self.rx.deferred_irqs = true;
|
||||
|
||||
write_count >= self.rx.bytes_read
|
||||
}
|
||||
|
||||
fn process_rx(&mut self) -> result::Result<(), DeviceError> {
|
||||
fn process_rx(&mut self, queue: &mut Queue) -> result::Result<(), DeviceError> {
|
||||
// Read as many frames as possible.
|
||||
loop {
|
||||
match self.read_tap() {
|
||||
Ok(count) => {
|
||||
self.rx.bytes_read = count;
|
||||
if !self.rx_single_frame() {
|
||||
if !self.rx_single_frame(queue) {
|
||||
self.rx.deferred_frame = true;
|
||||
break;
|
||||
}
|
||||
@ -225,22 +118,22 @@ impl NetEpollHandler {
|
||||
}
|
||||
if self.rx.deferred_irqs {
|
||||
self.rx.deferred_irqs = false;
|
||||
self.signal_used_queue(&self.rx.queue)
|
||||
self.signal_used_queue(queue)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn resume_rx(&mut self) -> result::Result<(), DeviceError> {
|
||||
fn resume_rx(&mut self, queue: &mut Queue) -> result::Result<(), DeviceError> {
|
||||
if self.rx.deferred_frame {
|
||||
if self.rx_single_frame() {
|
||||
if self.rx_single_frame(queue) {
|
||||
self.rx.deferred_frame = false;
|
||||
// process_rx() was interrupted possibly before consuming all
|
||||
// packets in the tap; try continuing now.
|
||||
self.process_rx()
|
||||
self.process_rx(queue)
|
||||
} else if self.rx.deferred_irqs {
|
||||
self.rx.deferred_irqs = false;
|
||||
self.signal_used_queue(&self.rx.queue)
|
||||
self.signal_used_queue(queue)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
@ -249,56 +142,10 @@ impl NetEpollHandler {
|
||||
}
|
||||
}
|
||||
|
||||
fn process_tx(&mut self) -> result::Result<(), DeviceError> {
|
||||
fn process_tx(&mut self, mut queue: &mut Queue) -> result::Result<(), DeviceError> {
|
||||
let mem = self.mem.load();
|
||||
while let Some(avail_desc) = self.tx.queue.iter(&mem).next() {
|
||||
let head_index = avail_desc.index;
|
||||
let mut read_count = 0;
|
||||
let mut next_desc = Some(avail_desc);
|
||||
|
||||
self.tx.iovec.clear();
|
||||
while let Some(desc) = next_desc {
|
||||
if desc.is_write_only() {
|
||||
break;
|
||||
}
|
||||
self.tx.iovec.push((desc.addr, desc.len as usize));
|
||||
read_count += desc.len as usize;
|
||||
next_desc = desc.next_descriptor();
|
||||
}
|
||||
|
||||
read_count = 0;
|
||||
// Copy buffer from across multiple descriptors.
|
||||
// TODO(performance - Issue #420): change this to use `writev()` instead of `write()`
|
||||
// and get rid of the intermediate buffer.
|
||||
for (desc_addr, desc_len) in self.tx.iovec.drain(..) {
|
||||
let limit = cmp::min((read_count + desc_len) as usize, self.tx.frame_buf.len());
|
||||
|
||||
let read_result = mem.read_slice(
|
||||
&mut self.tx.frame_buf[read_count..limit as usize],
|
||||
desc_addr,
|
||||
);
|
||||
match read_result {
|
||||
Ok(_) => {
|
||||
// Increment by number of bytes actually read
|
||||
read_count += limit - read_count;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to read slice: {:?}", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let write_result = self.tap.write(&self.tx.frame_buf[..read_count as usize]);
|
||||
match write_result {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
warn!("net: tx: error failed to write to tap: {}", e);
|
||||
}
|
||||
};
|
||||
|
||||
self.tx.queue.add_used(&mem, head_index, 0);
|
||||
}
|
||||
self.tx.process_desc_chain(&mem, &mut self.tap, &mut queue);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -307,47 +154,73 @@ impl NetEpollHandler {
|
||||
self.tap.read(&mut self.rx.frame_buf)
|
||||
}
|
||||
|
||||
fn register_tap_rx_listener(&self) -> std::result::Result<(), std::io::Error> {
|
||||
epoll::ctl(
|
||||
self.epoll_fd,
|
||||
epoll::ControlOptions::EPOLL_CTL_ADD,
|
||||
self.tap.as_raw_fd(),
|
||||
epoll::Event::new(epoll::Events::EPOLLIN, u64::from(RX_TAP_EVENT)),
|
||||
)?;
|
||||
Ok(())
|
||||
fn handle_rx_event(&mut self, mut queue: &mut Queue, queue_evt: &EventFd) {
|
||||
if let Err(e) = queue_evt.read() {
|
||||
error!("Failed to get rx queue event: {:?}", e);
|
||||
}
|
||||
|
||||
fn unregister_tap_rx_listener(&self) -> std::result::Result<(), std::io::Error> {
|
||||
epoll::ctl(
|
||||
self.resume_rx(&mut queue).unwrap();
|
||||
if !self.rx_tap_listening {
|
||||
register_listener(
|
||||
self.epoll_fd,
|
||||
epoll::ControlOptions::EPOLL_CTL_DEL,
|
||||
self.tap.as_raw_fd(),
|
||||
epoll::Event::new(epoll::Events::EPOLLIN, u64::from(RX_TAP_EVENT)),
|
||||
)?;
|
||||
Ok(())
|
||||
epoll::Events::EPOLLIN,
|
||||
u64::from(RX_TAP_EVENT),
|
||||
)
|
||||
.unwrap();
|
||||
self.rx_tap_listening = true;
|
||||
}
|
||||
}
|
||||
|
||||
fn run(&mut self, paused: Arc<AtomicBool>) -> result::Result<(), DeviceError> {
|
||||
fn handle_tx_event(&mut self, mut queue: &mut Queue, queue_evt: &EventFd) {
|
||||
if let Err(e) = queue_evt.read() {
|
||||
error!("Failed to get tx queue event: {:?}", e);
|
||||
}
|
||||
|
||||
self.process_tx(&mut queue).unwrap();
|
||||
}
|
||||
|
||||
fn handle_rx_tap_event(&mut self, mut queue: &mut Queue) {
|
||||
if self.rx.deferred_frame
|
||||
// Process a deferred frame first if available. Don't read from tap again
|
||||
// until we manage to receive this deferred frame.
|
||||
{
|
||||
if self.rx_single_frame(&mut queue) {
|
||||
self.rx.deferred_frame = false;
|
||||
self.process_rx(&mut queue).unwrap();
|
||||
} else if self.rx.deferred_irqs {
|
||||
self.rx.deferred_irqs = false;
|
||||
self.signal_used_queue(&queue).unwrap();
|
||||
}
|
||||
} else {
|
||||
self.process_rx(&mut queue).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn run(
|
||||
&mut self,
|
||||
paused: Arc<AtomicBool>,
|
||||
mut queues: Vec<Queue>,
|
||||
queue_evts: Vec<EventFd>,
|
||||
) -> result::Result<(), DeviceError> {
|
||||
// Create the epoll file descriptor
|
||||
self.epoll_fd = epoll::create(true).map_err(DeviceError::EpollCreateFd)?;
|
||||
// Add events
|
||||
// Add events
|
||||
epoll::ctl(
|
||||
self.epoll_fd,
|
||||
epoll::ControlOptions::EPOLL_CTL_ADD,
|
||||
self.rx.queue_evt.as_raw_fd(),
|
||||
queue_evts[0].as_raw_fd(),
|
||||
epoll::Event::new(epoll::Events::EPOLLIN, u64::from(RX_QUEUE_EVENT)),
|
||||
)
|
||||
.map_err(DeviceError::EpollCtl)?;
|
||||
epoll::ctl(
|
||||
self.epoll_fd,
|
||||
epoll::ControlOptions::EPOLL_CTL_ADD,
|
||||
self.tx.queue_evt.as_raw_fd(),
|
||||
queue_evts[1].as_raw_fd(),
|
||||
epoll::Event::new(epoll::Events::EPOLLIN, u64::from(TX_QUEUE_EVENT)),
|
||||
)
|
||||
.map_err(DeviceError::EpollCtl)?;
|
||||
self.register_tap_rx_listener()
|
||||
.map_err(DeviceError::EpollCtl)?;
|
||||
self.rx_tap_listening = true;
|
||||
epoll::ctl(
|
||||
self.epoll_fd,
|
||||
epoll::ControlOptions::EPOLL_CTL_ADD,
|
||||
@ -363,8 +236,7 @@ impl NetEpollHandler {
|
||||
)
|
||||
.map_err(DeviceError::EpollCtl)?;
|
||||
|
||||
const EPOLL_EVENTS_LEN: usize = 100;
|
||||
let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
|
||||
let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); NET_EVENTS_COUNT];
|
||||
|
||||
'epoll: loop {
|
||||
let num_events = match epoll::wait(self.epoll_fd, -1, &mut events[..]) {
|
||||
@ -389,43 +261,13 @@ impl NetEpollHandler {
|
||||
|
||||
match ev_type {
|
||||
RX_QUEUE_EVENT => {
|
||||
debug!("RX_QUEUE_EVENT received");
|
||||
if let Err(e) = self.rx.queue_evt.read() {
|
||||
error!("Failed to get rx queue event: {:?}", e);
|
||||
break 'epoll;
|
||||
}
|
||||
|
||||
self.resume_rx().unwrap();
|
||||
if !self.rx_tap_listening {
|
||||
self.register_tap_rx_listener().unwrap();
|
||||
self.rx_tap_listening = true;
|
||||
}
|
||||
self.handle_rx_event(&mut queues[0], &queue_evts[0]);
|
||||
}
|
||||
TX_QUEUE_EVENT => {
|
||||
debug!("TX_QUEUE_EVENT received");
|
||||
if let Err(e) = self.tx.queue_evt.read() {
|
||||
error!("Failed to get tx queue event: {:?}", e);
|
||||
break 'epoll;
|
||||
}
|
||||
|
||||
self.process_tx().unwrap();
|
||||
self.handle_tx_event(&mut queues[1], &queue_evts[1]);
|
||||
}
|
||||
RX_TAP_EVENT => {
|
||||
debug!("RX_TAP_EVENT received");
|
||||
if self.rx.deferred_frame
|
||||
// Process a deferred frame first if available. Don't read from tap again
|
||||
// until we manage to receive this deferred frame.
|
||||
{
|
||||
if self.rx_single_frame() {
|
||||
self.rx.deferred_frame = false;
|
||||
self.process_rx().unwrap();
|
||||
} else if self.rx.deferred_irqs {
|
||||
self.rx.deferred_irqs = false;
|
||||
self.signal_used_queue(&self.rx.queue).unwrap();
|
||||
}
|
||||
} else {
|
||||
self.process_rx().unwrap();
|
||||
}
|
||||
self.handle_rx_tap_event(&mut queues[0]);
|
||||
}
|
||||
KILL_EVENT => {
|
||||
debug!("KILL_EVENT received, stopping epoll loop");
|
||||
@ -446,11 +288,38 @@ impl NetEpollHandler {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register_listener(
|
||||
epoll_fd: RawFd,
|
||||
fd: RawFd,
|
||||
ev_type: epoll::Events,
|
||||
data: u64,
|
||||
) -> result::Result<(), io::Error> {
|
||||
epoll::ctl(
|
||||
epoll_fd,
|
||||
epoll::ControlOptions::EPOLL_CTL_ADD,
|
||||
fd,
|
||||
epoll::Event::new(ev_type, data),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn unregister_listener(
|
||||
epoll_fd: RawFd,
|
||||
fd: RawFd,
|
||||
ev_type: epoll::Events,
|
||||
data: u64,
|
||||
) -> result::Result<(), io::Error> {
|
||||
epoll::ctl(
|
||||
epoll_fd,
|
||||
epoll::ControlOptions::EPOLL_CTL_DEL,
|
||||
fd,
|
||||
epoll::Event::new(ev_type, data),
|
||||
)
|
||||
}
|
||||
|
||||
pub struct Net {
|
||||
kill_evt: Option<EventFd>,
|
||||
pause_evt: Option<EventFd>,
|
||||
@ -468,17 +337,7 @@ pub struct Net {
|
||||
|
||||
impl Net {
|
||||
/// Create a new virtio network device with the given TAP interface.
|
||||
pub fn new_with_tap(tap: Tap, guest_mac: Option<&MacAddr>, iommu: bool) -> Result<Self> {
|
||||
// Set offload flags to match the virtio features below.
|
||||
tap.set_offload(
|
||||
net_gen::TUN_F_CSUM | net_gen::TUN_F_UFO | net_gen::TUN_F_TSO4 | net_gen::TUN_F_TSO6,
|
||||
)
|
||||
.map_err(Error::TapSetOffload)?;
|
||||
|
||||
let vnet_hdr_size = vnet_hdr_len() as i32;
|
||||
tap.set_vnet_hdr_size(vnet_hdr_size)
|
||||
.map_err(Error::TapSetVnetHdrSize)?;
|
||||
|
||||
pub fn new_with_tap(tap: Tap, guest_mac: Option<MacAddr>, iommu: bool) -> Result<Self> {
|
||||
let mut avail_features = 1 << VIRTIO_NET_F_GUEST_CSUM
|
||||
| 1 << VIRTIO_NET_F_CSUM
|
||||
| 1 << VIRTIO_NET_F_GUEST_TSO4
|
||||
@ -491,15 +350,9 @@ impl Net {
|
||||
avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
|
||||
}
|
||||
|
||||
let mut config_space;
|
||||
let config_space;
|
||||
if let Some(mac) = guest_mac {
|
||||
config_space = Vec::with_capacity(MAC_ADDR_LEN);
|
||||
// This is safe, because we know the capacity is large enough.
|
||||
unsafe { config_space.set_len(MAC_ADDR_LEN) }
|
||||
config_space[..].copy_from_slice(mac.get_bytes());
|
||||
// When this feature isn't available, the driver generates a random MAC address.
|
||||
// Otherwise, it should attempt to read the device MAC address from the config space.
|
||||
avail_features |= 1 << VIRTIO_NET_F_MAC;
|
||||
config_space = build_net_config_space(mac, &mut avail_features);
|
||||
} else {
|
||||
config_space = Vec::new();
|
||||
}
|
||||
@ -523,13 +376,10 @@ impl Net {
|
||||
pub fn new(
|
||||
ip_addr: Ipv4Addr,
|
||||
netmask: Ipv4Addr,
|
||||
guest_mac: Option<&MacAddr>,
|
||||
guest_mac: Option<MacAddr>,
|
||||
iommu: bool,
|
||||
) -> Result<Self> {
|
||||
let tap = Tap::new().map_err(Error::TapOpen)?;
|
||||
tap.set_ip_addr(ip_addr).map_err(Error::TapSetIp)?;
|
||||
tap.set_netmask(netmask).map_err(Error::TapSetNetmask)?;
|
||||
tap.enable().map_err(Error::TapEnable)?;
|
||||
let tap = open_tap(ip_addr, netmask).map_err(Error::OpenTap)?;
|
||||
|
||||
Self::new_with_tap(tap, guest_mac, iommu)
|
||||
}
|
||||
@ -658,15 +508,17 @@ impl VirtioDevice for Net {
|
||||
}
|
||||
self.queue_evts = Some(tmp_queue_evts);
|
||||
|
||||
let rx_queue = queues.remove(0);
|
||||
let tx_queue = queues.remove(0);
|
||||
let rx_queue_evt = queue_evts.remove(0);
|
||||
let tx_queue_evt = queue_evts.remove(0);
|
||||
let mut queues_v = Vec::new();
|
||||
let mut queue_evts_v = Vec::new();
|
||||
queues_v.push(queues.remove(0));
|
||||
queues_v.push(queues.remove(0));
|
||||
queue_evts_v.push(queue_evts.remove(0));
|
||||
queue_evts_v.push(queue_evts.remove(0));
|
||||
let mut handler = NetEpollHandler {
|
||||
mem,
|
||||
tap,
|
||||
rx: RxVirtio::new(rx_queue, rx_queue_evt),
|
||||
tx: TxVirtio::new(tx_queue, tx_queue_evt),
|
||||
rx: RxVirtio::new(),
|
||||
tx: TxVirtio::new(),
|
||||
interrupt_cb,
|
||||
kill_evt,
|
||||
pause_evt,
|
||||
@ -677,7 +529,7 @@ impl VirtioDevice for Net {
|
||||
let paused = self.paused.clone();
|
||||
thread::Builder::new()
|
||||
.name("virtio_net".to_string())
|
||||
.spawn(move || handler.run(paused))
|
||||
.spawn(move || handler.run(paused, queues_v, queue_evts_v))
|
||||
.map(|thread| self.epoll_thread = Some(thread))
|
||||
.map_err(|e| {
|
||||
error!("failed to clone the virtio-net epoll thread: {}", e);
|
||||
|
@ -1,6 +1,7 @@
|
||||
// Copyright 2019 Intel Corporation. All Rights Reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
use super::super::net_util::build_net_config_space;
|
||||
use super::super::{ActivateError, ActivateResult, Queue, VirtioDevice, VirtioDeviceType};
|
||||
use super::handler::*;
|
||||
use super::vu_common_ctrl::*;
|
||||
@ -10,7 +11,7 @@ use crate::VirtioInterrupt;
|
||||
use arc_swap::ArcSwap;
|
||||
use libc;
|
||||
use libc::EFD_NONBLOCK;
|
||||
use net_util::{MacAddr, MAC_ADDR_LEN};
|
||||
use net_util::MacAddr;
|
||||
use std::cmp;
|
||||
use std::io::Write;
|
||||
use std::result;
|
||||
@ -98,10 +99,7 @@ impl Net {
|
||||
return Err(Error::VhostUserProtocolNotSupport);
|
||||
}
|
||||
|
||||
let mut config_space = Vec::with_capacity(MAC_ADDR_LEN);
|
||||
unsafe { config_space.set_len(MAC_ADDR_LEN) }
|
||||
config_space[..].copy_from_slice(mac_addr.get_bytes());
|
||||
avail_features |= 1 << virtio_net::VIRTIO_NET_F_MAC;
|
||||
let config_space = build_net_config_space(mac_addr, &mut avail_features);
|
||||
|
||||
// Send set_vring_base here, since it could tell backends, like OVS + DPDK,
|
||||
// how many virt queues to be handled, which backend required to know at early stage.
|
||||
|
@ -993,7 +993,7 @@ impl DeviceManager {
|
||||
let virtio_net_device = if let Some(ref tap_if_name) = net_cfg.tap {
|
||||
let tap = Tap::open_named(tap_if_name).map_err(DeviceManagerError::OpenTap)?;
|
||||
Arc::new(Mutex::new(
|
||||
vm_virtio::Net::new_with_tap(tap, Some(&net_cfg.mac), net_cfg.iommu)
|
||||
vm_virtio::Net::new_with_tap(tap, Some(net_cfg.mac), net_cfg.iommu)
|
||||
.map_err(DeviceManagerError::CreateVirtioNet)?,
|
||||
))
|
||||
} else {
|
||||
@ -1001,7 +1001,7 @@ impl DeviceManager {
|
||||
vm_virtio::Net::new(
|
||||
net_cfg.ip,
|
||||
net_cfg.mask,
|
||||
Some(&net_cfg.mac),
|
||||
Some(net_cfg.mac),
|
||||
net_cfg.iommu,
|
||||
)
|
||||
.map_err(DeviceManagerError::CreateVirtioNet)?,
|
||||
|
Loading…
x
Reference in New Issue
Block a user