2020-07-07 14:02:18 +00:00
|
|
|
// Copyright (c) 2020 Intel Corporation. All rights reserved.
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
|
|
|
|
|
2021-06-28 14:28:26 +00:00
|
|
|
use super::{register_listener, unregister_listener, vnet_hdr_len, Tap};
|
2021-06-02 19:08:04 +00:00
|
|
|
use crate::GuestMemoryMmap;
|
2021-03-17 22:41:52 +00:00
|
|
|
use rate_limiter::{RateLimiter, TokenType};
|
2020-07-07 15:50:13 +00:00
|
|
|
use std::io;
|
2020-07-07 14:02:18 +00:00
|
|
|
use std::num::Wrapping;
|
2020-07-07 15:50:13 +00:00
|
|
|
use std::os::unix::io::{AsRawFd, RawFd};
|
|
|
|
use std::sync::atomic::{AtomicU64, Ordering};
|
|
|
|
use std::sync::Arc;
|
2021-06-02 19:08:04 +00:00
|
|
|
use vm_memory::{Bytes, GuestAddressSpace, GuestMemory, GuestMemoryAtomic};
|
2021-02-16 21:17:37 +00:00
|
|
|
use vm_virtio::Queue;
|
2020-07-07 14:02:18 +00:00
|
|
|
|
|
|
|
/// Transmit-side bookkeeping for one virtio-net queue.
///
/// The counters are accumulated while TX descriptor chains are processed and
/// are wrapping so that very long-lived devices can never overflow-panic.
#[derive(Clone, Debug)]
pub struct TxVirtio {
    /// Bytes successfully written to the TAP device, not counting the
    /// virtio-net header that precedes every frame.
    pub counter_bytes: Wrapping<u64>,
    /// Number of frames successfully written to the TAP device.
    pub counter_frames: Wrapping<u64>,
}
|
|
|
|
|
|
|
|
impl Default for TxVirtio {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self::new()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl TxVirtio {
|
|
|
|
pub fn new() -> Self {
|
|
|
|
TxVirtio {
|
|
|
|
counter_bytes: Wrapping(0),
|
|
|
|
counter_frames: Wrapping(0),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
pub fn process_desc_chain(
|
|
|
|
&mut self,
|
|
|
|
mem: &GuestMemoryMmap,
|
|
|
|
tap: &mut Tap,
|
|
|
|
queue: &mut Queue,
|
2021-03-17 22:41:52 +00:00
|
|
|
rate_limiter: &mut Option<RateLimiter>,
|
2021-06-28 14:28:26 +00:00
|
|
|
) -> Result<bool, NetQueuePairError> {
|
|
|
|
let mut retry_write = false;
|
2021-06-30 23:34:13 +00:00
|
|
|
let mut rate_limit_reached = false;
|
2021-06-23 20:42:17 +00:00
|
|
|
while let Some(avail_desc) = queue.iter(mem).next() {
|
2021-06-30 23:34:13 +00:00
|
|
|
if rate_limit_reached {
|
|
|
|
queue.go_to_previous_position();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2020-07-07 14:02:18 +00:00
|
|
|
let head_index = avail_desc.index;
|
|
|
|
let mut next_desc = Some(avail_desc);
|
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
let mut iovecs = Vec::new();
|
2020-07-07 14:02:18 +00:00
|
|
|
while let Some(desc) = next_desc {
|
2021-03-24 09:05:14 +00:00
|
|
|
if !desc.is_write_only() && desc.len > 0 {
|
2021-02-16 21:17:37 +00:00
|
|
|
let buf = mem
|
|
|
|
.get_slice(desc.addr, desc.len as usize)
|
|
|
|
.map_err(NetQueuePairError::GuestMemory)?
|
|
|
|
.as_ptr();
|
|
|
|
let iovec = libc::iovec {
|
|
|
|
iov_base: buf as *mut libc::c_void,
|
|
|
|
iov_len: desc.len as libc::size_t,
|
|
|
|
};
|
|
|
|
iovecs.push(iovec);
|
2020-07-07 14:02:18 +00:00
|
|
|
}
|
|
|
|
next_desc = desc.next_descriptor();
|
|
|
|
}
|
|
|
|
|
2021-06-30 23:34:13 +00:00
|
|
|
let len = if !iovecs.is_empty() {
|
2021-02-16 21:17:37 +00:00
|
|
|
let result = unsafe {
|
|
|
|
libc::writev(
|
|
|
|
tap.as_raw_fd() as libc::c_int,
|
|
|
|
iovecs.as_ptr() as *const libc::iovec,
|
|
|
|
iovecs.len() as libc::c_int,
|
|
|
|
)
|
|
|
|
};
|
2021-06-29 14:52:42 +00:00
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
if result < 0 {
|
|
|
|
let e = std::io::Error::last_os_error();
|
2021-04-22 13:18:57 +00:00
|
|
|
|
|
|
|
/* EAGAIN */
|
|
|
|
if e.kind() == std::io::ErrorKind::WouldBlock {
|
2021-06-28 14:28:26 +00:00
|
|
|
queue.go_to_previous_position();
|
|
|
|
retry_write = true;
|
2021-04-22 13:18:57 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
error!("net: tx: failed writing to tap: {}", e);
|
2021-02-16 21:17:37 +00:00
|
|
|
return Err(NetQueuePairError::WriteTap(e));
|
2020-07-07 14:02:18 +00:00
|
|
|
}
|
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
self.counter_bytes += Wrapping(result as u64 - vnet_hdr_len() as u64);
|
|
|
|
self.counter_frames += Wrapping(1);
|
2021-06-30 23:34:13 +00:00
|
|
|
|
|
|
|
result as u32
|
|
|
|
} else {
|
|
|
|
0
|
|
|
|
};
|
2020-07-07 14:02:18 +00:00
|
|
|
|
2021-06-23 20:42:17 +00:00
|
|
|
queue.add_used(mem, head_index, 0);
|
|
|
|
queue.update_avail_event(mem);
|
2021-06-30 23:34:13 +00:00
|
|
|
|
|
|
|
// For the sake of simplicity (similar to the RX rate limiting), we always
|
|
|
|
// let the 'last' descriptor chain go-through even if it was over the rate
|
|
|
|
// limit, and simply stop processing oncoming `avail_desc` if any.
|
|
|
|
if let Some(rate_limiter) = rate_limiter {
|
|
|
|
rate_limit_reached = !rate_limiter.consume(1, TokenType::Ops)
|
|
|
|
|| !rate_limiter.consume(len as u64, TokenType::Bytes);
|
|
|
|
}
|
2020-07-07 14:02:18 +00:00
|
|
|
}
|
2021-02-16 21:17:37 +00:00
|
|
|
|
2021-06-28 14:28:26 +00:00
|
|
|
Ok(retry_write)
|
2020-07-07 14:02:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Receive-side bookkeeping for one virtio-net queue.
///
/// The counters are accumulated while RX descriptor chains are filled from
/// the TAP device and are wrapping so they can never overflow-panic.
#[derive(Clone, Debug)]
pub struct RxVirtio {
    /// Bytes successfully read from the TAP device, not counting the
    /// virtio-net header that precedes every frame.
    pub counter_bytes: Wrapping<u64>,
    /// Number of frames successfully read from the TAP device.
    pub counter_frames: Wrapping<u64>,
}
|
|
|
|
|
|
|
|
impl Default for RxVirtio {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self::new()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl RxVirtio {
|
|
|
|
pub fn new() -> Self {
|
|
|
|
RxVirtio {
|
|
|
|
counter_bytes: Wrapping(0),
|
|
|
|
counter_frames: Wrapping(0),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn process_desc_chain(
|
|
|
|
&mut self,
|
|
|
|
mem: &GuestMemoryMmap,
|
2021-02-16 21:17:37 +00:00
|
|
|
tap: &mut Tap,
|
2020-07-07 14:02:18 +00:00
|
|
|
queue: &mut Queue,
|
2021-03-25 21:17:05 +00:00
|
|
|
rate_limiter: &mut Option<RateLimiter>,
|
2021-02-16 21:17:37 +00:00
|
|
|
) -> Result<bool, NetQueuePairError> {
|
|
|
|
let mut exhausted_descs = true;
|
2021-03-25 21:17:05 +00:00
|
|
|
let mut rate_limit_reached = false;
|
|
|
|
|
2021-06-23 20:42:17 +00:00
|
|
|
while let Some(avail_desc) = queue.iter(mem).next() {
|
2021-03-25 21:17:05 +00:00
|
|
|
if rate_limit_reached {
|
|
|
|
exhausted_descs = false;
|
|
|
|
queue.go_to_previous_position();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
let head_index = avail_desc.index;
|
|
|
|
let num_buffers_addr = mem.checked_offset(avail_desc.addr, 10).unwrap();
|
|
|
|
let mut next_desc = Some(avail_desc);
|
2020-07-07 14:02:18 +00:00
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
let mut iovecs = Vec::new();
|
|
|
|
while let Some(desc) = next_desc {
|
2021-03-24 09:05:14 +00:00
|
|
|
if desc.is_write_only() && desc.len > 0 {
|
2021-02-16 21:17:37 +00:00
|
|
|
let buf = mem
|
|
|
|
.get_slice(desc.addr, desc.len as usize)
|
|
|
|
.map_err(NetQueuePairError::GuestMemory)?
|
|
|
|
.as_ptr();
|
|
|
|
let iovec = libc::iovec {
|
|
|
|
iov_base: buf as *mut libc::c_void,
|
|
|
|
iov_len: desc.len as libc::size_t,
|
2020-07-07 14:02:18 +00:00
|
|
|
};
|
2021-02-16 21:17:37 +00:00
|
|
|
iovecs.push(iovec);
|
|
|
|
}
|
|
|
|
next_desc = desc.next_descriptor();
|
|
|
|
}
|
2020-07-07 14:02:18 +00:00
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
let len = if !iovecs.is_empty() {
|
|
|
|
let result = unsafe {
|
|
|
|
libc::readv(
|
|
|
|
tap.as_raw_fd() as libc::c_int,
|
|
|
|
iovecs.as_ptr() as *const libc::iovec,
|
|
|
|
iovecs.len() as libc::c_int,
|
|
|
|
)
|
|
|
|
};
|
|
|
|
if result < 0 {
|
|
|
|
let e = std::io::Error::last_os_error();
|
|
|
|
exhausted_descs = false;
|
|
|
|
queue.go_to_previous_position();
|
|
|
|
|
2021-04-22 13:18:57 +00:00
|
|
|
/* EAGAIN */
|
|
|
|
if e.kind() == std::io::ErrorKind::WouldBlock {
|
|
|
|
break;
|
2020-07-07 14:02:18 +00:00
|
|
|
}
|
2021-02-16 21:17:37 +00:00
|
|
|
|
|
|
|
error!("net: rx: failed reading from tap: {}", e);
|
|
|
|
return Err(NetQueuePairError::ReadTap(e));
|
2020-07-07 14:02:18 +00:00
|
|
|
}
|
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
// Write num_buffers to guest memory. We simply write 1 as we
|
|
|
|
// never spread the frame over more than one descriptor chain.
|
|
|
|
mem.write_obj(1u16, num_buffers_addr)
|
|
|
|
.map_err(NetQueuePairError::GuestMemory)?;
|
2020-07-07 14:02:18 +00:00
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
self.counter_bytes += Wrapping(result as u64 - vnet_hdr_len() as u64);
|
|
|
|
self.counter_frames += Wrapping(1);
|
2020-07-07 14:02:18 +00:00
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
result as u32
|
|
|
|
} else {
|
|
|
|
0
|
|
|
|
};
|
2020-07-07 14:02:18 +00:00
|
|
|
|
2021-06-23 20:42:17 +00:00
|
|
|
queue.add_used(mem, head_index, len);
|
|
|
|
queue.update_avail_event(mem);
|
2021-03-25 21:17:05 +00:00
|
|
|
|
|
|
|
// For the sake of simplicity (keeping the handling of RX_QUEUE_EVENT and
|
|
|
|
// RX_TAP_EVENT totally asynchronous), we always let the 'last' descriptor
|
|
|
|
// chain go-through even if it was over the rate limit, and simply stop
|
|
|
|
// processing oncoming `avail_desc` if any.
|
|
|
|
if let Some(rate_limiter) = rate_limiter {
|
|
|
|
rate_limit_reached = !rate_limiter.consume(1, TokenType::Ops)
|
|
|
|
|| !rate_limiter.consume(len as u64, TokenType::Bytes);
|
|
|
|
}
|
2020-07-07 14:02:18 +00:00
|
|
|
}
|
2021-02-16 21:17:37 +00:00
|
|
|
|
|
|
|
Ok(exhausted_descs)
|
2020-07-07 14:02:18 +00:00
|
|
|
}
|
|
|
|
}
|
2020-07-07 15:50:13 +00:00
|
|
|
|
|
|
|
/// Traffic counters of a network device.
///
/// Every counter is an `Arc<AtomicU64>`, so cloning a `NetCounters` yields
/// handles to the same underlying atomics rather than independent copies.
#[derive(Default, Clone, Debug)]
pub struct NetCounters {
    /// Total bytes transmitted.
    pub tx_bytes: Arc<AtomicU64>,
    /// Total frames transmitted.
    pub tx_frames: Arc<AtomicU64>,
    /// Total bytes received.
    pub rx_bytes: Arc<AtomicU64>,
    /// Total frames received.
    pub rx_frames: Arc<AtomicU64>,
}
|
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
|
pub enum NetQueuePairError {
|
|
|
|
/// No memory configured
|
|
|
|
NoMemoryConfigured,
|
|
|
|
/// Error registering listener
|
|
|
|
RegisterListener(io::Error),
|
|
|
|
/// Error unregistering listener
|
|
|
|
UnregisterListener(io::Error),
|
2021-02-16 21:17:37 +00:00
|
|
|
/// Error writing to the TAP device
|
|
|
|
WriteTap(io::Error),
|
2020-07-07 15:50:13 +00:00
|
|
|
/// Error reading from the TAP device
|
2021-02-16 21:17:37 +00:00
|
|
|
ReadTap(io::Error),
|
|
|
|
/// Error related to guest memory
|
|
|
|
GuestMemory(vm_memory::GuestMemoryError),
|
2020-07-07 15:50:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub struct NetQueuePair {
|
|
|
|
pub mem: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
|
|
|
|
pub tap: Tap,
|
2021-06-28 14:28:26 +00:00
|
|
|
// With epoll each FD must be unique. So in order to filter the
|
|
|
|
// events we need to get a second FD responding to the original
|
|
|
|
// device so that we can send EPOLLOUT and EPOLLIN to separate
|
|
|
|
// events.
|
|
|
|
pub tap_for_write_epoll: Tap,
|
2020-07-07 15:50:13 +00:00
|
|
|
pub rx: RxVirtio,
|
|
|
|
pub tx: TxVirtio,
|
|
|
|
pub epoll_fd: Option<RawFd>,
|
|
|
|
pub rx_tap_listening: bool,
|
2021-06-28 14:28:26 +00:00
|
|
|
pub tx_tap_listening: bool,
|
2020-07-07 15:50:13 +00:00
|
|
|
pub counters: NetCounters,
|
2021-06-28 13:51:34 +00:00
|
|
|
pub tap_rx_event_id: u16,
|
2021-06-28 14:28:26 +00:00
|
|
|
pub tap_tx_event_id: u16,
|
2021-03-25 21:17:05 +00:00
|
|
|
pub rx_desc_avail: bool,
|
|
|
|
pub rx_rate_limiter: Option<RateLimiter>,
|
2021-03-17 22:41:52 +00:00
|
|
|
pub tx_rate_limiter: Option<RateLimiter>,
|
2020-07-07 15:50:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl NetQueuePair {
|
2021-10-19 14:01:42 +00:00
|
|
|
pub fn process_tx(&mut self, queue: &mut Queue) -> Result<bool, NetQueuePairError> {
|
2020-07-07 15:50:13 +00:00
|
|
|
let mem = self
|
|
|
|
.mem
|
|
|
|
.as_ref()
|
|
|
|
.ok_or(NetQueuePairError::NoMemoryConfigured)
|
|
|
|
.map(|m| m.memory())?;
|
|
|
|
|
2021-10-19 14:01:42 +00:00
|
|
|
let tx_tap_retry =
|
|
|
|
self.tx
|
|
|
|
.process_desc_chain(&mem, &mut self.tap, queue, &mut self.tx_rate_limiter)?;
|
2021-06-28 14:28:26 +00:00
|
|
|
|
|
|
|
// We got told to try again when writing to the tap. Wait for the TAP to be writable
|
|
|
|
if tx_tap_retry && !self.tx_tap_listening {
|
|
|
|
register_listener(
|
|
|
|
self.epoll_fd.unwrap(),
|
|
|
|
self.tap_for_write_epoll.as_raw_fd(),
|
|
|
|
epoll::Events::EPOLLOUT,
|
|
|
|
u64::from(self.tap_tx_event_id),
|
|
|
|
)
|
|
|
|
.map_err(NetQueuePairError::RegisterListener)?;
|
|
|
|
self.tx_tap_listening = true;
|
|
|
|
info!("Writing to TAP returned EAGAIN. Listening for TAP to become writable.");
|
|
|
|
} else if !tx_tap_retry && self.tx_tap_listening {
|
|
|
|
unregister_listener(
|
|
|
|
self.epoll_fd.unwrap(),
|
|
|
|
self.tap_for_write_epoll.as_raw_fd(),
|
|
|
|
epoll::Events::EPOLLOUT,
|
|
|
|
u64::from(self.tap_tx_event_id),
|
|
|
|
)
|
|
|
|
.map_err(NetQueuePairError::UnregisterListener)?;
|
|
|
|
self.tx_tap_listening = false;
|
|
|
|
info!("Writing to TAP succeeded. No longer listening for TAP to become writable.");
|
|
|
|
}
|
2020-07-07 15:50:13 +00:00
|
|
|
|
|
|
|
self.counters
|
2021-02-16 21:17:37 +00:00
|
|
|
.tx_bytes
|
|
|
|
.fetch_add(self.tx.counter_bytes.0, Ordering::AcqRel);
|
2020-07-07 15:50:13 +00:00
|
|
|
self.counters
|
2021-02-16 21:17:37 +00:00
|
|
|
.tx_frames
|
|
|
|
.fetch_add(self.tx.counter_frames.0, Ordering::AcqRel);
|
|
|
|
self.tx.counter_bytes = Wrapping(0);
|
|
|
|
self.tx.counter_frames = Wrapping(0);
|
2020-07-07 15:50:13 +00:00
|
|
|
|
2021-02-16 21:17:37 +00:00
|
|
|
Ok(queue.needs_notification(&mem, queue.next_used))
|
2020-07-07 15:50:13 +00:00
|
|
|
}
|
|
|
|
|
2021-10-19 14:01:42 +00:00
|
|
|
pub fn process_rx(&mut self, queue: &mut Queue) -> Result<bool, NetQueuePairError> {
|
2021-02-16 21:17:37 +00:00
|
|
|
let mem = self
|
|
|
|
.mem
|
|
|
|
.as_ref()
|
|
|
|
.ok_or(NetQueuePairError::NoMemoryConfigured)
|
|
|
|
.map(|m| m.memory())?;
|
|
|
|
|
2021-10-19 14:01:42 +00:00
|
|
|
self.rx_desc_avail =
|
|
|
|
!self
|
|
|
|
.rx
|
|
|
|
.process_desc_chain(&mem, &mut self.tap, queue, &mut self.rx_rate_limiter)?;
|
2021-03-25 21:17:05 +00:00
|
|
|
let rate_limit_reached = self
|
|
|
|
.rx_rate_limiter
|
|
|
|
.as_ref()
|
|
|
|
.map_or(false, |r| r.is_blocked());
|
|
|
|
|
|
|
|
// Stop listening on the `RX_TAP_EVENT` when:
|
|
|
|
// 1) there is no available describles, or
|
|
|
|
// 2) the RX rate limit is reached.
|
|
|
|
if self.rx_tap_listening && (!self.rx_desc_avail || rate_limit_reached) {
|
2021-02-16 21:17:37 +00:00
|
|
|
unregister_listener(
|
2020-07-07 15:50:13 +00:00
|
|
|
self.epoll_fd.unwrap(),
|
|
|
|
self.tap.as_raw_fd(),
|
|
|
|
epoll::Events::EPOLLIN,
|
2021-06-28 13:51:34 +00:00
|
|
|
u64::from(self.tap_rx_event_id),
|
2020-07-07 15:50:13 +00:00
|
|
|
)
|
2021-02-16 21:17:37 +00:00
|
|
|
.map_err(NetQueuePairError::UnregisterListener)?;
|
|
|
|
self.rx_tap_listening = false;
|
2020-07-07 15:50:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
self.counters
|
2021-02-16 21:17:37 +00:00
|
|
|
.rx_bytes
|
|
|
|
.fetch_add(self.rx.counter_bytes.0, Ordering::AcqRel);
|
2020-07-07 15:50:13 +00:00
|
|
|
self.counters
|
2021-02-16 21:17:37 +00:00
|
|
|
.rx_frames
|
|
|
|
.fetch_add(self.rx.counter_frames.0, Ordering::AcqRel);
|
|
|
|
self.rx.counter_bytes = Wrapping(0);
|
|
|
|
self.rx.counter_frames = Wrapping(0);
|
2020-07-07 15:50:13 +00:00
|
|
|
|
|
|
|
Ok(queue.needs_notification(&mem, queue.next_used))
|
|
|
|
}
|
|
|
|
}
|