diff --git a/net_util/src/queue_pair.rs b/net_util/src/queue_pair.rs index be0b80de0..d5c6a4816 100644 --- a/net_util/src/queue_pair.rs +++ b/net_util/src/queue_pair.rs @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use super::{unregister_listener, vnet_hdr_len, Tap}; +use super::{register_listener, unregister_listener, vnet_hdr_len, Tap}; use crate::GuestMemoryMmap; use rate_limiter::{RateLimiter, TokenType}; use std::io; @@ -39,7 +39,8 @@ impl TxVirtio { tap: &mut Tap, queue: &mut Queue, rate_limiter: &mut Option, - ) -> Result<(), NetQueuePairError> { + ) -> Result { + let mut retry_write = false; while let Some(avail_desc) = queue.iter(mem).next() { let head_index = avail_desc.index; let mut next_desc = Some(avail_desc); @@ -93,11 +94,12 @@ impl TxVirtio { }; if result < 0 { let e = std::io::Error::last_os_error(); - queue.go_to_previous_position(); /* EAGAIN */ if e.kind() == std::io::ErrorKind::WouldBlock { warn!("net: tx: (recoverable) failed writing to tap: {}", e); + queue.go_to_previous_position(); + retry_write = true; break; } error!("net: tx: failed writing to tap: {}", e); @@ -112,7 +114,7 @@ impl TxVirtio { queue.update_avail_event(mem); } - Ok(()) + Ok(retry_write) } } @@ -252,12 +254,19 @@ pub enum NetQueuePairError { pub struct NetQueuePair { pub mem: Option>, pub tap: Tap, + // With epoll each FD must be unique. So in order to filter the + // events we need to get a second FD responding to the original + // device so that we can send EPOLLOUT and EPOLLIN to separate + // events. + pub tap_for_write_epoll: Tap, pub rx: RxVirtio, pub tx: TxVirtio, pub epoll_fd: Option, pub rx_tap_listening: bool, + pub tx_tap_listening: bool, pub counters: NetCounters, pub tap_rx_event_id: u16, + pub tap_tx_event_id: u16, pub rx_desc_avail: bool, pub rx_rate_limiter: Option, pub tx_rate_limiter: Option, @@ -271,8 +280,35 @@ impl NetQueuePair { .ok_or(NetQueuePairError::NoMemoryConfigured) .map(|m| m.memory())?; - self.tx - .process_desc_chain(&mem, &mut self.tap, &mut queue, &mut self.tx_rate_limiter)?; + let tx_tap_retry = self.tx.process_desc_chain( + &mem, + &mut self.tap, + &mut queue, + &mut self.tx_rate_limiter, + )?; + + // We got told to try again when writing to the tap. Wait for the TAP to be writable + if tx_tap_retry && !self.tx_tap_listening { + register_listener( + self.epoll_fd.unwrap(), + self.tap_for_write_epoll.as_raw_fd(), + epoll::Events::EPOLLOUT, + u64::from(self.tap_tx_event_id), + ) + .map_err(NetQueuePairError::RegisterListener)?; + self.tx_tap_listening = true; + info!("Writing to TAP returned EAGAIN. Listening for TAP to become writable."); + } else if !tx_tap_retry && self.tx_tap_listening { + unregister_listener( + self.epoll_fd.unwrap(), + self.tap_for_write_epoll.as_raw_fd(), + epoll::Events::EPOLLOUT, + u64::from(self.tap_tx_event_id), + ) + .map_err(NetQueuePairError::UnregisterListener)?; + self.tx_tap_listening = false; + info!("Writing to TAP succeeded. No longer listening for TAP to become writable."); + } self.counters .tx_bytes diff --git a/vhost_user_net/src/lib.rs b/vhost_user_net/src/lib.rs index 03382bab2..ec860c37b 100644 --- a/vhost_user_net/src/lib.rs +++ b/vhost_user_net/src/lib.rs @@ -83,13 +83,16 @@ impl VhostUserNetThread { kill_evt: EventFd::new(EFD_NONBLOCK).map_err(Error::CreateKillEventFd)?, net: NetQueuePair { mem: None, + tap_for_write_epoll: tap.clone(), tap, rx: RxVirtio::new(), tx: TxVirtio::new(), rx_tap_listening: false, + tx_tap_listening: false, epoll_fd: None, counters: NetCounters::default(), tap_rx_event_id: 2, + tap_tx_event_id: 3, rx_desc_avail: false, rx_rate_limiter: None, tx_rate_limiter: None, @@ -193,14 +196,10 @@ impl VhostUserBackend for VhostUserNetBackend { fn handle_event( &self, device_event: u16, - evset: epoll::Events, + _evset: epoll::Events, vrings: &[Arc>], thread_id: usize, ) -> VhostUserBackendResult { - if evset != epoll::Events::EPOLLIN { - return Err(Error::HandleEventNotEpollIn.into()); - } - let mut thread = self.threads[thread_id].lock().unwrap(); match device_event { 0 => { @@ -215,7 +214,7 @@ impl VhostUserBackend for VhostUserNetBackend { thread.net.rx_tap_listening = true; } } - 1 => { + 1 | 3 => { let mut vring = vrings[1].write().unwrap(); if thread .net diff --git a/virtio-devices/src/net.rs b/virtio-devices/src/net.rs index a54e3917c..15e1ebf0d 100644 --- a/virtio-devices/src/net.rs +++ b/virtio-devices/src/net.rs @@ -98,10 +98,12 @@ pub const RX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; pub const TX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2; // A frame is available for reading from the tap device to receive in the guest. pub const RX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3; +// The TAP can be written to. Used after an EAGAIN error to retry TX. +pub const TX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4; // New 'wake up' event from the rx rate limiter -pub const RX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4; +pub const RX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5; // New 'wake up' event from the tx rate limiter -pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5; +pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 6; #[derive(Debug)] pub enum Error { @@ -186,11 +188,6 @@ impl NetEpollHandler { } fn handle_tx_event(&mut self) -> result::Result<(), DeviceError> { - let queue_evt = &self.queue_evt_pair[1]; - if let Err(e) = queue_evt.read() { - error!("Failed to get tx queue event: {:?}", e); - } - let rate_limit_reached = self .net .tx_rate_limiter @@ -266,12 +263,22 @@ impl EpollHelperHandler for NetEpollHandler { } } TX_QUEUE_EVENT => { + let queue_evt = &self.queue_evt_pair[1]; + if let Err(e) = queue_evt.read() { + error!("Failed to get tx queue event: {:?}", e); + } self.driver_awake = true; if let Err(e) = self.handle_tx_event() { error!("Error processing TX queue: {:?}", e); return true; } } + TX_TAP_EVENT => { + if let Err(e) = self.handle_tx_event() { + error!("Error processing TX queue (TAP event): {:?}", e); + return true; + } + } RX_TAP_EVENT => { if let Err(e) = self.handle_rx_tap_event() { error!("Error processing tap queue: {:?}", e); @@ -621,13 +628,16 @@ impl VirtioDevice for Net { let mut handler = NetEpollHandler { net: NetQueuePair { mem: Some(mem.clone()), + tap_for_write_epoll: tap.clone(), tap, rx, tx, epoll_fd: None, rx_tap_listening, + tx_tap_listening: false, counters: self.counters.clone(), tap_rx_event_id: RX_TAP_EVENT, + tap_tx_event_id: TX_TAP_EVENT, rx_desc_avail: false, rx_rate_limiter, tx_rate_limiter,