virtio-devices: Add rate limiter for the RX queue of virtio-net

Fixes: #1286

Signed-off-by: Bo Chen <chen.bo@intel.com>
Author:    Bo Chen <chen.bo@intel.com>, 2021-03-25 14:17:05 -07:00
Committer: Sebastien Boeuf
Parent:    5d8de62362
Commit:    32ad4982dd
4 changed files with 97 additions and 11 deletions


@@ -1,13 +1,16 @@
 # I/O Throttling
 
-Cloud Hypervisor now supports I/O throttling on virtio-block
+Cloud Hypervisor now supports I/O throttling on virtio-block and virtio-net
 devices. This support is based on the [`rate-limiter` module](https://github.com/firecracker-microvm/firecracker/tree/master/src/rate_limiter)
 from Firecracker. This document explains the user interface of this
 feature, and highlights some internal implementations that can help users
 better understand the expected behavior of I/O throttling in practice.
 
 Cloud Hypervisor allows users to limit both the I/O bandwidth (e.g. bytes/s)
-and I/O operations (ops/s) independently. To limit the I/O bandwidth, it
+and I/O operations (ops/s) independently. For virtio-net devices, while
+sharing the same "rate limit" from user inputs (on both bandwidth and
+operations), the RX and TX queues are throttled independently.
+
+To limit the I/O bandwidth, Cloud Hypervisor
 provides three user options, i.e., `bw_size` (bytes), `bw_one_time_burst`
 (bytes), and `bw_refill_time` (ms). Both `bw_size` and `bw_refill_time`
 are required, while `bw_one_time_burst` is optional.
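
(For example, a virtio-net device could be capped at roughly 10 MiB/s with something like `--net tap=tap0,bw_size=10485760,bw_refill_time=1000`. This is a hypothetical invocation: the three option names come from this document, but check the exact `--net` syntax of your Cloud Hypervisor build.)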
@@ -29,8 +32,8 @@ empty, it will stop I/O operations for a fixed amount of time
 (`cool_down_time`). The `cool_down_time` is now fixed at `100 ms`, which
 can have big implications for the actual rate limit (which can be a lot
 different from the expected "refill-rate" derived from user inputs). For
-example, to have a 1000 IOPS limit, users should be able to provide
-either of the following two options:
+example, to have a 1000 IOPS limit on a virtio-blk device, users should
+be able to provide either of the following two options:
 `ops_size=1000,ops_refill_time=1000` or
 `ops_size=10,ops_refill_time=10`. However, the actual IOPS limits are
 likely to be ~1000 IOPS and ~100 IOPS respectively. The reason is the
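
To make that arithmetic concrete, here is a back-of-the-envelope model (an editorial illustration, not the actual `rate-limiter` code) of how a fixed 100 ms cool-down bounds the effective rate once the bucket runs dry:

```rust
// Effective ops/s when the workload is persistently over the limit and the
// limiter only wakes up every `cool_down_ms` to refill the bucket.
fn effective_iops(ops_size: u64, ops_refill_time_ms: u64, cool_down_ms: u64) -> u64 {
    // Tokens accumulated during one cool-down period, capped at the bucket size.
    let refilled = (ops_size * cool_down_ms / ops_refill_time_ms).min(ops_size);
    // That budget is spent once per cool-down period.
    refilled * 1000 / cool_down_ms
}

fn main() {
    assert_eq!(effective_iops(1000, 1000, 100), 1000); // ~1000 IOPS, as expected
    assert_eq!(effective_iops(10, 10, 100), 100); // ~100 IOPS, not 1000
}
```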


@@ -134,9 +134,18 @@ impl RxVirtio {
         mem: &GuestMemoryMmap,
         tap: &mut Tap,
         queue: &mut Queue,
+        rate_limiter: &mut Option<RateLimiter>,
     ) -> Result<bool, NetQueuePairError> {
         let mut exhausted_descs = true;
+        let mut rate_limit_reached = false;
+
         while let Some(avail_desc) = queue.iter(&mem).next() {
+            if rate_limit_reached {
+                exhausted_descs = false;
+                queue.go_to_previous_position();
+                break;
+            }
+
             let head_index = avail_desc.index;
             let num_buffers_addr = mem.checked_offset(avail_desc.addr, 10).unwrap();
             let mut next_desc = Some(avail_desc);
@@ -195,6 +204,15 @@ impl RxVirtio {
             queue.add_used(&mem, head_index, len);
             queue.update_avail_event(&mem);
+
+            // For the sake of simplicity (keeping the handling of RX_QUEUE_EVENT and
+            // RX_TAP_EVENT totally asynchronous), we always let the 'last' descriptor
+            // chain go through even if it was over the rate limit, and simply stop
+            // processing incoming `avail_desc` entries if any.
+            if let Some(rate_limiter) = rate_limiter {
+                rate_limit_reached = !rate_limiter.consume(1, TokenType::Ops)
+                    || !rate_limiter.consume(len as u64, TokenType::Bytes);
+            }
         }
 
         Ok(exhausted_descs)
@@ -234,6 +252,8 @@ pub struct NetQueuePair {
     pub rx_tap_listening: bool,
     pub counters: NetCounters,
     pub tap_event_id: u16,
+    pub rx_desc_avail: bool,
+    pub rx_rate_limiter: Option<RateLimiter>,
     pub tx_rate_limiter: Option<RateLimiter>,
 }
@ -267,11 +287,21 @@ impl NetQueuePair {
.ok_or(NetQueuePairError::NoMemoryConfigured) .ok_or(NetQueuePairError::NoMemoryConfigured)
.map(|m| m.memory())?; .map(|m| m.memory())?;
if self self.rx_desc_avail = !self.rx.process_desc_chain(
.rx &mem,
.process_desc_chain(&mem, &mut self.tap, &mut queue)? &mut self.tap,
&& self.rx_tap_listening &mut queue,
{ &mut self.rx_rate_limiter,
)?;
let rate_limit_reached = self
.rx_rate_limiter
.as_ref()
.map_or(false, |r| r.is_blocked());
// Stop listening on the `RX_TAP_EVENT` when:
// 1) there is no available describles, or
// 2) the RX rate limit is reached.
if self.rx_tap_listening && (!self.rx_desc_avail || rate_limit_reached) {
unregister_listener( unregister_listener(
self.epoll_fd.unwrap(), self.epoll_fd.unwrap(),
self.tap.as_raw_fd(), self.tap.as_raw_fd(),
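
The RX path above charges the limiter one ops token per descriptor chain and `len` bytes tokens per received frame, short-circuiting once either bucket is empty. Below is a minimal standalone sketch of that consume pattern; the `consume`, `TokenType`, and `is_blocked` calls appear in this diff, while the `RateLimiter::new` signature is assumed from the Firecracker `rate_limiter` crate and may differ:

```rust
use rate_limiter::{RateLimiter, TokenType};

fn main() -> std::io::Result<()> {
    // Assumed constructor layout: (bytes size, bytes one-time burst, bytes
    // refill time in ms, ops size, ops one-time burst, ops refill time in ms).
    // Here: ~1 MiB/s and ~1000 ops/s, each bucket refilled over 1000 ms.
    let mut limiter = RateLimiter::new(1 << 20, 0, 1000, 1000, 0, 1000)?;

    let frame_len: u64 = 1514; // one full Ethernet frame, for illustration

    // Mirror the RX path: one ops token per descriptor chain, `frame_len`
    // bytes tokens per frame; short-circuit once either bucket is empty.
    let rate_limit_reached = !limiter.consume(1, TokenType::Ops)
        || !limiter.consume(frame_len, TokenType::Bytes);

    if rate_limit_reached {
        // A failed consume arms the limiter's internal timer fd; the device
        // then stops polling the TAP fd until RX_RATE_LIMITER_EVENT fires.
        assert!(limiter.is_blocked());
    }
    Ok(())
}
```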


@@ -111,6 +111,8 @@ impl VhostUserNetThread {
                 epoll_fd: None,
                 counters: NetCounters::default(),
                 tap_event_id: 2,
+                rx_desc_avail: false,
+                rx_rate_limiter: None,
                 tx_rate_limiter: None,
             },
         })


@@ -46,8 +46,10 @@ pub const RX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
 pub const TX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
 // A frame is available for reading from the tap device to receive in the guest.
 pub const RX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3;
+// New 'wake up' event from the rx rate limiter
+pub const RX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4;
 // New 'wake up' event from the tx rate limiter
-pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4;
+pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5;
 
 #[derive(Debug)]
 pub enum Error {
@@ -90,7 +92,16 @@ impl NetEpollHandler {
             error!("Failed to get rx queue event: {:?}", e);
         }
 
-        if !self.net.rx_tap_listening {
+        self.net.rx_desc_avail = true;
+
+        let rate_limit_reached = self
+            .net
+            .rx_rate_limiter
+            .as_ref()
+            .map_or(false, |r| r.is_blocked());
+
+        // Start listening on RX_TAP_EVENT only when the rate limit is not reached.
+        if !self.net.rx_tap_listening && !rate_limit_reached {
             net_util::register_listener(
                 self.net.epoll_fd.unwrap(),
                 self.net.tap.as_raw_fd(),
@@ -161,6 +172,9 @@ impl NetEpollHandler {
         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
         helper.add_event(self.queue_evt_pair[0].as_raw_fd(), RX_QUEUE_EVENT)?;
         helper.add_event(self.queue_evt_pair[1].as_raw_fd(), TX_QUEUE_EVENT)?;
+        if let Some(rate_limiter) = &self.net.rx_rate_limiter {
+            helper.add_event(rate_limiter.as_raw_fd(), RX_RATE_LIMITER_EVENT)?;
+        }
         if let Some(rate_limiter) = &self.net.tx_rate_limiter {
             helper.add_event(rate_limiter.as_raw_fd(), TX_RATE_LIMITER_EVENT)?;
         }
@@ -209,6 +223,35 @@ impl EpollHelperHandler for NetEpollHandler {
                     return true;
                 }
             }
+            RX_RATE_LIMITER_EVENT => {
+                if let Some(rate_limiter) = &mut self.net.rx_rate_limiter {
+                    // Upon rate limiter event, call the rate limiter handler and register the
+                    // TAP fd for further processing if some RX buffers are available.
+                    match rate_limiter.event_handler() {
+                        Ok(_) => {
+                            if !self.net.rx_tap_listening && self.net.rx_desc_avail {
+                                if let Err(e) = net_util::register_listener(
+                                    self.net.epoll_fd.unwrap(),
+                                    self.net.tap.as_raw_fd(),
+                                    epoll::Events::EPOLLIN,
+                                    u64::from(self.net.tap_event_id),
+                                ) {
+                                    error!("Error registering listener for `RX_RATE_LIMITER_EVENT`: {:?}", e);
+                                    return true;
+                                }
+                                self.net.rx_tap_listening = true;
+                            }
+                        }
+                        Err(e) => {
+                            error!("Error from 'rate_limiter.event_handler()': {:?}", e);
+                            return true;
+                        }
+                    }
+                } else {
+                    error!("Unexpected RX_RATE_LIMITER_EVENT");
+                    return true;
+                }
+            }
             TX_RATE_LIMITER_EVENT => {
                 if let Some(rate_limiter) = &mut self.net.tx_rate_limiter {
                     // Upon rate limiter event, call the rate limiter handler
@@ -527,6 +570,12 @@ impl VirtioDevice for Net {
                 ActivateError::BadActivate
             })?;
 
+        let rx_rate_limiter: Option<rate_limiter::RateLimiter> = self
+            .rate_limiter_config
+            .map(RateLimiterConfig::try_into)
+            .transpose()
+            .map_err(ActivateError::CreateRateLimiter)?;
+
         let tx_rate_limiter: Option<rate_limiter::RateLimiter> = self
             .rate_limiter_config
             .map(RateLimiterConfig::try_into)
@@ -543,6 +592,8 @@ impl VirtioDevice for Net {
                 rx_tap_listening,
                 counters: self.counters.clone(),
                 tap_event_id: RX_TAP_EVENT,
+                rx_desc_avail: false,
+                rx_rate_limiter,
                 tx_rate_limiter,
             },
             queue_pair,