mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2025-02-01 17:35:19 +00:00
virtio-devices: Add rate limiter for the RX queue of virtio-net
Fixes: #1286 Signed-off-by: Bo Chen <chen.bo@intel.com>
This commit is contained in:
parent
5d8de62362
commit
32ad4982dd
@ -1,13 +1,16 @@
|
||||
# I/O Throttling
|
||||
|
||||
Cloud Hypervisor now supports I/O throttling on virtio-block
|
||||
Cloud Hypervisor now supports I/O throttling on virtio-block and virtio-net
|
||||
devices. This support is based on the [`rate-limiter` module](https://github.com/firecracker-microvm/firecracker/tree/master/src/rate_limiter)
|
||||
from Firecracker. This document explains the user interface of this
|
||||
feature, and highlights some internal implementations that can help users
|
||||
better understand the expected behavior of I/O throttling in practice.
|
||||
|
||||
Cloud Hypervisor allows users to limit both the I/O bandwidth (e.g. bytes/s)
|
||||
and I/O operations (ops/s) independently. To limit the I/O bandwidth, it
|
||||
and I/O operations (ops/s) independently. For virtio-net devices, while
|
||||
sharing the same "rate limit" from user inputs (on both bandwidth and
|
||||
operations), the RX and TX queues are throttled independently.
|
||||
To limit the I/O bandwidth, Cloud Hypervisor
|
||||
provides three user options, i.e., `bw_size` (bytes), `bw_one_time_burst`
|
||||
(bytes), and `bw_refill_time` (ms). Both `bw_size` and `bw_refill_time`
|
||||
are required, while `bw_one_time_burst` is optional.
|
||||
@ -29,8 +32,8 @@ empty, it will stop I/O operations for a fixed amount of time
|
||||
(`cool_down_time`). The `cool_down_time` is currently fixed at `100 ms`, which
|
||||
can have big implications for the actual rate limit (which can be a lot
|
||||
different from the expected "refill-rate" derived from user inputs). For
|
||||
example, to have a 1000 IOPS limit, users should be able to provide
|
||||
either of the following two options:
|
||||
example, to have a 1000 IOPS limit on a virtio-blk device, users should
|
||||
be able to provide either of the following two options:
|
||||
`ops_size=1000,ops_refill_time=1000` or
|
||||
`ops_size=10,ops_refill_time=10`. However, the actual IOPS limits are
|
||||
likely to be ~1000 IOPS and ~100 IOPS respectively. The reason is the
|
||||
|
@ -134,9 +134,18 @@ impl RxVirtio {
|
||||
mem: &GuestMemoryMmap,
|
||||
tap: &mut Tap,
|
||||
queue: &mut Queue,
|
||||
rate_limiter: &mut Option<RateLimiter>,
|
||||
) -> Result<bool, NetQueuePairError> {
|
||||
let mut exhausted_descs = true;
|
||||
let mut rate_limit_reached = false;
|
||||
|
||||
while let Some(avail_desc) = queue.iter(&mem).next() {
|
||||
if rate_limit_reached {
|
||||
exhausted_descs = false;
|
||||
queue.go_to_previous_position();
|
||||
break;
|
||||
}
|
||||
|
||||
let head_index = avail_desc.index;
|
||||
let num_buffers_addr = mem.checked_offset(avail_desc.addr, 10).unwrap();
|
||||
let mut next_desc = Some(avail_desc);
|
||||
@ -195,6 +204,15 @@ impl RxVirtio {
|
||||
|
||||
queue.add_used(&mem, head_index, len);
|
||||
queue.update_avail_event(&mem);
|
||||
|
||||
// For the sake of simplicity (keeping the handling of RX_QUEUE_EVENT and
|
||||
// RX_TAP_EVENT totally asynchronous), we always let the 'last' descriptor
|
||||
// chain go-through even if it was over the rate limit, and simply stop
|
||||
// processing oncoming `avail_desc` if any.
|
||||
if let Some(rate_limiter) = rate_limiter {
|
||||
rate_limit_reached = !rate_limiter.consume(1, TokenType::Ops)
|
||||
|| !rate_limiter.consume(len as u64, TokenType::Bytes);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(exhausted_descs)
|
||||
@ -234,6 +252,8 @@ pub struct NetQueuePair {
|
||||
pub rx_tap_listening: bool,
|
||||
pub counters: NetCounters,
|
||||
pub tap_event_id: u16,
|
||||
pub rx_desc_avail: bool,
|
||||
pub rx_rate_limiter: Option<RateLimiter>,
|
||||
pub tx_rate_limiter: Option<RateLimiter>,
|
||||
}
|
||||
|
||||
@ -267,11 +287,21 @@ impl NetQueuePair {
|
||||
.ok_or(NetQueuePairError::NoMemoryConfigured)
|
||||
.map(|m| m.memory())?;
|
||||
|
||||
if self
|
||||
.rx
|
||||
.process_desc_chain(&mem, &mut self.tap, &mut queue)?
|
||||
&& self.rx_tap_listening
|
||||
{
|
||||
self.rx_desc_avail = !self.rx.process_desc_chain(
|
||||
&mem,
|
||||
&mut self.tap,
|
||||
&mut queue,
|
||||
&mut self.rx_rate_limiter,
|
||||
)?;
|
||||
let rate_limit_reached = self
|
||||
.rx_rate_limiter
|
||||
.as_ref()
|
||||
.map_or(false, |r| r.is_blocked());
|
||||
|
||||
// Stop listening on the `RX_TAP_EVENT` when:
|
||||
// 1) there are no available descriptors, or
|
||||
// 2) the RX rate limit is reached.
|
||||
if self.rx_tap_listening && (!self.rx_desc_avail || rate_limit_reached) {
|
||||
unregister_listener(
|
||||
self.epoll_fd.unwrap(),
|
||||
self.tap.as_raw_fd(),
|
||||
|
@ -111,6 +111,8 @@ impl VhostUserNetThread {
|
||||
epoll_fd: None,
|
||||
counters: NetCounters::default(),
|
||||
tap_event_id: 2,
|
||||
rx_desc_avail: false,
|
||||
rx_rate_limiter: None,
|
||||
tx_rate_limiter: None,
|
||||
},
|
||||
})
|
||||
|
@ -46,8 +46,10 @@ pub const RX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
|
||||
pub const TX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
|
||||
// A frame is available for reading from the tap device to receive in the guest.
|
||||
pub const RX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3;
|
||||
// New 'wake up' event from the rx rate limiter
|
||||
pub const RX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4;
|
||||
// New 'wake up' event from the tx rate limiter
|
||||
pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4;
|
||||
pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
@ -90,7 +92,16 @@ impl NetEpollHandler {
|
||||
error!("Failed to get rx queue event: {:?}", e);
|
||||
}
|
||||
|
||||
if !self.net.rx_tap_listening {
|
||||
self.net.rx_desc_avail = true;
|
||||
|
||||
let rate_limit_reached = self
|
||||
.net
|
||||
.rx_rate_limiter
|
||||
.as_ref()
|
||||
.map_or(false, |r| r.is_blocked());
|
||||
|
||||
// Start to listen on RX_TAP_EVENT only when the rate limit is not reached
|
||||
if !self.net.rx_tap_listening && !rate_limit_reached {
|
||||
net_util::register_listener(
|
||||
self.net.epoll_fd.unwrap(),
|
||||
self.net.tap.as_raw_fd(),
|
||||
@ -161,6 +172,9 @@ impl NetEpollHandler {
|
||||
let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
|
||||
helper.add_event(self.queue_evt_pair[0].as_raw_fd(), RX_QUEUE_EVENT)?;
|
||||
helper.add_event(self.queue_evt_pair[1].as_raw_fd(), TX_QUEUE_EVENT)?;
|
||||
if let Some(rate_limiter) = &self.net.rx_rate_limiter {
|
||||
helper.add_event(rate_limiter.as_raw_fd(), RX_RATE_LIMITER_EVENT)?;
|
||||
}
|
||||
if let Some(rate_limiter) = &self.net.tx_rate_limiter {
|
||||
helper.add_event(rate_limiter.as_raw_fd(), TX_RATE_LIMITER_EVENT)?;
|
||||
}
|
||||
@ -209,6 +223,35 @@ impl EpollHelperHandler for NetEpollHandler {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
RX_RATE_LIMITER_EVENT => {
|
||||
if let Some(rate_limiter) = &mut self.net.rx_rate_limiter {
|
||||
// Upon rate limiter event, call the rate limiter handler and register the
|
||||
// TAP fd for further processing if some RX buffers are available
|
||||
match rate_limiter.event_handler() {
|
||||
Ok(_) => {
|
||||
if !self.net.rx_tap_listening && self.net.rx_desc_avail {
|
||||
if let Err(e) = net_util::register_listener(
|
||||
self.net.epoll_fd.unwrap(),
|
||||
self.net.tap.as_raw_fd(),
|
||||
epoll::Events::EPOLLIN,
|
||||
u64::from(self.net.tap_event_id),
|
||||
) {
|
||||
error!("Error register_listener with `RX_RATE_LIMITER_EVENT`: {:?}", e);
|
||||
return true;
|
||||
}
|
||||
self.net.rx_tap_listening = true;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Error from 'rate_limiter.event_handler()': {:?}", e);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
error!("Unexpected RX_RATE_LIMITER_EVENT");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
TX_RATE_LIMITER_EVENT => {
|
||||
if let Some(rate_limiter) = &mut self.net.tx_rate_limiter {
|
||||
// Upon rate limiter event, call the rate limiter handler
|
||||
@ -527,6 +570,12 @@ impl VirtioDevice for Net {
|
||||
ActivateError::BadActivate
|
||||
})?;
|
||||
|
||||
let rx_rate_limiter: Option<rate_limiter::RateLimiter> = self
|
||||
.rate_limiter_config
|
||||
.map(RateLimiterConfig::try_into)
|
||||
.transpose()
|
||||
.map_err(ActivateError::CreateRateLimiter)?;
|
||||
|
||||
let tx_rate_limiter: Option<rate_limiter::RateLimiter> = self
|
||||
.rate_limiter_config
|
||||
.map(RateLimiterConfig::try_into)
|
||||
@ -543,6 +592,8 @@ impl VirtioDevice for Net {
|
||||
rx_tap_listening,
|
||||
counters: self.counters.clone(),
|
||||
tap_event_id: RX_TAP_EVENT,
|
||||
rx_desc_avail: false,
|
||||
rx_rate_limiter,
|
||||
tx_rate_limiter,
|
||||
},
|
||||
queue_pair,
|
||||
|
Loading…
x
Reference in New Issue
Block a user