From 5c06b7f86207bf74dc1023f9f49242e2222f8818 Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Mon, 17 Feb 2020 08:52:25 -0500 Subject: [PATCH] vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This feature is especially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, 
stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% 
cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez --- src/bin/vhost_user_blk.rs | 2 +- src/main.rs | 2 +- vhost_user_block/src/lib.rs | 80 ++++++++++++++++++++++++++++++++----- 3 files changed, 71 insertions(+), 13 deletions(-) diff --git a/src/bin/vhost_user_blk.rs b/src/bin/vhost_user_blk.rs index fd7f7d6dc..0e8f3b8c2 100644 --- a/src/bin/vhost_user_blk.rs +++ b/src/bin/vhost_user_blk.rs @@ -26,7 +26,7 @@ fn main() { .help( "vhost-user-block backend parameters \"image=,\ sock=,num_queues=,\ - readonly=true|false,direct=true|false\"", + readonly=true|false,direct=true|false,poll_queue=true|false\"", ) .takes_value(true) .min_values(1), diff --git a/src/main.rs b/src/main.rs index cf339f760..72a85cf10 100755 --- a/src/main.rs +++ b/src/main.rs @@ -289,7 +289,7 @@ fn create_app<'a, 'b>( .help( "vhost-user-block backend parameters \"image=,\ sock=,num_queues=,\ - readonly=true|false,direct=true|false\"", + readonly=true|false,direct=true|false,poll_queue=true|false\"", ) .takes_value(true) .conflicts_with_all(&["net-backend", "kernel"]) diff --git a/vhost_user_block/src/lib.rs b/vhost_user_block/src/lib.rs index d77db64ed..c9671186b 100644 --- a/vhost_user_block/src/lib.rs +++ b/vhost_user_block/src/lib.rs @@ -28,6 +28,7 @@ use std::path::PathBuf; use std::process; use std::slice; use std::sync::{Arc, RwLock}; +use std::time::Instant; use std::vec::Vec; use std::{convert, 
error, fmt, io}; use vhost_rs::vhost_user::message::*; @@ -42,6 +43,10 @@ const QUEUE_SIZE: usize = 1024; const SECTOR_SHIFT: u8 = 9; const SECTOR_SIZE: u64 = (0x01 as u64) << SECTOR_SHIFT; const BLK_SIZE: u32 = 512; +// Current (2020) enterprise SSDs have a latency lower than 30us. +// Polling for 50us should be enough to cover for the device latency +// and the overhead of the emulation layer. +const POLL_QUEUE_US: u128 = 50; trait DiskFile: Read + Seek + Write + Send + Sync {} impl DiskFile for D {} @@ -67,6 +72,8 @@ pub enum Error { ParseReadOnlyParam, /// Failed parsing fs number of queues parameter. ParseBlkNumQueuesParam(std::num::ParseIntError), + /// Failed to parse the poll_queue parameter. + ParsePollQueueParam, /// Failed to handle event other than input event. HandleEventNotEpollIn, /// Failed to create kill eventfd @@ -97,12 +104,19 @@ pub struct VhostUserBlkBackend { disk_nsectors: u64, config: virtio_blk_config, rdonly: bool, + poll_queue: bool, event_idx: bool, kill_evt: EventFd, } impl VhostUserBlkBackend { - pub fn new(image_path: String, num_queues: usize, rdonly: bool, direct: bool) -> Result { + pub fn new( + image_path: String, + num_queues: usize, + rdonly: bool, + direct: bool, + poll_queue: bool, + ) -> Result { let mut options = OpenOptions::new(); options.read(true); options.write(!rdonly); @@ -139,6 +153,7 @@ impl VhostUserBlkBackend { disk_nsectors: nsectors, config, rdonly, + poll_queue, event_idx: false, kill_evt: EventFd::new(EFD_NONBLOCK).map_err(Error::CreateKillEventFd)?, }) @@ -180,12 +195,21 @@ impl VhostUserBlkBackend { } } - if let Some(used_idx) = vring.mut_queue().add_used(mem, head.index, len) { - let used_event = vring.mut_queue().get_used_event(mem); - if vring.needs_notification(Wrapping(used_idx), used_event) { - debug!("signalling queue"); - vring.signal_used_queue().unwrap(); + if self.event_idx { + if let Some(used_idx) = vring.mut_queue().add_used(mem, head.index, len) { + let used_event = 
vring.mut_queue().get_used_event(mem); + if vring.needs_notification(Wrapping(used_idx), used_event) { + debug!("signalling queue"); + vring.signal_used_queue().unwrap(); + } else { + debug!("omitting signal (event_idx)"); + } + used_any = true; } + } else { + debug!("signalling queue"); + vring.mut_queue().add_used(mem, head.index, len); + vring.signal_used_queue().unwrap(); used_any = true; } } @@ -248,13 +272,38 @@ impl VhostUserBackend for VhostUserBlkBackend { match device_event { q if device_event < self.config.num_queues => { let mut vring = vrings[q as usize].write().unwrap(); - if self.process_queue(&mut vring) && self.event_idx { - if let Some(mem) = self.mem.as_ref() { - vring.mut_queue().update_avail_event(mem); - // Check the queue again to ensure there are no pending request - self.process_queue(&mut vring); + + if self.poll_queue { + // Actively poll the queue until POLL_QUEUE_US has passed + // without seeing a new request. + let mut now = Instant::now(); + loop { + if self.process_queue(&mut vring) { + now = Instant::now(); + } else if now.elapsed().as_micros() > POLL_QUEUE_US { + break; + } } } + + if self.event_idx { + // vm-virtio's Queue implementation only checks avail_index + // once, so to properly support EVENT_IDX we need to keep + // calling process_queue() until it stops finding new + // requests on the queue. + loop { + vring + .mut_queue() + .update_avail_event(self.mem.as_ref().unwrap()); + if !self.process_queue(&mut vring) { + break; + } + } + } else { + // Without EVENT_IDX, a single call is enough. 
+ self.process_queue(&mut vring); + } + Ok(false) } _ => Err(Error::HandleEventUnknownEvent.into()), @@ -284,6 +333,7 @@ pub struct VhostUserBlkBackendConfig<'a> { pub num_queues: usize, pub readonly: bool, pub direct: bool, + pub poll_queue: bool, } impl<'a> VhostUserBlkBackendConfig<'a> { @@ -295,6 +345,7 @@ impl<'a> VhostUserBlkBackendConfig<'a> { let mut num_queues_str: &str = ""; let mut readonly: bool = false; let mut direct: bool = false; + let mut poll_queue: bool = true; for param in params_list.iter() { if param.starts_with("image=") { @@ -313,6 +364,11 @@ impl<'a> VhostUserBlkBackendConfig<'a> { Ok(b) => b, Err(_) => return Err(Error::ParseDirectParam), } + } else if param.starts_with("poll_queue=") { + poll_queue = match param[11..].parse::() { + Ok(b) => b, + Err(_) => return Err(Error::ParsePollQueueParam), + } } } @@ -334,6 +390,7 @@ impl<'a> VhostUserBlkBackendConfig<'a> { num_queues, readonly, direct, + poll_queue, }) } } @@ -353,6 +410,7 @@ pub fn start_block_backend(backend_command: &str) { backend_config.num_queues, backend_config.readonly, backend_config.direct, + backend_config.poll_queue, ) .unwrap(), ));