cloud-hypervisor/vhost_user_block/src/lib.rs

557 lines
17 KiB
Rust
Raw Normal View History

// Copyright 2019 Red Hat, Inc. All Rights Reserved.
//
// Portions Copyright 2019 Intel Corporation. All Rights Reserved.
//
// Portions Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
//
// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause)
use block::{
build_serial,
qcow::{self, ImageType, QcowFile},
Request, VirtioBlockConfig,
};
use libc::EFD_NONBLOCK;
use log::*;
use option_parser::{OptionParser, OptionParserError, Toggle};
use std::fs::File;
use std::fs::OpenOptions;
use std::io::Read;
use std::io::{Seek, SeekFrom, Write};
use std::ops::Deref;
use std::ops::DerefMut;
use std::os::unix::fs::OpenOptionsExt;
use std::path::PathBuf;
use std::process;
use std::result;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, RwLock, RwLockWriteGuard};
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
use std::time::Instant;
use std::vec::Vec;
use std::{convert, error, fmt, io};
use vhost::vhost_user::message::*;
use vhost::vhost_user::Listener;
use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringRwLock, VringState, VringT};
use virtio_bindings::virtio_blk::*;
use virtio_bindings::virtio_config::VIRTIO_F_VERSION_1;
use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
use virtio_queue::QueueT;
use vm_memory::GuestAddressSpace;
use vm_memory::{bitmap::AtomicBitmap, ByteValued, Bytes, GuestMemoryAtomic};
use vmm_sys_util::{epoll::EventSet, eventfd::EventFd};
/// Guest memory mapping type specialised with [`AtomicBitmap`] as its bitmap parameter.
type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
/// Base-2 logarithm of the sector size: shifting 1 left by this amount yields `SECTOR_SIZE`.
const SECTOR_SHIFT: u8 = 9;

/// Size of one sector in bytes, derived from `SECTOR_SHIFT` (1 << 9 = 512).
const SECTOR_SIZE: u64 = 1 << SECTOR_SHIFT;

/// Block size, in bytes, advertised through the `blk_size` field of the block configuration.
const BLK_SIZE: u32 = 512;
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
/// Polling window for the virtqueue, in microseconds.
///
/// Current (2020) enterprise SSDs have a latency lower than 30us.
/// Polling for 50us should be enough to cover for the device latency
/// and the overhead of the emulation layer.
const POLL_QUEUE_US: u128 = 50;
/// Umbrella trait for anything usable as a disk image backend: it must be readable,
/// seekable, writable and sendable across threads.
trait DiskFile: Read + Seek + Write + Send {}

/// Blanket implementation: every type meeting the supertrait bounds is a `DiskFile`.
impl<D> DiskFile for D where D: Read + Seek + Write + Send {}
/// Result alias for this module's fallible operations, carrying the local [`Error`] type.
type Result<T> = std::result::Result<T, Error>;
/// Result alias whose error type is [`std::io::Error`]; presumably used by the vhost-user
/// backend callbacks (their definitions are not fully visible here — confirm).
type VhostUserBackendResult<T> = std::result::Result<T, std::io::Error>;
/// Errors reported by the vhost-user block backend.
#[derive(Debug)]
enum Error {
    /// The kill eventfd could not be created; wraps the underlying I/O error.
    CreateKillEventFd(io::Error),
    /// The backend configuration string could not be parsed.
    FailedConfigParse(OptionParserError),
    /// An event other than an input (EPOLLIN) event was received.
    HandleEventNotEpollIn,
    /// An event that the backend does not know about was received.
    HandleEventUnknownEvent,
    /// The configuration did not provide an image path.
    PathParameterMissing,
    /// The configuration did not provide a socket path.
    SocketParameterMissing,
}
/// Human-readable description of the parameter string accepted by the backend.
pub const SYNTAX: &str = concat!(
    "vhost-user-block backend parameters ",
    "\"path=<image_path>,socket=<socket_path>,num_queues=<number_of_queues>,",
    "queue_size=<size_of_each_queue>,readonly=true|false,direct=true|false,",
    "poll_queue=true|false\"",
);
impl fmt::Display for Error {
    /// Renders the error as `vhost_user_block_error: ` followed by its `Debug` representation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("vhost_user_block_error: {self:?}"))
    }
}
// Marks `Error` as a standard error type; the trait's default method implementations are used.
impl error::Error for Error {}
impl convert::From<Error> for io::Error {
fn from(e: Error) -> Self {
io::Error::new(io::ErrorKind::Other, e)
}
}
struct VhostUserBlkThread {
disk_image: Arc<Mutex<dyn DiskFile>>,
serial: Vec<u8>,
disk_nsectors: u64,
vhost_user_block: Make use of the EVENT_IDX feature Now that vhost_user_backend and vm-virtio do support EVENT_IDX, use it in vhost_user_block to reduce the number of notifications sent between the driver and the device. This is specially useful when using active polling on the virtqueue, as it'll be implemented by a future patch. This is a snapshot of kvm_stat while generating ~60K IOPS with fio on the guest without EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 393454 20.3 62494 kvm_exit 393446 20.3 62494 kvm_apic_accept_irq 378146 19.5 60268 kvm_msi_set_irq 369720 19.0 58881 kvm_fast_mmio 370497 19.1 58817 kvm_hv_timer_state 10197 0.5 1715 kvm_msr 8770 0.5 1443 kvm_wait_lapic_expire 7018 0.4 1118 kvm_apic 2768 0.1 538 kvm_pv_tlb_flush 2028 0.1 360 kvm_vcpu_wakeup 1453 0.1 278 kvm_apic_ipi 1384 0.1 269 kvm_fpu 1148 0.1 164 kvm_pio 574 0.0 82 kvm_userspace_exit 574 0.0 82 kvm_halt_poll_ns 24 0.0 3 And this is the snapshot while doing the same thing with EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 35506 26.0 3873 kvm_exit 35499 26.0 3873 kvm_hv_timer_state 14740 10.8 1672 kvm_apic_accept_irq 13017 9.5 1438 kvm_msr 12845 9.4 1421 kvm_wait_lapic_expire 10422 7.6 1118 kvm_apic 3788 2.8 502 kvm_pv_tlb_flush 2708 2.0 340 kvm_vcpu_wakeup 1992 1.5 258 kvm_apic_ipi 1894 1.4 251 kvm_fpu 1476 1.1 164 kvm_pio 738 0.5 82 kvm_userspace_exit 738 0.5 82 kvm_msi_set_irq 701 0.5 69 kvm_fast_mmio 238 0.2 4 kvm_halt_poll_ns 50 0.0 1 kvm_ple_window_update 28 0.0 0 kvm_page_fault 4 0.0 0 It can be clearly appreciated how the number of vm exits per second, specially the ones related to notifications (kvm_fast_mmio and kvm_msi_set_irq) is drastically lower. Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-14 12:24:34 +00:00
event_idx: bool,
kill_evt: EventFd,
writeback: Arc<AtomicBool>,
mem: GuestMemoryAtomic<GuestMemoryMmap>,
}
impl VhostUserBlkThread {
fn new(
disk_image: Arc<Mutex<dyn DiskFile>>,
serial: Vec<u8>,
disk_nsectors: u64,
writeback: Arc<AtomicBool>,
mem: GuestMemoryAtomic<GuestMemoryMmap>,
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
) -> Result<Self> {
Ok(VhostUserBlkThread {
disk_image,
serial,
disk_nsectors,
vhost_user_block: Make use of the EVENT_IDX feature Now that vhost_user_backend and vm-virtio do support EVENT_IDX, use it in vhost_user_block to reduce the number of notifications sent between the driver and the device. This is specially useful when using active polling on the virtqueue, as it'll be implemented by a future patch. This is a snapshot of kvm_stat while generating ~60K IOPS with fio on the guest without EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 393454 20.3 62494 kvm_exit 393446 20.3 62494 kvm_apic_accept_irq 378146 19.5 60268 kvm_msi_set_irq 369720 19.0 58881 kvm_fast_mmio 370497 19.1 58817 kvm_hv_timer_state 10197 0.5 1715 kvm_msr 8770 0.5 1443 kvm_wait_lapic_expire 7018 0.4 1118 kvm_apic 2768 0.1 538 kvm_pv_tlb_flush 2028 0.1 360 kvm_vcpu_wakeup 1453 0.1 278 kvm_apic_ipi 1384 0.1 269 kvm_fpu 1148 0.1 164 kvm_pio 574 0.0 82 kvm_userspace_exit 574 0.0 82 kvm_halt_poll_ns 24 0.0 3 And this is the snapshot while doing the same thing with EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 35506 26.0 3873 kvm_exit 35499 26.0 3873 kvm_hv_timer_state 14740 10.8 1672 kvm_apic_accept_irq 13017 9.5 1438 kvm_msr 12845 9.4 1421 kvm_wait_lapic_expire 10422 7.6 1118 kvm_apic 3788 2.8 502 kvm_pv_tlb_flush 2708 2.0 340 kvm_vcpu_wakeup 1992 1.5 258 kvm_apic_ipi 1894 1.4 251 kvm_fpu 1476 1.1 164 kvm_pio 738 0.5 82 kvm_userspace_exit 738 0.5 82 kvm_msi_set_irq 701 0.5 69 kvm_fast_mmio 238 0.2 4 kvm_halt_poll_ns 50 0.0 1 kvm_ple_window_update 28 0.0 0 kvm_page_fault 4 0.0 0 It can be clearly appreciated how the number of vm exits per second, specially the ones related to notifications (kvm_fast_mmio and kvm_msi_set_irq) is drastically lower. Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-14 12:24:34 +00:00
event_idx: false,
kill_evt: EventFd::new(EFD_NONBLOCK).map_err(Error::CreateKillEventFd)?,
writeback,
mem,
})
}
fn process_queue(
&mut self,
vring: &mut RwLockWriteGuard<VringState<GuestMemoryAtomic<GuestMemoryMmap>>>,
) -> bool {
let mut used_descs = false;
while let Some(mut desc_chain) = vring
.get_queue_mut()
.pop_descriptor_chain(self.mem.memory())
{
debug!("got an element in the queue");
let len;
match Request::parse(&mut desc_chain, None) {
Ok(mut request) => {
debug!("element is a valid request");
request.set_writeback(self.writeback.load(Ordering::Acquire));
let status = match request.execute(
&mut self.disk_image.lock().unwrap().deref_mut(),
self.disk_nsectors,
desc_chain.memory(),
&self.serial,
) {
Ok(l) => {
len = l;
VIRTIO_BLK_S_OK
}
Err(e) => {
len = 1;
e.status()
}
};
desc_chain
.memory()
.write_obj(status, request.status_addr)
.unwrap();
}
Err(err) => {
error!("failed to parse available descriptor chain: {:?}", err);
len = 0;
}
}
vhost_user_block: Make use of the EVENT_IDX feature Now that vhost_user_backend and vm-virtio do support EVENT_IDX, use it in vhost_user_block to reduce the number of notifications sent between the driver and the device. This is specially useful when using active polling on the virtqueue, as it'll be implemented by a future patch. This is a snapshot of kvm_stat while generating ~60K IOPS with fio on the guest without EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 393454 20.3 62494 kvm_exit 393446 20.3 62494 kvm_apic_accept_irq 378146 19.5 60268 kvm_msi_set_irq 369720 19.0 58881 kvm_fast_mmio 370497 19.1 58817 kvm_hv_timer_state 10197 0.5 1715 kvm_msr 8770 0.5 1443 kvm_wait_lapic_expire 7018 0.4 1118 kvm_apic 2768 0.1 538 kvm_pv_tlb_flush 2028 0.1 360 kvm_vcpu_wakeup 1453 0.1 278 kvm_apic_ipi 1384 0.1 269 kvm_fpu 1148 0.1 164 kvm_pio 574 0.0 82 kvm_userspace_exit 574 0.0 82 kvm_halt_poll_ns 24 0.0 3 And this is the snapshot while doing the same thing with EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 35506 26.0 3873 kvm_exit 35499 26.0 3873 kvm_hv_timer_state 14740 10.8 1672 kvm_apic_accept_irq 13017 9.5 1438 kvm_msr 12845 9.4 1421 kvm_wait_lapic_expire 10422 7.6 1118 kvm_apic 3788 2.8 502 kvm_pv_tlb_flush 2708 2.0 340 kvm_vcpu_wakeup 1992 1.5 258 kvm_apic_ipi 1894 1.4 251 kvm_fpu 1476 1.1 164 kvm_pio 738 0.5 82 kvm_userspace_exit 738 0.5 82 kvm_msi_set_irq 701 0.5 69 kvm_fast_mmio 238 0.2 4 kvm_halt_poll_ns 50 0.0 1 kvm_ple_window_update 28 0.0 0 kvm_page_fault 4 0.0 0 It can be clearly appreciated how the number of vm exits per second, specially the ones related to notifications (kvm_fast_mmio and kvm_msi_set_irq) is drastically lower. Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-14 12:24:34 +00:00
vring
.get_queue_mut()
.add_used(desc_chain.memory(), desc_chain.head_index(), len)
.unwrap();
used_descs = true;
}
let mut needs_signalling = false;
if self.event_idx {
if vring
.get_queue_mut()
.needs_notification(self.mem.memory().deref())
.unwrap()
{
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
debug!("signalling queue");
needs_signalling = true;
} else {
debug!("omitting signal (event_idx)");
vhost_user_block: Make use of the EVENT_IDX feature Now that vhost_user_backend and vm-virtio do support EVENT_IDX, use it in vhost_user_block to reduce the number of notifications sent between the driver and the device. This is specially useful when using active polling on the virtqueue, as it'll be implemented by a future patch. This is a snapshot of kvm_stat while generating ~60K IOPS with fio on the guest without EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 393454 20.3 62494 kvm_exit 393446 20.3 62494 kvm_apic_accept_irq 378146 19.5 60268 kvm_msi_set_irq 369720 19.0 58881 kvm_fast_mmio 370497 19.1 58817 kvm_hv_timer_state 10197 0.5 1715 kvm_msr 8770 0.5 1443 kvm_wait_lapic_expire 7018 0.4 1118 kvm_apic 2768 0.1 538 kvm_pv_tlb_flush 2028 0.1 360 kvm_vcpu_wakeup 1453 0.1 278 kvm_apic_ipi 1384 0.1 269 kvm_fpu 1148 0.1 164 kvm_pio 574 0.0 82 kvm_userspace_exit 574 0.0 82 kvm_halt_poll_ns 24 0.0 3 And this is the snapshot while doing the same thing with EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 35506 26.0 3873 kvm_exit 35499 26.0 3873 kvm_hv_timer_state 14740 10.8 1672 kvm_apic_accept_irq 13017 9.5 1438 kvm_msr 12845 9.4 1421 kvm_wait_lapic_expire 10422 7.6 1118 kvm_apic 3788 2.8 502 kvm_pv_tlb_flush 2708 2.0 340 kvm_vcpu_wakeup 1992 1.5 258 kvm_apic_ipi 1894 1.4 251 kvm_fpu 1476 1.1 164 kvm_pio 738 0.5 82 kvm_userspace_exit 738 0.5 82 kvm_msi_set_irq 701 0.5 69 kvm_fast_mmio 238 0.2 4 kvm_halt_poll_ns 50 0.0 1 kvm_ple_window_update 28 0.0 0 kvm_page_fault 4 0.0 0 It can be clearly appreciated how the number of vm exits per second, specially the ones related to notifications (kvm_fast_mmio and kvm_msi_set_irq) is drastically lower. Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-14 12:24:34 +00:00
}
} else {
debug!("signalling queue");
needs_signalling = true;
}
if needs_signalling {
vring.signal_used_queue().unwrap();
}
used_descs
}
}
/// State of the vhost-user block backend shared by all of its worker threads.
struct VhostUserBlkBackend {
    /// Worker states, each behind its own mutex; `new` creates one per queue.
    threads: Vec<Mutex<VhostUserBlkThread>>,
    /// Virtio block configuration (capacity, block size, queue count, writeback, ...).
    config: VirtioBlockConfig,
    /// Whether the disk image was opened without write access.
    rdonly: bool,
    /// Whether queue polling was requested through the `poll_queue` parameter.
    poll_queue: bool,
    /// One bitmask per worker thread; `new` sets bit `i` for the thread at index `i`.
    queues_per_thread: Vec<u64>,
    /// Value reported as the maximum queue size.
    queue_size: usize,
    /// Feature bits consulted by `update_writeback`; starts at 0.
    acked_features: u64,
    /// Cache-mode flag shared with every worker thread; `true` selects writeback.
    writeback: Arc<AtomicBool>,
    /// Guest memory handle, also cloned into every worker thread.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
}
impl VhostUserBlkBackend {
    /// Opens the disk image at `image_path` and builds the backend with one worker
    /// thread state per queue.
    ///
    /// The image is opened read-only when `rdonly` is set and with `O_DIRECT` when
    /// `direct` is set. Raw and QCOW2 images are both supported; the type is detected
    /// from the file itself.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`VhostUserBlkThread::new`].
    ///
    /// # Panics
    ///
    /// Panics if the image cannot be opened, if its type cannot be detected, if a QCOW2
    /// image cannot be loaded, or if seeking to the end of the image fails.
    fn new(
        image_path: String,
        num_queues: usize,
        rdonly: bool,
        direct: bool,
        poll_queue: bool,
        queue_size: usize,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
    ) -> Result<Self> {
        let mut open_opts = OpenOptions::new();
        open_opts.read(true).write(!rdonly);
        if direct {
            open_opts.custom_flags(libc::O_DIRECT);
        }
        let file: File = open_opts.open(&image_path).unwrap();
        let mut raw_img: qcow::RawFile = qcow::RawFile::new(file, direct);

        let serial = build_serial(&PathBuf::from(&image_path));

        let disk = match qcow::detect_image_type(&mut raw_img).unwrap() {
            ImageType::Raw => Arc::new(Mutex::new(raw_img)) as Arc<Mutex<dyn DiskFile>>,
            ImageType::Qcow2 => {
                let qcow_img = QcowFile::from(raw_img).unwrap();
                Arc::new(Mutex::new(qcow_img)) as Arc<Mutex<dyn DiskFile>>
            }
        };

        // The capacity is the image length (offset of its end) expressed in sectors.
        let image_len = disk.lock().unwrap().seek(SeekFrom::End(0)).unwrap();
        let nsectors = image_len / SECTOR_SIZE;

        let config = VirtioBlockConfig {
            capacity: nsectors,
            blk_size: BLK_SIZE,
            size_max: 65535,
            seg_max: 128 - 2,
            min_io_size: 1,
            opt_io_size: 1,
            num_queues: num_queues as u16,
            writeback: 1,
            ..Default::default()
        };

        let mut queues_per_thread = Vec::new();
        let mut threads = Vec::new();
        let writeback = Arc::new(AtomicBool::new(true));

        // One worker per queue; worker `queue_idx` is assigned the mask with only
        // bit `queue_idx` set.
        for queue_idx in 0..num_queues {
            let worker = VhostUserBlkThread::new(
                disk.clone(),
                serial.clone(),
                nsectors,
                writeback.clone(),
                mem.clone(),
            )?;
            threads.push(Mutex::new(worker));
            queues_per_thread.push(0b1 << queue_idx);
        }

        Ok(VhostUserBlkBackend {
            threads,
            config,
            rdonly,
            poll_queue,
            queues_per_thread,
            queue_size,
            acked_features: 0,
            writeback,
            mem,
        })
    }

    /// Recomputes the cache mode from the acknowledged features and publishes it to the
    /// shared `writeback` flag.
    ///
    /// If `VIRTIO_BLK_F_CONFIG_WCE` was acknowledged, the mode follows the `writeback`
    /// field of the config; otherwise writeback is used exactly when
    /// `VIRTIO_BLK_F_FLUSH` was acknowledged.
    fn update_writeback(&mut self) {
        let wce_bit: u64 = 1 << VIRTIO_BLK_F_CONFIG_WCE;
        let flush_bit: u64 = 1 << VIRTIO_BLK_F_FLUSH;

        let use_writeback = if self.acked_features & wce_bit == wce_bit {
            // The config field decides when the driver may toggle the cache mode.
            self.config.writeback == 1
        } else {
            // Otherwise fall back to whether flush support was negotiated.
            self.acked_features & flush_bit == flush_bit
        };

        let mode = if use_writeback {
            "writeback"
        } else {
            "writethrough"
        };
        info!("Changing cache mode to {}", mode);

        self.writeback.store(use_writeback, Ordering::Release);
    }
}
impl VhostUserBackendMut for VhostUserBlkBackend {
type Bitmap = AtomicBitmap;
type Vring = VringRwLock<GuestMemoryAtomic<GuestMemoryMmap>>;
/// Returns the number of queues recorded in the device configuration.
fn num_queues(&self) -> usize {
    // The config field is a `u16`, so widening to `usize` is lossless.
    usize::from(self.config.num_queues)
}
/// Returns the queue size this backend was constructed with.
fn max_queue_size(&self) -> usize {
self.queue_size
}
/// Returns the feature bits offered by this backend.
///
/// The set always contains the block features SEG_MAX, BLK_SIZE, FLUSH,
/// TOPOLOGY, MQ and CONFIG_WCE, plus VIRTIO_RING_F_EVENT_IDX,
/// VIRTIO_F_VERSION_1 and the vhost-user PROTOCOL_FEATURES bit.
/// VIRTIO_BLK_F_RO is added when the backend is read-only.
fn features(&self) -> u64 {
let mut avail_features = 1 << VIRTIO_BLK_F_SEG_MAX
| 1 << VIRTIO_BLK_F_BLK_SIZE
| 1 << VIRTIO_BLK_F_FLUSH
| 1 << VIRTIO_BLK_F_TOPOLOGY
| 1 << VIRTIO_BLK_F_MQ
| 1 << VIRTIO_BLK_F_CONFIG_WCE
vhost_user_block: Make use of the EVENT_IDX feature Now that vhost_user_backend and vm-virtio do support EVENT_IDX, use it in vhost_user_block to reduce the number of notifications sent between the driver and the device. This is specially useful when using active polling on the virtqueue, as it'll be implemented by a future patch. This is a snapshot of kvm_stat while generating ~60K IOPS with fio on the guest without EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 393454 20.3 62494 kvm_exit 393446 20.3 62494 kvm_apic_accept_irq 378146 19.5 60268 kvm_msi_set_irq 369720 19.0 58881 kvm_fast_mmio 370497 19.1 58817 kvm_hv_timer_state 10197 0.5 1715 kvm_msr 8770 0.5 1443 kvm_wait_lapic_expire 7018 0.4 1118 kvm_apic 2768 0.1 538 kvm_pv_tlb_flush 2028 0.1 360 kvm_vcpu_wakeup 1453 0.1 278 kvm_apic_ipi 1384 0.1 269 kvm_fpu 1148 0.1 164 kvm_pio 574 0.0 82 kvm_userspace_exit 574 0.0 82 kvm_halt_poll_ns 24 0.0 3 And this is the snapshot while doing the same thing with EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 35506 26.0 3873 kvm_exit 35499 26.0 3873 kvm_hv_timer_state 14740 10.8 1672 kvm_apic_accept_irq 13017 9.5 1438 kvm_msr 12845 9.4 1421 kvm_wait_lapic_expire 10422 7.6 1118 kvm_apic 3788 2.8 502 kvm_pv_tlb_flush 2708 2.0 340 kvm_vcpu_wakeup 1992 1.5 258 kvm_apic_ipi 1894 1.4 251 kvm_fpu 1476 1.1 164 kvm_pio 738 0.5 82 kvm_userspace_exit 738 0.5 82 kvm_msi_set_irq 701 0.5 69 kvm_fast_mmio 238 0.2 4 kvm_halt_poll_ns 50 0.0 1 kvm_ple_window_update 28 0.0 0 kvm_page_fault 4 0.0 0 It can be clearly appreciated how the number of vm exits per second, specially the ones related to notifications (kvm_fast_mmio and kvm_msi_set_irq) is drastically lower. Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-14 12:24:34 +00:00
| 1 << VIRTIO_RING_F_EVENT_IDX
| 1 << VIRTIO_F_VERSION_1
| VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
if self.rdonly {
avail_features |= 1 << VIRTIO_BLK_F_RO;
}
avail_features
}
/// Stores the given feature bits and recomputes the cache mode from them.
fn acked_features(&mut self, features: u64) {
self.acked_features = features;
// The cache mode depends on which features were just stored.
self.update_writeback();
}
/// Returns the vhost-user protocol features supported by this backend:
/// CONFIG, MQ and CONFIGURE_MEM_SLOTS.
fn protocol_features(&self) -> VhostUserProtocolFeatures {
    let config_and_mq = VhostUserProtocolFeatures::CONFIG | VhostUserProtocolFeatures::MQ;
    config_and_mq | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
}
vhost_user_block: Make use of the EVENT_IDX feature Now that vhost_user_backend and vm-virtio do support EVENT_IDX, use it in vhost_user_block to reduce the number of notifications sent between the driver and the device. This is specially useful when using active polling on the virtqueue, as it'll be implemented by a future patch. This is a snapshot of kvm_stat while generating ~60K IOPS with fio on the guest without EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 393454 20.3 62494 kvm_exit 393446 20.3 62494 kvm_apic_accept_irq 378146 19.5 60268 kvm_msi_set_irq 369720 19.0 58881 kvm_fast_mmio 370497 19.1 58817 kvm_hv_timer_state 10197 0.5 1715 kvm_msr 8770 0.5 1443 kvm_wait_lapic_expire 7018 0.4 1118 kvm_apic 2768 0.1 538 kvm_pv_tlb_flush 2028 0.1 360 kvm_vcpu_wakeup 1453 0.1 278 kvm_apic_ipi 1384 0.1 269 kvm_fpu 1148 0.1 164 kvm_pio 574 0.0 82 kvm_userspace_exit 574 0.0 82 kvm_halt_poll_ns 24 0.0 3 And this is the snapshot while doing the same thing with EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 35506 26.0 3873 kvm_exit 35499 26.0 3873 kvm_hv_timer_state 14740 10.8 1672 kvm_apic_accept_irq 13017 9.5 1438 kvm_msr 12845 9.4 1421 kvm_wait_lapic_expire 10422 7.6 1118 kvm_apic 3788 2.8 502 kvm_pv_tlb_flush 2708 2.0 340 kvm_vcpu_wakeup 1992 1.5 258 kvm_apic_ipi 1894 1.4 251 kvm_fpu 1476 1.1 164 kvm_pio 738 0.5 82 kvm_userspace_exit 738 0.5 82 kvm_msi_set_irq 701 0.5 69 kvm_fast_mmio 238 0.2 4 kvm_halt_poll_ns 50 0.0 1 kvm_ple_window_update 28 0.0 0 kvm_page_fault 4 0.0 0 It can be clearly appreciated how the number of vm exits per second, specially the ones related to notifications (kvm_fast_mmio and kvm_msi_set_irq) is drastically lower. Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-14 12:24:34 +00:00
/// Records on every worker thread whether EVENT_IDX is enabled.
///
/// Panics if a worker thread's mutex is poisoned.
fn set_event_idx(&mut self, enabled: bool) {
for thread in self.threads.iter() {
thread.lock().unwrap().event_idx = enabled;
}
vhost_user_block: Make use of the EVENT_IDX feature Now that vhost_user_backend and vm-virtio do support EVENT_IDX, use it in vhost_user_block to reduce the number of notifications sent between the driver and the device. This is specially useful when using active polling on the virtqueue, as it'll be implemented by a future patch. This is a snapshot of kvm_stat while generating ~60K IOPS with fio on the guest without EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 393454 20.3 62494 kvm_exit 393446 20.3 62494 kvm_apic_accept_irq 378146 19.5 60268 kvm_msi_set_irq 369720 19.0 58881 kvm_fast_mmio 370497 19.1 58817 kvm_hv_timer_state 10197 0.5 1715 kvm_msr 8770 0.5 1443 kvm_wait_lapic_expire 7018 0.4 1118 kvm_apic 2768 0.1 538 kvm_pv_tlb_flush 2028 0.1 360 kvm_vcpu_wakeup 1453 0.1 278 kvm_apic_ipi 1384 0.1 269 kvm_fpu 1148 0.1 164 kvm_pio 574 0.0 82 kvm_userspace_exit 574 0.0 82 kvm_halt_poll_ns 24 0.0 3 And this is the snapshot while doing the same thing with EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 35506 26.0 3873 kvm_exit 35499 26.0 3873 kvm_hv_timer_state 14740 10.8 1672 kvm_apic_accept_irq 13017 9.5 1438 kvm_msr 12845 9.4 1421 kvm_wait_lapic_expire 10422 7.6 1118 kvm_apic 3788 2.8 502 kvm_pv_tlb_flush 2708 2.0 340 kvm_vcpu_wakeup 1992 1.5 258 kvm_apic_ipi 1894 1.4 251 kvm_fpu 1476 1.1 164 kvm_pio 738 0.5 82 kvm_userspace_exit 738 0.5 82 kvm_msi_set_irq 701 0.5 69 kvm_fast_mmio 238 0.2 4 kvm_halt_poll_ns 50 0.0 1 kvm_ple_window_update 28 0.0 0 kvm_page_fault 4 0.0 0 It can be clearly appreciated how the number of vm exits per second, specially the ones related to notifications (kvm_fast_mmio and kvm_msi_set_irq) is drastically lower. Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-14 12:24:34 +00:00
}
fn handle_event(
&mut self,
device_event: u16,
evset: EventSet,
vrings: &[VringRwLock<GuestMemoryAtomic<GuestMemoryMmap>>],
thread_id: usize,
) -> VhostUserBackendResult<()> {
if evset != EventSet::IN {
return Err(Error::HandleEventNotEpollIn.into());
}
debug!("event received: {:?}", device_event);
let mut thread = self.threads[thread_id].lock().unwrap();
match device_event {
0 => {
let mut vring = vrings[0].get_mut();
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
if self.poll_queue {
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
// Actively poll the queue until POLL_QUEUE_US has passed
// without seeing a new request.
let mut now = Instant::now();
loop {
if thread.process_queue(&mut vring) {
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
now = Instant::now();
} else if now.elapsed().as_micros() > POLL_QUEUE_US {
break;
}
}
}
if thread.event_idx {
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
// vm-virtio's Queue implementation only checks avail_index
// once, so to properly support EVENT_IDX we need to keep
// calling process_queue() until it stops finding new
// requests on the queue.
loop {
vring
.get_queue_mut()
.enable_notification(self.mem.memory().deref())
.unwrap();
if !thread.process_queue(&mut vring) {
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
break;
}
vhost_user_block: Make use of the EVENT_IDX feature Now that vhost_user_backend and vm-virtio do support EVENT_IDX, use it in vhost_user_block to reduce the number of notifications sent between the driver and the device. This is specially useful when using active polling on the virtqueue, as it'll be implemented by a future patch. This is a snapshot of kvm_stat while generating ~60K IOPS with fio on the guest without EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 393454 20.3 62494 kvm_exit 393446 20.3 62494 kvm_apic_accept_irq 378146 19.5 60268 kvm_msi_set_irq 369720 19.0 58881 kvm_fast_mmio 370497 19.1 58817 kvm_hv_timer_state 10197 0.5 1715 kvm_msr 8770 0.5 1443 kvm_wait_lapic_expire 7018 0.4 1118 kvm_apic 2768 0.1 538 kvm_pv_tlb_flush 2028 0.1 360 kvm_vcpu_wakeup 1453 0.1 278 kvm_apic_ipi 1384 0.1 269 kvm_fpu 1148 0.1 164 kvm_pio 574 0.0 82 kvm_userspace_exit 574 0.0 82 kvm_halt_poll_ns 24 0.0 3 And this is the snapshot while doing the same thing with EVENT_IDX: Event Total %Total CurAvg/s kvm_entry 35506 26.0 3873 kvm_exit 35499 26.0 3873 kvm_hv_timer_state 14740 10.8 1672 kvm_apic_accept_irq 13017 9.5 1438 kvm_msr 12845 9.4 1421 kvm_wait_lapic_expire 10422 7.6 1118 kvm_apic 3788 2.8 502 kvm_pv_tlb_flush 2708 2.0 340 kvm_vcpu_wakeup 1992 1.5 258 kvm_apic_ipi 1894 1.4 251 kvm_fpu 1476 1.1 164 kvm_pio 738 0.5 82 kvm_userspace_exit 738 0.5 82 kvm_msi_set_irq 701 0.5 69 kvm_fast_mmio 238 0.2 4 kvm_halt_poll_ns 50 0.0 1 kvm_ple_window_update 28 0.0 0 kvm_page_fault 4 0.0 0 It can be clearly appreciated how the number of vm exits per second, specially the ones related to notifications (kvm_fast_mmio and kvm_msi_set_irq) is drastically lower. Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-14 12:24:34 +00:00
}
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
} else {
// Without EVENT_IDX, a single call is enough.
thread.process_queue(&mut vring);
}
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
Ok(())
}
_ => Err(Error::HandleEventUnknownEvent.into()),
}
}
/// Returns a copy of the device configuration space as raw bytes.
///
/// The requested `_offset` and `_size` are not consulted: the complete
/// configuration structure is always returned.
fn get_config(&self, _offset: u32, _size: u32) -> Vec<u8> {
    let raw: &[u8] = self.config.as_slice();
    Vec::from(raw)
}
/// Writes `data` into the device configuration space starting at byte `offset`.
///
/// Only the `data.len()` bytes beginning at `offset` are overwritten; the rest
/// of the configuration space is left untouched. After a successful write,
/// `update_writeback()` is invoked so the backend can react to the new
/// configuration contents.
///
/// # Errors
///
/// Returns an `io::Error` built from `EINVAL` when the range
/// `offset..offset + data.len()` does not fit inside the configuration space
/// (including the case where that sum would overflow).
fn set_config(&mut self, offset: u32, data: &[u8]) -> result::Result<(), io::Error> {
    let config_slice = self.config.as_mut_slice();
    let start = offset as usize;

    // Do the bounds arithmetic in `usize` with an overflow check, and let
    // `get_mut` validate the range so that no path can panic on guest input.
    let window = start
        .checked_add(data.len())
        .and_then(|end| config_slice.get_mut(start..end));

    match window {
        // The window is exactly `data.len()` bytes long, which is what
        // `copy_from_slice` requires (it panics on a length mismatch).
        Some(window) => window.copy_from_slice(data),
        None => {
            error!("Failed to write config space");
            return Err(io::Error::from_raw_os_error(libc::EINVAL));
        }
    }

    self.update_writeback();
    Ok(())
}
/// Returns a duplicate of the kill event belonging to worker `thread_index`.
///
/// # Panics
///
/// Panics if `thread_index` is out of range, if the worker's mutex is
/// poisoned, or if the event descriptor cannot be cloned.
fn exit_event(&self, thread_index: usize) -> Option<EventFd> {
    let worker = self.threads[thread_index].lock().unwrap();
    let kill_evt = worker.kill_evt.try_clone().unwrap();
    Some(kill_evt)
}
/// Returns an owned copy of the stored `queues_per_thread` values.
fn queues_per_thread(&self) -> Vec<u64> {
    let per_thread = &self.queues_per_thread;
    per_thread.clone()
}
/// Accepts a new guest memory handle.
///
/// The handle is not used here (`_mem` is ignored) and the call always
/// succeeds with `Ok(())`.
fn update_memory(
&mut self,
_mem: GuestMemoryAtomic<GuestMemoryMmap>,
) -> VhostUserBackendResult<()> {
Ok(())
}
}
/// Settings for the vhost-user block backend, as produced by
/// `VhostUserBlkBackendConfig::parse` from a backend option string.
struct VhostUserBlkBackendConfig {
/// Value of the mandatory `path` option; handed to `VhostUserBlkBackend::new`
/// (presumably the disk image to serve — confirm against that constructor).
path: String,
/// Value of the mandatory `socket` option; used to create the `Listener`.
socket: String,
/// Value of the `num_queues` option; defaults to 1 when absent.
num_queues: usize,
/// Value of the `queue_size` option; defaults to 1024 when absent.
queue_size: usize,
/// Value of the `readonly` toggle; defaults to `false` when absent.
readonly: bool,
/// Value of the `direct` toggle; defaults to `false` when absent.
direct: bool,
/// Value of the `poll_queue` toggle; defaults to `true` when absent.
poll_queue: bool,
}
impl VhostUserBlkBackendConfig {
fn parse(backend: &str) -> Result<Self> {
let mut parser = OptionParser::new();
parser
.add("path")
.add("readonly")
.add("direct")
.add("num_queues")
.add("queue_size")
.add("socket")
.add("poll_queue");
parser.parse(backend).map_err(Error::FailedConfigParse)?;
let path = parser.get("path").ok_or(Error::PathParameterMissing)?;
let readonly = parser
.convert::<Toggle>("readonly")
.map_err(Error::FailedConfigParse)?
.unwrap_or(Toggle(false))
.0;
let direct = parser
.convert::<Toggle>("direct")
.map_err(Error::FailedConfigParse)?
.unwrap_or(Toggle(false))
.0;
let num_queues = parser
.convert("num_queues")
.map_err(Error::FailedConfigParse)?
.unwrap_or(1);
let socket = parser.get("socket").ok_or(Error::SocketParameterMissing)?;
let poll_queue = parser
.convert::<Toggle>("poll_queue")
.map_err(Error::FailedConfigParse)?
.unwrap_or(Toggle(true))
.0;
let queue_size = parser
.convert("queue_size")
.map_err(Error::FailedConfigParse)?
.unwrap_or(1024);
Ok(VhostUserBlkBackendConfig {
path,
socket,
num_queues,
queue_size,
readonly,
direct,
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
poll_queue,
})
}
}
pub fn start_block_backend(backend_command: &str) {
let backend_config = match VhostUserBlkBackendConfig::parse(backend_command) {
Ok(config) => config,
Err(e) => {
println!("Failed parsing parameters {e:?}");
process::exit(1);
}
};
let mem = GuestMemoryAtomic::new(GuestMemoryMmap::new());
let blk_backend = Arc::new(RwLock::new(
VhostUserBlkBackend::new(
backend_config.path,
backend_config.num_queues,
backend_config.readonly,
backend_config.direct,
vhost_user_block: Implement optional static polling Actively polling the virtqueue significantly reduces the latency of each I/O operation, at the expense of using more CPU time. This features is specially useful when using low-latency devices (SSD, NVMe) as the backend. This change implements static polling. When a request arrives after being idle, vhost_user_block will keep checking the virtqueue for new requests, until POLL_QUEUE_US (50us) has passed without finding one. POLL_QUEUE_US is defined to be 50us, based on the current latency of enterprise SSDs (< 30us) and the overhead of the emulation. This feature is enabled by default, and can be disabled by using the "poll_queue" parameter of "block-backend". This is a test using null_blk as a backend for the image, with the following parameters: - null_blk gb=20 nr_devices=1 irqmode=2 completion_nsec=0 no_sched=1 With "poll_queue=false": fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=169MiB/s][r=43.2k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=433: Tue Feb 18 11:12:59 2020 read: IOPS=43.2k, BW=169MiB/s (177MB/s)(1688MiB/10001msec) clat (usec): min=17, max=836, avg=21.64, stdev= 3.81 lat (usec): min=17, max=836, avg=21.77, stdev= 3.81 clat percentiles (nsec): | 1.00th=[19328], 5.00th=[19840], 10.00th=[20352], 20.00th=[21120], | 30.00th=[21376], 40.00th=[21376], 50.00th=[21376], 60.00th=[21632], | 70.00th=[21632], 80.00th=[21888], 90.00th=[22144], 95.00th=[22912], | 99.00th=[28544], 99.50th=[30336], 99.90th=[39168], 99.95th=[42752], | 99.99th=[71168] bw ( KiB/s): min=168440, max=188496, per=100.00%, avg=172912.00, stdev=3975.63, samples=19 iops : min=42110, max=47124, avg=43228.00, stdev=993.91, samples=19 lat (usec) : 20=5.90%, 50=94.08%, 100=0.02%, 250=0.01%, 
500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=10.35%, sys=25.82%, ctx=432417, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=432220,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=169MiB/s (177MB/s), 169MiB/s-169MiB/s (177MB/s-177MB/s), io=1688MiB (1770MB), run=10001-10001msec Disk stats (read/write): vdb: ios=427867/0, merge=0/0, ticks=7346/0, in_queue=0, util=99.04% With "poll_queue=true" (default): fio --ioengine=sync --bs=4k --rw randread --name randread --direct=1 --filename=/dev/vdb --time_based --runtime=10 randread: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=sync, iodepth=1 fio-3.14 Starting 1 process Jobs: 1 (f=1): [r(1)][100.0%][r=260MiB/s][r=66.7k IOPS][eta 00m:00s] randread: (groupid=0, jobs=1): err= 0: pid=422: Tue Feb 18 11:14:47 2020 read: IOPS=68.5k, BW=267MiB/s (280MB/s)(2674MiB/10001msec) clat (usec): min=10, max=966, avg=13.60, stdev= 3.49 lat (usec): min=10, max=966, avg=13.70, stdev= 3.50 clat percentiles (nsec): | 1.00th=[11200], 5.00th=[11968], 10.00th=[11968], 20.00th=[12224], | 30.00th=[12992], 40.00th=[13504], 50.00th=[13760], 60.00th=[13888], | 70.00th=[14016], 80.00th=[14144], 90.00th=[14272], 95.00th=[14656], | 99.00th=[20352], 99.50th=[23936], 99.90th=[35072], 99.95th=[36096], | 99.99th=[47872] bw ( KiB/s): min=265456, max=296456, per=100.00%, avg=274229.05, stdev=13048.14, samples=19 iops : min=66364, max=74114, avg=68557.26, stdev=3262.03, samples=19 lat (usec) : 20=98.84%, 50=1.15%, 100=0.01%, 250=0.01%, 500=0.01% lat (usec) : 750=0.01%, 1000=0.01% cpu : usr=8.24%, sys=21.15%, ctx=684669, majf=0, minf=10 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 
4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=684611,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=1 Run status group 0 (all jobs): READ: bw=267MiB/s (280MB/s), 267MiB/s-267MiB/s (280MB/s-280MB/s), io=2674MiB (2804MB), run=10001-10001msec Disk stats (read/write): vdb: ios=677855/0, merge=0/0, ticks=7026/0, in_queue=0, util=99.04% Signed-off-by: Sergio Lopez <slp@redhat.com>
2020-02-17 13:52:25 +00:00
backend_config.poll_queue,
backend_config.queue_size,
mem.clone(),
)
.unwrap(),
));
debug!("blk_backend is created!\n");
let listener = Listener::new(&backend_config.socket, true).unwrap();
let name = "vhost-user-blk-backend";
let mut blk_daemon = VhostUserDaemon::new(name.to_string(), blk_backend.clone(), mem).unwrap();
debug!("blk_daemon is created!\n");
if let Err(e) = blk_daemon.start(listener) {
error!(
"Failed to start daemon for vhost-user-block with error: {:?}\n",
e
);
process::exit(1);
}
if let Err(e) = blk_daemon.wait() {
error!("Error from the main thread: {:?}", e);
}
for thread in blk_backend.read().unwrap().threads.iter() {
if let Err(e) = thread.lock().unwrap().kill_evt.write(1) {
error!("Error shutting down worker thread: {:?}", e)
}
}
}