cloud-hypervisor/vm-virtio/src/queue.rs
Rob Bradford 3b2faa9f11 vm-virtio: Reset underlying device on driver request
If the driver triggers a reset by writing zero into the status register
then reset the underlying device if supported. A device reset also
requires resetting various aspects of the queue.

In order to be able to do a subsequent reactivate it is required to
reclaim certain resources (interrupt and queue EventFDs.) If a device
reset is requested by the driver but the underlying device does not
support it then generate an error as the driver would not be able to
configure it anyway.

Signed-off-by: Rob Bradford <robert.bradford@intel.com>
2019-05-09 15:44:18 +02:00

820 lines
25 KiB
Rust

// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
use std::cmp::min;
use std::num::Wrapping;
use std::sync::atomic::{fence, Ordering};
use vm_memory::{
Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap, GuestUsize,
};
pub(super) const VIRTQ_DESC_F_NEXT: u16 = 0x1;
pub(super) const VIRTQ_DESC_F_WRITE: u16 = 0x2;
// GuestMemoryMmap::read_obj() will be used to fetch the descriptor,
// which has an explicit constraint that the entire descriptor doesn't
// cross the page boundary. Otherwise the descriptor may be splitted into
// two mmap regions which causes failure of GuestMemoryMmap::read_obj().
//
// The Virtio Spec 1.0 defines the alignment of VirtIO descriptor is 16 bytes,
// which fulfills the explicit constraint of GuestMemoryMmap::read_obj().
/// A virtio descriptor constraints with C representive.
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct Descriptor {
addr: u64,
len: u32,
flags: u16,
next: u16,
}
unsafe impl ByteValued for Descriptor {}
/// A virtio descriptor chain.
pub struct DescriptorChain<'a> {
mem: &'a GuestMemoryMmap,
desc_table: GuestAddress,
queue_size: u16,
ttl: u16, // used to prevent infinite chain cycles
/// Index into the descriptor table
pub index: u16,
/// Guest physical address of device specific data
pub addr: GuestAddress,
/// Length of device specific data
pub len: u32,
/// Includes next, write, and indirect bits
pub flags: u16,
/// Index into the descriptor table of the next descriptor if flags has
/// the next bit set
pub next: u16,
}
impl<'a> DescriptorChain<'a> {
fn checked_new(
mem: &GuestMemoryMmap,
desc_table: GuestAddress,
queue_size: u16,
index: u16,
) -> Option<DescriptorChain> {
if index >= queue_size {
return None;
}
let desc_head = match mem.checked_offset(desc_table, (index as usize) * 16) {
Some(a) => a,
None => return None,
};
mem.checked_offset(desc_head, 16)?;
// These reads can't fail unless Guest memory is hopelessly broken.
let desc = match mem.read_obj::<Descriptor>(desc_head) {
Ok(ret) => ret,
Err(_) => {
// TODO log address
error!("Failed to read from memory");
return None;
}
};
let chain = DescriptorChain {
mem,
desc_table,
queue_size,
ttl: queue_size,
index,
addr: GuestAddress(desc.addr),
len: desc.len,
flags: desc.flags,
next: desc.next,
};
if chain.is_valid() {
Some(chain)
} else {
None
}
}
fn is_valid(&self) -> bool {
!(self
.mem
.checked_offset(self.addr, self.len as usize)
.is_none()
|| (self.has_next() && self.next >= self.queue_size))
}
/// Gets if this descriptor chain has another descriptor chain linked after it.
pub fn has_next(&self) -> bool {
self.flags & VIRTQ_DESC_F_NEXT != 0 && self.ttl > 1
}
/// If the driver designated this as a write only descriptor.
///
/// If this is false, this descriptor is read only.
/// Write only means the the emulated device can write and the driver can read.
pub fn is_write_only(&self) -> bool {
self.flags & VIRTQ_DESC_F_WRITE != 0
}
/// Gets the next descriptor in this descriptor chain, if there is one.
///
/// Note that this is distinct from the next descriptor chain returned by `AvailIter`, which is
/// the head of the next _available_ descriptor chain.
pub fn next_descriptor(&self) -> Option<DescriptorChain<'a>> {
if self.has_next() {
DescriptorChain::checked_new(self.mem, self.desc_table, self.queue_size, self.next).map(
|mut c| {
c.ttl = self.ttl - 1;
c
},
)
} else {
None
}
}
}
/// Consuming iterator over all available descriptor chain heads in the queue.
pub struct AvailIter<'a, 'b> {
mem: &'a GuestMemoryMmap,
desc_table: GuestAddress,
avail_ring: GuestAddress,
next_index: Wrapping<u16>,
last_index: Wrapping<u16>,
queue_size: u16,
next_avail: &'b mut Wrapping<u16>,
}
impl<'a, 'b> AvailIter<'a, 'b> {
pub fn new(mem: &'a GuestMemoryMmap, q_next_avail: &'b mut Wrapping<u16>) -> AvailIter<'a, 'b> {
AvailIter {
mem,
desc_table: GuestAddress(0),
avail_ring: GuestAddress(0),
next_index: Wrapping(0),
last_index: Wrapping(0),
queue_size: 0,
next_avail: q_next_avail,
}
}
}
impl<'a, 'b> Iterator for AvailIter<'a, 'b> {
type Item = DescriptorChain<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.next_index == self.last_index {
return None;
}
let offset = (4 + (self.next_index.0 % self.queue_size) * 2) as usize;
let avail_addr = match self.mem.checked_offset(self.avail_ring, offset) {
Some(a) => a,
None => return None,
};
// This index is checked below in checked_new
let desc_index: u16 = match self.mem.read_obj(avail_addr) {
Ok(ret) => ret,
Err(_) => {
// TODO log address
error!("Failed to read from memory");
return None;
}
};
self.next_index += Wrapping(1);
let ret =
DescriptorChain::checked_new(self.mem, self.desc_table, self.queue_size, desc_index);
if ret.is_some() {
*self.next_avail += Wrapping(1);
}
ret
}
}
#[derive(Clone)]
/// A virtio queue's parameters.
pub struct Queue {
/// The maximal size in elements offered by the device
max_size: u16,
/// The queue size in elements the driver selected
pub size: u16,
/// Inidcates if the queue is finished with configuration
pub ready: bool,
/// Guest physical address of the descriptor table
pub desc_table: GuestAddress,
/// Guest physical address of the available ring
pub avail_ring: GuestAddress,
/// Guest physical address of the used ring
pub used_ring: GuestAddress,
next_avail: Wrapping<u16>,
next_used: Wrapping<u16>,
}
impl Queue {
/// Constructs an empty virtio queue with the given `max_size`.
pub fn new(max_size: u16) -> Queue {
Queue {
max_size,
size: max_size,
ready: false,
desc_table: GuestAddress(0),
avail_ring: GuestAddress(0),
used_ring: GuestAddress(0),
next_avail: Wrapping(0),
next_used: Wrapping(0),
}
}
pub fn get_max_size(&self) -> u16 {
self.max_size
}
/// Return the actual size of the queue, as the driver may not set up a
/// queue as big as the device allows.
pub fn actual_size(&self) -> u16 {
min(self.size, self.max_size)
}
/// Reset the queue to a state that is acceptable for a device reset
pub fn reset(&mut self) {
self.ready = false;
self.size = self.max_size;
}
pub fn is_valid(&self, mem: &GuestMemoryMmap) -> bool {
let queue_size = self.actual_size() as usize;
let desc_table = self.desc_table;
let desc_table_size = 16 * queue_size;
let avail_ring = self.avail_ring;
let avail_ring_size = 6 + 2 * queue_size;
let used_ring = self.used_ring;
let used_ring_size = 6 + 8 * queue_size;
if !self.ready {
error!("attempt to use virtio queue that is not marked ready");
false
} else if self.size > self.max_size || self.size == 0 || (self.size & (self.size - 1)) != 0
{
error!("virtio queue with invalid size: {}", self.size);
false
} else if desc_table
.checked_add(desc_table_size as GuestUsize)
.map_or(true, |v| !mem.address_in_range(v))
{
error!(
"virtio queue descriptor table goes out of bounds: start:0x{:08x} size:0x{:08x}",
desc_table.raw_value(),
desc_table_size
);
false
} else if avail_ring
.checked_add(avail_ring_size as GuestUsize)
.map_or(true, |v| !mem.address_in_range(v))
{
error!(
"virtio queue available ring goes out of bounds: start:0x{:08x} size:0x{:08x}",
avail_ring.raw_value(),
avail_ring_size
);
false
} else if used_ring
.checked_add(used_ring_size as GuestUsize)
.map_or(true, |v| !mem.address_in_range(v))
{
error!(
"virtio queue used ring goes out of bounds: start:0x{:08x} size:0x{:08x}",
used_ring.raw_value(),
used_ring_size
);
false
} else if desc_table.mask(0xf) != 0 {
error!("virtio queue descriptor table breaks alignment contraints");
false
} else if avail_ring.mask(0x1) != 0 {
error!("virtio queue available ring breaks alignment contraints");
false
} else if used_ring.mask(0x3) != 0 {
error!("virtio queue used ring breaks alignment contraints");
false
} else {
true
}
}
/// A consuming iterator over all available descriptor chain heads offered by the driver.
pub fn iter<'a, 'b>(&'b mut self, mem: &'a GuestMemoryMmap) -> AvailIter<'a, 'b> {
let queue_size = self.actual_size();
let avail_ring = self.avail_ring;
let index_addr = match mem.checked_offset(avail_ring, 2) {
Some(ret) => ret,
None => {
// TODO log address
warn!("Invalid offset");
return AvailIter::new(mem, &mut self.next_avail);
}
};
// Note that last_index has no invalid values
let last_index: u16 = match mem.read_obj::<u16>(index_addr) {
Ok(ret) => ret,
Err(_) => return AvailIter::new(mem, &mut self.next_avail),
};
AvailIter {
mem,
desc_table: self.desc_table,
avail_ring,
next_index: self.next_avail,
last_index: Wrapping(last_index),
queue_size,
next_avail: &mut self.next_avail,
}
}
/// Puts an available descriptor head into the used ring for use by the guest.
pub fn add_used(&mut self, mem: &GuestMemoryMmap, desc_index: u16, len: u32) {
if desc_index >= self.actual_size() {
error!(
"attempted to add out of bounds descriptor to used ring: {}",
desc_index
);
return;
}
let used_ring = self.used_ring;
let next_used = u64::from(self.next_used.0 % self.actual_size());
let used_elem = used_ring.unchecked_add(4 + next_used * 8);
// These writes can't fail as we are guaranteed to be within the descriptor ring.
mem.write_obj(u32::from(desc_index), used_elem).unwrap();
mem.write_obj(len as u32, used_elem.unchecked_add(4))
.unwrap();
self.next_used += Wrapping(1);
// This fence ensures all descriptor writes are visible before the index update is.
fence(Ordering::Release);
mem.write_obj(self.next_used.0 as u16, used_ring.unchecked_add(2))
.unwrap();
}
/// Goes back one position in the available descriptor chain offered by the driver.
/// Rust does not support bidirectional iterators. This is the only way to revert the effect
/// of an iterator increment on the queue.
pub fn go_to_previous_position(&mut self) {
self.next_avail -= Wrapping(1);
}
}
#[cfg(test)]
pub(crate) mod tests {
extern crate vm_memory;
use std::marker::PhantomData;
use std::mem;
pub use super::*;
use vm_memory::{GuestAddress, GuestMemoryMmap, GuestUsize};
// Represents a location in GuestMemoryMmap which holds a given type.
pub struct SomeplaceInMemory<'a, T> {
pub location: GuestAddress,
mem: &'a GuestMemoryMmap,
phantom: PhantomData<*const T>,
}
// The ByteValued trait is required to use mem.read_obj and write_obj.
impl<'a, T> SomeplaceInMemory<'a, T>
where
T: vm_memory::ByteValued,
{
fn new(location: GuestAddress, mem: &'a GuestMemoryMmap) -> Self {
SomeplaceInMemory {
location,
mem,
phantom: PhantomData,
}
}
// Reads from the actual memory location.
pub fn get(&self) -> T {
self.mem.read_obj(self.location).unwrap()
}
// Writes to the actual memory location.
pub fn set(&self, val: T) {
self.mem.write_obj(val, self.location).unwrap()
}
// This function returns a place in memory which holds a value of type U, and starts
// offset bytes after the current location.
fn map_offset<U>(&self, offset: GuestUsize) -> SomeplaceInMemory<'a, U> {
SomeplaceInMemory {
location: self.location.checked_add(offset).unwrap(),
mem: self.mem,
phantom: PhantomData,
}
}
// This function returns a place in memory which holds a value of type U, and starts
// immediately after the end of self (which is location + sizeof(T)).
fn next_place<U>(&self) -> SomeplaceInMemory<'a, U> {
self.map_offset::<U>(mem::size_of::<T>() as u64)
}
fn end(&self) -> GuestAddress {
self.location
.checked_add(mem::size_of::<T>() as u64)
.unwrap()
}
}
// Represents a virtio descriptor in guest memory.
pub struct VirtqDesc<'a> {
pub addr: SomeplaceInMemory<'a, u64>,
pub len: SomeplaceInMemory<'a, u32>,
pub flags: SomeplaceInMemory<'a, u16>,
pub next: SomeplaceInMemory<'a, u16>,
}
impl<'a> VirtqDesc<'a> {
fn new(start: GuestAddress, mem: &'a GuestMemoryMmap) -> Self {
assert_eq!(start.0 & 0xf, 0);
let addr = SomeplaceInMemory::new(start, mem);
let len = addr.next_place();
let flags = len.next_place();
let next = flags.next_place();
VirtqDesc {
addr,
len,
flags,
next,
}
}
fn start(&self) -> GuestAddress {
self.addr.location
}
fn end(&self) -> GuestAddress {
self.next.end()
}
pub fn set(&self, addr: u64, len: u32, flags: u16, next: u16) {
self.addr.set(addr);
self.len.set(len);
self.flags.set(flags);
self.next.set(next);
}
}
// Represents a virtio queue ring. The only difference between the used and available rings,
// is the ring element type.
pub struct VirtqRing<'a, T> {
pub flags: SomeplaceInMemory<'a, u16>,
pub idx: SomeplaceInMemory<'a, u16>,
pub ring: Vec<SomeplaceInMemory<'a, T>>,
pub event: SomeplaceInMemory<'a, u16>,
}
impl<'a, T> VirtqRing<'a, T>
where
T: vm_memory::ByteValued,
{
fn new(
start: GuestAddress,
mem: &'a GuestMemoryMmap,
qsize: u16,
alignment: GuestUsize,
) -> Self {
assert_eq!(start.0 & (alignment - 1), 0);
let flags = SomeplaceInMemory::new(start, mem);
let idx = flags.next_place();
let mut ring = Vec::with_capacity(qsize as usize);
ring.push(idx.next_place());
for _ in 1..qsize as usize {
let x = ring.last().unwrap().next_place();
ring.push(x)
}
let event = ring.last().unwrap().next_place();
flags.set(0);
idx.set(0);
event.set(0);
VirtqRing {
flags,
idx,
ring,
event,
}
}
pub fn end(&self) -> GuestAddress {
self.event.end()
}
}
#[repr(C)]
#[derive(Clone, Copy, Default)]
pub struct VirtqUsedElem {
pub id: u32,
pub len: u32,
}
unsafe impl vm_memory::ByteValued for VirtqUsedElem {}
pub type VirtqAvail<'a> = VirtqRing<'a, u16>;
pub type VirtqUsed<'a> = VirtqRing<'a, VirtqUsedElem>;
pub struct VirtQueue<'a> {
pub dtable: Vec<VirtqDesc<'a>>,
pub avail: VirtqAvail<'a>,
pub used: VirtqUsed<'a>,
}
impl<'a> VirtQueue<'a> {
// We try to make sure things are aligned properly :-s
pub fn new(start: GuestAddress, mem: &'a GuestMemoryMmap, qsize: u16) -> Self {
// power of 2?
assert!(qsize > 0 && qsize & (qsize - 1) == 0);
let mut dtable = Vec::with_capacity(qsize as usize);
let mut end = start;
for _ in 0..qsize {
let d = VirtqDesc::new(end, mem);
end = d.end();
dtable.push(d);
}
const AVAIL_ALIGN: u64 = 2;
let avail = VirtqAvail::new(end, mem, qsize, AVAIL_ALIGN);
const USED_ALIGN: u64 = 4;
let mut x = avail.end().0;
x = (x + USED_ALIGN - 1) & !(USED_ALIGN - 1);
let used = VirtqUsed::new(GuestAddress(x), mem, qsize, USED_ALIGN);
VirtQueue {
dtable,
avail,
used,
}
}
fn size(&self) -> u16 {
self.dtable.len() as u16
}
fn dtable_start(&self) -> GuestAddress {
self.dtable.first().unwrap().start()
}
fn avail_start(&self) -> GuestAddress {
self.avail.flags.location
}
fn used_start(&self) -> GuestAddress {
self.used.flags.location
}
// Creates a new Queue, using the underlying memory regions represented by the VirtQueue.
pub fn create_queue(&self) -> Queue {
let mut q = Queue::new(self.size());
q.size = self.size();
q.ready = true;
q.desc_table = self.dtable_start();
q.avail_ring = self.avail_start();
q.used_ring = self.used_start();
q
}
pub fn start(&self) -> GuestAddress {
self.dtable_start()
}
pub fn end(&self) -> GuestAddress {
self.used.end()
}
}
#[test]
fn test_checked_new_descriptor_chain() {
let m = &GuestMemoryMmap::new(&[(GuestAddress(0), 0x10000)]).unwrap();
let vq = VirtQueue::new(GuestAddress(0), m, 16);
assert!(vq.end().0 < 0x1000);
// index >= queue_size
assert!(DescriptorChain::checked_new(m, vq.start(), 16, 16).is_none());
// desc_table address is way off
assert!(DescriptorChain::checked_new(m, GuestAddress(0x00ff_ffff_ffff), 16, 0).is_none());
// the addr field of the descriptor is way off
vq.dtable[0].addr.set(0x0fff_ffff_ffff);
assert!(DescriptorChain::checked_new(m, vq.start(), 16, 0).is_none());
// let's create some invalid chains
{
// the addr field of the desc is ok now
vq.dtable[0].addr.set(0x1000);
// ...but the length is too large
vq.dtable[0].len.set(0xffff_ffff);
assert!(DescriptorChain::checked_new(m, vq.start(), 16, 0).is_none());
}
{
// the first desc has a normal len now, and the next_descriptor flag is set
vq.dtable[0].len.set(0x1000);
vq.dtable[0].flags.set(VIRTQ_DESC_F_NEXT);
//..but the the index of the next descriptor is too large
vq.dtable[0].next.set(16);
assert!(DescriptorChain::checked_new(m, vq.start(), 16, 0).is_none());
}
// finally, let's test an ok chain
{
vq.dtable[0].next.set(1);
vq.dtable[1].set(0x2000, 0x1000, 0, 0);
let c = DescriptorChain::checked_new(m, vq.start(), 16, 0).unwrap();
assert_eq!(c.mem as *const GuestMemoryMmap, m as *const GuestMemoryMmap);
assert_eq!(c.desc_table, vq.start());
assert_eq!(c.queue_size, 16);
assert_eq!(c.ttl, c.queue_size);
assert_eq!(c.index, 0);
assert_eq!(c.addr, GuestAddress(0x1000));
assert_eq!(c.len, 0x1000);
assert_eq!(c.flags, VIRTQ_DESC_F_NEXT);
assert_eq!(c.next, 1);
assert!(c.next_descriptor().unwrap().next_descriptor().is_none());
}
}
#[test]
fn test_queue_and_iterator() {
let m = &GuestMemoryMmap::new(&[(GuestAddress(0), 0x10000)]).unwrap();
let vq = VirtQueue::new(GuestAddress(0), m, 16);
let mut q = vq.create_queue();
// q is currently valid
assert!(q.is_valid(m));
// shouldn't be valid when not marked as ready
q.ready = false;
assert!(!q.is_valid(m));
q.ready = true;
// or when size > max_size
q.size = q.max_size << 1;
assert!(!q.is_valid(m));
q.size = q.max_size;
// or when size is 0
q.size = 0;
assert!(!q.is_valid(m));
q.size = q.max_size;
// or when size is not a power of 2
q.size = 11;
assert!(!q.is_valid(m));
q.size = q.max_size;
// or if the various addresses are off
q.desc_table = GuestAddress(0xffff_ffff);
assert!(!q.is_valid(m));
q.desc_table = GuestAddress(0x1001);
assert!(!q.is_valid(m));
q.desc_table = vq.dtable_start();
q.avail_ring = GuestAddress(0xffff_ffff);
assert!(!q.is_valid(m));
q.avail_ring = GuestAddress(0x1001);
assert!(!q.is_valid(m));
q.avail_ring = vq.avail_start();
q.used_ring = GuestAddress(0xffff_ffff);
assert!(!q.is_valid(m));
q.used_ring = GuestAddress(0x1001);
assert!(!q.is_valid(m));
q.used_ring = vq.used_start();
{
// an invalid queue should return an iterator with no next
q.ready = false;
let mut i = q.iter(m);
assert!(i.next().is_none());
}
q.ready = true;
// now let's create two simple descriptor chains
{
for j in 0..5 {
vq.dtable[j].set(
0x1000 * (j + 1) as u64,
0x1000,
VIRTQ_DESC_F_NEXT,
(j + 1) as u16,
);
}
// the chains are (0, 1) and (2, 3, 4)
vq.dtable[1].flags.set(0);
vq.dtable[4].flags.set(0);
vq.avail.ring[0].set(0);
vq.avail.ring[1].set(2);
vq.avail.idx.set(2);
let mut i = q.iter(m);
{
let mut c = i.next().unwrap();
c = c.next_descriptor().unwrap();
assert!(!c.has_next());
}
{
let mut c = i.next().unwrap();
c = c.next_descriptor().unwrap();
c = c.next_descriptor().unwrap();
assert!(!c.has_next());
}
}
// also test go_to_previous_position() works as expected
{
assert!(q.iter(m).next().is_none());
q.go_to_previous_position();
let mut c = q.iter(m).next().unwrap();
c = c.next_descriptor().unwrap();
c = c.next_descriptor().unwrap();
assert!(!c.has_next());
}
}
#[test]
fn test_add_used() {
let m = &GuestMemoryMmap::new(&[(GuestAddress(0), 0x10000)]).unwrap();
let vq = VirtQueue::new(GuestAddress(0), m, 16);
let mut q = vq.create_queue();
assert_eq!(vq.used.idx.get(), 0);
//index too large
q.add_used(m, 16, 0x1000);
assert_eq!(vq.used.idx.get(), 0);
//should be ok
q.add_used(m, 1, 0x1000);
assert_eq!(vq.used.idx.get(), 1);
let x = vq.used.ring[0].get();
assert_eq!(x.id, 1);
assert_eq!(x.len, 0x1000);
}
}