From f06970730b84dbec896f4dcec2a97e5a7d608e51 Mon Sep 17 00:00:00 2001 From: Rob Bradford Date: Tue, 2 Jun 2020 17:00:31 +0100 Subject: [PATCH] vm-virtio: net: Handle lost interrupts on restore In some situations it is seen that the first interrupt sent to the guest is lost upon a restore (due to the tap worker being awake ahead of the vCPUs). This causes problems with VIRTIO_RING_F_EVENT_IDX interrupt suppression as the guest will not be interrupted again. In order to mitigate this, we always interrupt the guest until the device itself has been signalled by the guest. Signed-off-by: Rob Bradford --- vm-virtio/src/net.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/vm-virtio/src/net.rs b/vm-virtio/src/net.rs index 5eeb9ca2f..b43de7eaa 100644 --- a/vm-virtio/src/net.rs +++ b/vm-virtio/src/net.rs @@ -187,6 +187,12 @@ struct NetEpollHandler { interrupt_cb: Arc, kill_evt: EventFd, pause_evt: EventFd, + + // Always generate interrupts until the driver has signalled to the device. + // This mitigates a problem with interrupts from tap events being "lost" upon + // a restore as the vCPU thread isn't ready to handle the interrupt. This causes + // issues when combined with VIRTIO_RING_F_EVENT_IDX interrupt suppression. + driver_awake: bool, } impl NetEpollHandler { @@ -208,7 +214,7 @@ impl NetEpollHandler { error!("Failed to get rx queue event: {:?}", e); } - if self.net.resume_rx(&mut queue)? { + if self.net.resume_rx(&mut queue)? || !self.driver_awake { self.signal_used_queue(queue)?; info!("Signalling RX queue"); } else { @@ -226,7 +232,7 @@ impl NetEpollHandler { if let Err(e) = queue_evt.read() { error!("Failed to get tx queue event: {:?}", e); } - if self.net.process_tx(&mut queue)? { + if self.net.process_tx(&mut queue)? 
|| !self.driver_awake { self.signal_used_queue(queue)?; info!("Signalling TX queue"); } else { @@ -329,9 +335,11 @@ impl NetEpollHandler { match ev_type { RX_QUEUE_EVENT => { + self.driver_awake = true; self.handle_rx_event(&mut queues[0], &queue_evts[0])?; } TX_QUEUE_EVENT => { + self.driver_awake = true; self.handle_tx_event(&mut queues[1], &queue_evts[1])?; } RX_TAP_EVENT => { @@ -633,6 +641,7 @@ impl VirtioDevice for Net { interrupt_cb: interrupt_cb.clone(), kill_evt: kill_evt.try_clone().unwrap(), pause_evt: pause_evt.try_clone().unwrap(), + driver_awake: false, }; let paused = self.paused.clone();