From 6275d4f87e2fac04c7e82281686812910bace950 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 8 Mar 2022 19:04:24 +0800 Subject: [PATCH] vmm: interrupt: fix msi mask irq causing kernel panic on AMD When masking an MSI irq, we set entry.masked to true, so the kvm hypervisor will not pass the gsi to the kernel through the KVM_SET_GSI_ROUTING ioctl, which updates kvm->irq_routing. This triggers a kernel panic on AMD platforms when the gsi is the largest one in the kernel's kvm->irqfds.items: crash> bt PID: 22218 TASK: ffff951a6ad74980 CPU: 73 COMMAND: "vcpu8" #0 [ffffb1ba6707fa40] machine_kexec at ffffffff8565b397 #1 [ffffb1ba6707fa90] __crash_kexec at ffffffff85788a6d #2 [ffffb1ba6707fb58] crash_kexec at ffffffff8578995d #3 [ffffb1ba6707fb70] oops_end at ffffffff85623c0d #4 [ffffb1ba6707fb90] no_context at ffffffff856692c9 #5 [ffffb1ba6707fbf8] exc_page_fault at ffffffff85f95b51 #6 [ffffb1ba6707fc50] asm_exc_page_fault at ffffffff86000ace [exception RIP: svm_update_pi_irte+227] RIP: ffffffffc0761b53 RSP: ffffb1ba6707fd08 RFLAGS: 00010086 RAX: ffffb1ba6707fd78 RBX: ffffb1ba66d91000 RCX: 0000000000000001 RDX: 00003c803f63f1c0 RSI: 000000000000019a RDI: ffffb1ba66db2ab8 RBP: 000000000000019a R8: 0000000000000040 R9: ffff94ca41b82200 R10: ffffffffffffffcf R11: 0000000000000001 R12: 0000000000000001 R13: 0000000000000001 R14: ffffffffffffffcf R15: 000000000000005f ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffffb1ba6707fdb8] kvm_irq_routing_update at ffffffffc09f19a1 [kvm] #8 [ffffb1ba6707fde0] kvm_set_irq_routing at ffffffffc09f2133 [kvm] #9 [ffffb1ba6707fe18] kvm_vm_ioctl at ffffffffc09ef544 [kvm] RIP: 00007f143c36488b RSP: 00007f143a4e04b8 RFLAGS: 00000246 RAX: ffffffffffffffda RBX: 00007f05780041d0 RCX: 00007f143c36488b RDX: 00007f05780041d0 RSI: 000000004008ae6a RDI: 0000000000000020 RBP: 00000000000004e8 R8: 0000000000000008 R9: 00007f05780041e0 R10: 00007f0578004560 R11: 0000000000000246 R12: 00000000000004e0 R13: 000000000000001a R14: 00007f1424001c60 R15: 00007f0578003bc0 
ORIG_RAX: 0000000000000010 CS: 0033 SS: 002b To solve this problem, move route.disable() before set_gsi_routes() to remove the gsi from irqfds.items first. This problem only exists on AMD platforms, because on Intel platforms the kernel simply returns when updating the IRTE, while on AMD it only prints a warning. Also, this patch adjusts the order of enable() and set_gsi_routes() in unmask(), which should do no harm. Signed-off-by: Yi Wang (cherry picked from commit 5375b84e3bebc2b681e51afecf9deabc49e8c986) Signed-off-by: Rob Bradford --- vmm/src/interrupt.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vmm/src/interrupt.rs b/vmm/src/interrupt.rs index 12369453f..14b1d590f 100644 --- a/vmm/src/interrupt.rs +++ b/vmm/src/interrupt.rs @@ -211,8 +211,8 @@ impl InterruptSourceGroup for MsiInterruptGroup { format!("mask: No existing route for interrupt index {}", index), )); } - self.set_gsi_routes(&routes)?; - return route.enable(&self.vm); + route.enable(&self.vm)?; + return self.set_gsi_routes(&routes); } Err(io::Error::new(