diff --git a/tests/integration.rs b/tests/integration.rs index f2d21549e..e63590e96 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -1857,7 +1857,19 @@ fn _test_virtio_iommu(acpi: bool) { handle_child_output(r, &output); } -fn enable_guest_watchdog(guest: &Guest, current_reboot_count: &mut u32) { +fn get_reboot_count(guest: &Guest) -> u32 { + // Ensure that the current boot journal is written so reboot counts are valid + guest.ssh_command("sudo journalctl --sync").unwrap(); + + guest + .ssh_command("sudo journalctl --list-boots | wc -l") + .unwrap() + .trim() + .parse::<u32>() + .unwrap_or_default() +} + +fn enable_guest_watchdog(guest: &Guest, watchdog_sec: u32) { // Check for PCI device assert!(guest .does_device_vendor_pair_match("0x1063", "0x1af4") @@ -1865,89 +1877,11 @@ fn enable_guest_watchdog(guest: &Guest, current_reboot_count: &mut u32) { // Enable systemd watchdog guest - .ssh_command("echo RuntimeWatchdogSec=15s | sudo tee -a /etc/systemd/system.conf") + .ssh_command(&format!( + "echo RuntimeWatchdogSec={}s | sudo tee -a /etc/systemd/system.conf", + watchdog_sec + )) .unwrap(); - - guest.ssh_command("sudo reboot").unwrap(); - - guest.wait_vm_boot(None).unwrap(); - - // Check that systemd has activated the watchdog - assert_eq!( - guest - .ssh_command("sudo journalctl | grep -c -- \"Watchdog started\"") - .unwrap() - .trim() - .parse::<u32>() - .unwrap_or_default(), - 2 - ); - - // Ensure that the current boot journal is written so reboot counts are valid - guest.ssh_command("sudo journalctl --sync").unwrap(); - - *current_reboot_count += 1; - - let boot_count = guest - .ssh_command("sudo journalctl --list-boots | wc -l") - .unwrap() - .trim() - .parse::<u32>() - .unwrap_or_default(); - assert_eq!(boot_count, *current_reboot_count); -} - -fn check_guest_watchdog_no_reboot(guest: &Guest, current_reboot_count: &u32) { - // Allow some normal time to elapse to check we don't get spurious reboots - thread::sleep(std::time::Duration::new(40, 0)); - - // Check 
no reboot - let boot_count = guest - .ssh_command("sudo journalctl --list-boots | wc -l") - .unwrap() - .trim() - .parse::<u32>() - .unwrap_or_default(); - assert_eq!(boot_count, *current_reboot_count); -} - -fn check_guest_watchdog_one_reboot( - guest: &Guest, - _api_socket: &str, - current_reboot_count: &mut u32, -) { - // Trigger a panic (sync first). We need to do this inside a screen with a delay so the SSH command returns. - guest.ssh_command("screen -dmS reboot sh -c \"sleep 5; echo s | tee /proc/sysrq-trigger; echo c | sudo tee /proc/sysrq-trigger\"").unwrap(); - - // Allow some time for the watchdog to trigger (max 30s) and reboot to happen - guest.wait_vm_boot(Some(50)).unwrap(); - - // Check that watchdog triggered reboot - *current_reboot_count += 1; - let boot_count = guest - .ssh_command("sudo journalctl --list-boots | wc -l") - .unwrap() - .trim() - .parse::<u32>() - .unwrap_or_default(); - assert_eq!(boot_count, *current_reboot_count); - - #[cfg(target_arch = "x86_64")] - { - // Now pause the VM and remain offline for 30s - assert!(remote_command(_api_socket, "pause", None)); - thread::sleep(std::time::Duration::new(30, 0)); - assert!(remote_command(_api_socket, "resume", None)); - - // Check no reboot - let boot_count = guest - .ssh_command("sudo journalctl --list-boots | wc -l") - .unwrap() - .trim() - .parse::<u32>() - .unwrap_or_default(); - assert_eq!(boot_count, *current_reboot_count); - } } mod common_parallel { @@ -6097,10 +6031,50 @@ mod common_parallel { let r = std::panic::catch_unwind(|| { guest.wait_vm_boot(None).unwrap(); - let mut current_reboot_count = 1; - enable_guest_watchdog(&guest, &mut current_reboot_count); - check_guest_watchdog_no_reboot(&guest, &current_reboot_count); - check_guest_watchdog_one_reboot(&guest, &api_socket, &mut current_reboot_count); + + let mut expected_reboot_count = 1; + + // Enable the watchdog with a 15s timeout + enable_guest_watchdog(&guest, 15); + + // Reboot and check that systemd has activated the watchdog + 
guest.ssh_command("sudo reboot").unwrap(); + guest.wait_vm_boot(None).unwrap(); + expected_reboot_count += 1; + assert_eq!(get_reboot_count(&guest), expected_reboot_count); + assert_eq!( + guest + .ssh_command("sudo journalctl | grep -c -- \"Watchdog started\"") + .unwrap() + .trim() + .parse::<u32>() + .unwrap_or_default(), + 2 + ); + + // Allow some normal time to elapse to check we don't get spurious reboots + thread::sleep(std::time::Duration::new(40, 0)); + // Check no reboot + assert_eq!(get_reboot_count(&guest), expected_reboot_count); + + // Trigger a panic (sync first). We need to do this inside a screen with a delay so the SSH command returns. + guest.ssh_command("screen -dmS reboot sh -c \"sleep 5; echo s | tee /proc/sysrq-trigger; echo c | sudo tee /proc/sysrq-trigger\"").unwrap(); + // Allow some time for the watchdog to trigger (max 30s) and reboot to happen + guest.wait_vm_boot(Some(50)).unwrap(); + // Check a reboot is triggered by the watchdog + expected_reboot_count += 1; + assert_eq!(get_reboot_count(&guest), expected_reboot_count); + + #[cfg(target_arch = "x86_64")] + { + // Now pause the VM and remain offline for 30s + assert!(remote_command(&api_socket, "pause", None)); + thread::sleep(std::time::Duration::new(30, 0)); + assert!(remote_command(&api_socket, "resume", None)); + + // Check no reboot + assert_eq!(get_reboot_count(&guest), expected_reboot_count); + } }); let _ = child.kill(); @@ -8453,9 +8427,30 @@ mod live_migration { } if watchdog { - let mut current_reboot_count = 1; - enable_guest_watchdog(&guest, &mut current_reboot_count); - check_guest_watchdog_no_reboot(&guest, &current_reboot_count); + let mut expected_reboot_count = 1; + + // Enable the watchdog with a 15s timeout + enable_guest_watchdog(&guest, 15); + + // Reboot and check that systemd has activated the watchdog + guest.ssh_command("sudo reboot").unwrap(); + guest.wait_vm_boot(None).unwrap(); + expected_reboot_count += 1; + assert_eq!(get_reboot_count(&guest), 
expected_reboot_count); + assert_eq!( + guest + .ssh_command("sudo journalctl | grep -c -- \"Watchdog started\"") + .unwrap() + .trim() + .parse::<u32>() + .unwrap_or_default(), + 2 + ); + + // Allow some normal time to elapse to check we don't get spurious reboots + thread::sleep(std::time::Duration::new(40, 0)); + // Check no reboot + assert_eq!(get_reboot_count(&guest), expected_reboot_count); } // Start the live-migration @@ -8639,13 +8634,31 @@ mod live_migration { } if watchdog { - let mut current_reboot_count = 2; - check_guest_watchdog_no_reboot(&guest, &current_reboot_count); - check_guest_watchdog_one_reboot( - &guest, - &dest_api_socket, - &mut current_reboot_count, - ); + let mut expected_reboot_count = 2; + + // Allow some normal time to elapse to check we don't get spurious reboots + thread::sleep(std::time::Duration::new(40, 0)); + // Check no reboot + assert_eq!(get_reboot_count(&guest), expected_reboot_count); + + // Trigger a panic (sync first). We need to do this inside a screen with a delay so the SSH command returns. + guest.ssh_command("screen -dmS reboot sh -c \"sleep 5; echo s | tee /proc/sysrq-trigger; echo c | sudo tee /proc/sysrq-trigger\"").unwrap(); + // Allow some time for the watchdog to trigger (max 30s) and reboot to happen + guest.wait_vm_boot(Some(50)).unwrap(); + // Check a reboot is triggered by the watchdog + expected_reboot_count += 1; + assert_eq!(get_reboot_count(&guest), expected_reboot_count); + + #[cfg(target_arch = "x86_64")] + { + // Now pause the VM and remain offline for 30s + assert!(remote_command(&dest_api_socket, "pause", None)); + thread::sleep(std::time::Duration::new(30, 0)); + assert!(remote_command(&dest_api_socket, "resume", None)); + + // Check no reboot + assert_eq!(get_reboot_count(&guest), expected_reboot_count); + } } if balloon {