mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2024-12-22 13:45:20 +00:00
tests: live-migration: Split watchdog tests into its own function
Signed-off-by: Bo Chen <chen.bo@intel.com>
This commit is contained in:
parent
e3d7824f09
commit
ff3e9eb668
@ -8358,13 +8358,7 @@ mod live_migration {
|
||||
// 4. The destination VM is functional (including various virtio-devices are working properly) after
|
||||
// live migration;
|
||||
// Note: This test does not use vsock as we can't create two identical vsock on the same host.
|
||||
fn _test_live_migration(
|
||||
upgrade_test: bool,
|
||||
numa: bool,
|
||||
local: bool,
|
||||
watchdog: bool,
|
||||
balloon: bool,
|
||||
) {
|
||||
fn _test_live_migration(upgrade_test: bool, numa: bool, local: bool, balloon: bool) {
|
||||
assert!(
|
||||
!(numa && balloon),
|
||||
"Invalid inputs: 'numa' and 'balloon' cannot be tested at the same time."
|
||||
@ -8455,9 +8449,6 @@ mod live_migration {
|
||||
"--pmem",
|
||||
format!("file={}", pmem_temp_file.as_path().to_str().unwrap(),).as_str(),
|
||||
]);
|
||||
if watchdog {
|
||||
src_vm_cmd.args(&["--watchdog"]);
|
||||
}
|
||||
let mut src_child = src_vm_cmd.capture_output().spawn().unwrap();
|
||||
|
||||
// Start the destination VM
|
||||
@ -8546,33 +8537,6 @@ mod live_migration {
|
||||
thread::sleep(std::time::Duration::new(10, 0));
|
||||
}
|
||||
|
||||
if watchdog {
|
||||
let mut expected_reboot_count = 1;
|
||||
|
||||
// Enable the watchdog with a 15s timeout
|
||||
enable_guest_watchdog(&guest, 15);
|
||||
|
||||
// Reboot and check that systemd has activated the watchdog
|
||||
guest.ssh_command("sudo reboot").unwrap();
|
||||
guest.wait_vm_boot(None).unwrap();
|
||||
expected_reboot_count += 1;
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
assert_eq!(
|
||||
guest
|
||||
.ssh_command("sudo journalctl | grep -c -- \"Watchdog started\"")
|
||||
.unwrap()
|
||||
.trim()
|
||||
.parse::<u32>()
|
||||
.unwrap_or_default(),
|
||||
2
|
||||
);
|
||||
|
||||
// Allow some normal time to elapse to check we don't get spurious reboots
|
||||
thread::sleep(std::time::Duration::new(40, 0));
|
||||
// Check no reboot
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
}
|
||||
|
||||
// Start the live-migration
|
||||
let migration_socket = String::from(
|
||||
guest
|
||||
@ -8666,34 +8630,6 @@ mod live_migration {
|
||||
}
|
||||
}
|
||||
|
||||
if watchdog {
|
||||
let mut expected_reboot_count = 2;
|
||||
|
||||
// Allow some normal time to elapse to check we don't get spurious reboots
|
||||
thread::sleep(std::time::Duration::new(40, 0));
|
||||
// Check no reboot
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
|
||||
// Trigger a panic (sync first). We need to do this inside a screen with a delay so the SSH command returns.
|
||||
guest.ssh_command("screen -dmS reboot sh -c \"sleep 5; echo s | tee /proc/sysrq-trigger; echo c | sudo tee /proc/sysrq-trigger\"").unwrap();
|
||||
// Allow some time for the watchdog to trigger (max 30s) and reboot to happen
|
||||
guest.wait_vm_boot(Some(50)).unwrap();
|
||||
// Check a reboot is triggered by the watchdog
|
||||
expected_reboot_count += 1;
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
// Now pause the VM and remain offline for 30s
|
||||
assert!(remote_command(&dest_api_socket, "pause", None));
|
||||
thread::sleep(std::time::Duration::new(30, 0));
|
||||
assert!(remote_command(&dest_api_socket, "resume", None));
|
||||
|
||||
// Check no reboot
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
}
|
||||
}
|
||||
|
||||
if balloon {
|
||||
let total_memory = guest.get_total_memory().unwrap_or_default();
|
||||
assert!(total_memory > 4_800_000);
|
||||
@ -8723,6 +8659,208 @@ mod live_migration {
|
||||
handle_child_output(r, &dest_output);
|
||||
}
|
||||
|
||||
fn _test_live_migration_watchdog(upgrade_test: bool, local: bool) {
|
||||
let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string());
|
||||
let guest = Guest::new(Box::new(focal));
|
||||
let kernel_path = direct_kernel_boot_path();
|
||||
let console_text = String::from("On a branch floating down river a cricket, singing.");
|
||||
let net_id = "net123";
|
||||
let net_params = format!(
|
||||
"id={},tap=,mac={},ip={},mask=255.255.255.0",
|
||||
net_id, guest.network.guest_mac, guest.network.host_ip
|
||||
);
|
||||
|
||||
let memory_param: &[&str] = if local {
|
||||
&["--memory", "size=4G,shared=on"]
|
||||
} else {
|
||||
&["--memory", "size=4G"]
|
||||
};
|
||||
|
||||
let boot_vcpus = 2;
|
||||
let max_vcpus = 4;
|
||||
|
||||
let pmem_temp_file = TempFile::new().unwrap();
|
||||
pmem_temp_file.as_file().set_len(128 << 20).unwrap();
|
||||
std::process::Command::new("mkfs.ext4")
|
||||
.arg(pmem_temp_file.as_path())
|
||||
.output()
|
||||
.expect("Expect creating disk image to succeed");
|
||||
let pmem_path = String::from("/dev/pmem0");
|
||||
|
||||
// Start the source VM
|
||||
let src_vm_path = if !upgrade_test {
|
||||
clh_command("cloud-hypervisor")
|
||||
} else {
|
||||
cloud_hypervisor_release_path()
|
||||
};
|
||||
let src_api_socket = temp_api_path(&guest.tmp_dir);
|
||||
let mut src_vm_cmd = GuestCommand::new_with_binary_path(&guest, &src_vm_path);
|
||||
src_vm_cmd
|
||||
.args(&[
|
||||
"--cpus",
|
||||
format!("boot={},max={}", boot_vcpus, max_vcpus).as_str(),
|
||||
])
|
||||
.args(memory_param)
|
||||
.args(&["--kernel", kernel_path.to_str().unwrap()])
|
||||
.args(&["--cmdline", DIRECT_KERNEL_BOOT_CMDLINE])
|
||||
.default_disks()
|
||||
.args(&["--net", net_params.as_str()])
|
||||
.args(&["--api-socket", &src_api_socket])
|
||||
.args(&[
|
||||
"--pmem",
|
||||
format!("file={}", pmem_temp_file.as_path().to_str().unwrap(),).as_str(),
|
||||
])
|
||||
.args(&["--watchdog"]);
|
||||
let mut src_child = src_vm_cmd.capture_output().spawn().unwrap();
|
||||
|
||||
// Start the destination VM
|
||||
let mut dest_api_socket = temp_api_path(&guest.tmp_dir);
|
||||
dest_api_socket.push_str(".dest");
|
||||
let mut dest_child = GuestCommand::new(&guest)
|
||||
.args(&["--api-socket", &dest_api_socket])
|
||||
.capture_output()
|
||||
.spawn()
|
||||
.unwrap();
|
||||
|
||||
let r = std::panic::catch_unwind(|| {
|
||||
guest.wait_vm_boot(None).unwrap();
|
||||
|
||||
// Make sure the source VM is functional
|
||||
// Check the number of vCPUs
|
||||
assert_eq!(guest.get_cpu_count().unwrap_or_default(), boot_vcpus);
|
||||
// Check the guest RAM
|
||||
assert!(guest.get_total_memory().unwrap_or_default() > 3_840_000);
|
||||
// Check the guest virtio-devices, e.g. block, rng, console, and net
|
||||
guest.check_devices_common(None, Some(&console_text), Some(&pmem_path));
|
||||
// x86_64: Following what's done in the `test_snapshot_restore`, we need
|
||||
// to make sure that removing and adding back the virtio-net device does
|
||||
// not break the live-migration support for virtio-pci.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
assert!(remote_command(
|
||||
&src_api_socket,
|
||||
"remove-device",
|
||||
Some(net_id),
|
||||
));
|
||||
thread::sleep(std::time::Duration::new(10, 0));
|
||||
|
||||
// Plug the virtio-net device again
|
||||
assert!(remote_command(
|
||||
&src_api_socket,
|
||||
"add-net",
|
||||
Some(net_params.as_str()),
|
||||
));
|
||||
thread::sleep(std::time::Duration::new(10, 0));
|
||||
}
|
||||
|
||||
// Enable watchdog and ensure it is functional
|
||||
let mut expected_reboot_count = 1;
|
||||
// Enable the watchdog with a 15s timeout
|
||||
enable_guest_watchdog(&guest, 15);
|
||||
// Reboot and check that systemd has activated the watchdog
|
||||
guest.ssh_command("sudo reboot").unwrap();
|
||||
guest.wait_vm_boot(None).unwrap();
|
||||
expected_reboot_count += 1;
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
assert_eq!(
|
||||
guest
|
||||
.ssh_command("sudo journalctl | grep -c -- \"Watchdog started\"")
|
||||
.unwrap()
|
||||
.trim()
|
||||
.parse::<u32>()
|
||||
.unwrap_or_default(),
|
||||
2
|
||||
);
|
||||
// Allow some normal time to elapse to check we don't get spurious reboots
|
||||
thread::sleep(std::time::Duration::new(40, 0));
|
||||
// Check no reboot
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
|
||||
// Start the live-migration
|
||||
let migration_socket = String::from(
|
||||
guest
|
||||
.tmp_dir
|
||||
.as_path()
|
||||
.join("live-migration.sock")
|
||||
.to_str()
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
assert!(
|
||||
start_live_migration(&migration_socket, &src_api_socket, &dest_api_socket, local),
|
||||
"Unsuccessful command: 'send-migration' or 'receive-migration'."
|
||||
);
|
||||
});
|
||||
|
||||
// Check and report any errors that occurred during the live-migration
|
||||
if r.is_err() {
|
||||
print_and_panic(
|
||||
src_child,
|
||||
dest_child,
|
||||
None,
|
||||
"Error occured during live-migration",
|
||||
);
|
||||
}
|
||||
|
||||
// Check the source VM has been terminated successfully (give it '3s' to settle)
|
||||
thread::sleep(std::time::Duration::new(3, 0));
|
||||
if !src_child.try_wait().unwrap().map_or(false, |s| s.success()) {
|
||||
print_and_panic(
|
||||
src_child,
|
||||
dest_child,
|
||||
None,
|
||||
"source VM was not terminated successfully.",
|
||||
);
|
||||
};
|
||||
|
||||
// Post live-migration check to make sure the destination VM is functional
|
||||
let r = std::panic::catch_unwind(|| {
|
||||
// Perform same checks to validate VM has been properly migrated
|
||||
assert_eq!(guest.get_cpu_count().unwrap_or_default(), boot_vcpus);
|
||||
assert!(guest.get_total_memory().unwrap_or_default() > 3_840_000);
|
||||
|
||||
guest.check_devices_common(None, Some(&console_text), Some(&pmem_path));
|
||||
|
||||
// Perform checks on watchdog
|
||||
let mut expected_reboot_count = 2;
|
||||
|
||||
// Allow some normal time to elapse to check we don't get spurious reboots
|
||||
thread::sleep(std::time::Duration::new(40, 0));
|
||||
// Check no reboot
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
|
||||
// Trigger a panic (sync first). We need to do this inside a screen with a delay so the SSH command returns.
|
||||
guest.ssh_command("screen -dmS reboot sh -c \"sleep 5; echo s | tee /proc/sysrq-trigger; echo c | sudo tee /proc/sysrq-trigger\"").unwrap();
|
||||
// Allow some time for the watchdog to trigger (max 30s) and reboot to happen
|
||||
guest.wait_vm_boot(Some(50)).unwrap();
|
||||
// Check a reboot is triggered by the watchdog
|
||||
expected_reboot_count += 1;
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
// Now pause the VM and remain offline for 30s
|
||||
assert!(remote_command(&dest_api_socket, "pause", None));
|
||||
thread::sleep(std::time::Duration::new(30, 0));
|
||||
assert!(remote_command(&dest_api_socket, "resume", None));
|
||||
|
||||
// Check no reboot
|
||||
assert_eq!(get_reboot_count(&guest), expected_reboot_count);
|
||||
}
|
||||
});
|
||||
|
||||
// Clean-up the destination VM and make sure it terminated correctly
|
||||
let _ = dest_child.kill();
|
||||
let dest_output = dest_child.wait_with_output().unwrap();
|
||||
handle_child_output(r, &dest_output);
|
||||
|
||||
// Check the destination VM has the expected 'console_text' from its output
|
||||
let r = std::panic::catch_unwind(|| {
|
||||
assert!(String::from_utf8_lossy(&dest_output.stdout).contains(&console_text));
|
||||
});
|
||||
handle_child_output(r, &dest_output);
|
||||
}
|
||||
|
||||
fn _test_live_migration_ovs_dpdk(upgrade_test: bool, local: bool) {
|
||||
let ovs_focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string());
|
||||
let ovs_guest = Guest::new(Box::new(ovs_focal));
|
||||
@ -8825,86 +8963,86 @@ mod live_migration {
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_live_migration_basic() {
|
||||
_test_live_migration(false, false, false, false, false)
|
||||
_test_live_migration(false, false, false, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_migration_local() {
|
||||
_test_live_migration(false, false, true, false, false)
|
||||
_test_live_migration(false, false, true, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(not(feature = "mshv"))]
|
||||
fn test_live_migration_numa() {
|
||||
_test_live_migration(false, true, false, false, false)
|
||||
_test_live_migration(false, true, false, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(not(feature = "mshv"))]
|
||||
fn test_live_migration_numa_local() {
|
||||
_test_live_migration(false, true, true, false, false)
|
||||
_test_live_migration(false, true, true, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_migration_watchdog() {
|
||||
_test_live_migration(false, false, false, true, false)
|
||||
_test_live_migration_watchdog(false, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_migration_watchdog_local() {
|
||||
_test_live_migration(false, false, true, true, false)
|
||||
_test_live_migration_watchdog(false, true)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_migration_balloon() {
|
||||
_test_live_migration(false, false, false, false, true)
|
||||
_test_live_migration(false, false, false, true)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_migration_balloon_local() {
|
||||
_test_live_migration(false, false, true, false, true)
|
||||
_test_live_migration(false, false, true, true)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_upgrade_basic() {
|
||||
_test_live_migration(true, false, false, false, false)
|
||||
_test_live_migration(true, false, false, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_upgrade_local() {
|
||||
_test_live_migration(true, false, true, false, false)
|
||||
_test_live_migration(true, false, true, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(not(feature = "mshv"))]
|
||||
fn test_live_upgrade_numa() {
|
||||
_test_live_migration(true, true, false, false, false)
|
||||
_test_live_migration(true, true, false, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(not(feature = "mshv"))]
|
||||
fn test_live_upgrade_numa_local() {
|
||||
_test_live_migration(true, true, true, false, false)
|
||||
_test_live_migration(true, true, true, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_upgrade_watchdog() {
|
||||
_test_live_migration(true, false, false, true, false)
|
||||
_test_live_migration_watchdog(true, false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_upgrade_watchdog_local() {
|
||||
_test_live_migration(true, false, true, true, false)
|
||||
_test_live_migration_watchdog(true, true)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_upgrade_balloon() {
|
||||
_test_live_migration(true, false, false, false, true)
|
||||
_test_live_migration(true, false, false, true)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_live_upgrade_balloon_local() {
|
||||
_test_live_migration(true, false, true, false, true)
|
||||
_test_live_migration(true, false, true, true)
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user