tests: Stabilize snapshot_restore tests

Since the 'write()' to the event file was moved to its own thread
(see #5633), we have no reliable way to read the latest contents of
the event file from our integration tests: we can't ensure that the
'read()' from our test always happens after the 'write()' from Cloud
Hypervisor has completed. This is also why we started to see random
failures in the snapshot_restore tests (particularly when the system
workload is high).

This patch adds a 1s sleep before reading the event file to mitigate the
random failures.

Signed-off-by: Bo Chen <chen.bo@intel.com>
This commit is contained in:
Bo Chen 2023-11-09 11:26:57 -08:00 committed by Rob Bradford
parent d4892f41b3
commit de2fcc2d87

View File

@ -6080,6 +6080,12 @@ mod common_parallel {
let r = std::panic::catch_unwind(|| { let r = std::panic::catch_unwind(|| {
// Resume the VM // Resume the VM
assert!(remote_command(&api_socket_restored, "resume", None)); assert!(remote_command(&api_socket_restored, "resume", None));
// There is no way that we can ensure the 'write()' to the
// event file is completed when the 'resume' request is
// returned successfully, because the 'write()' was done
// asynchronously from a different thread of Cloud
// Hypervisor (e.g. the event-monitor thread).
thread::sleep(std::time::Duration::new(1, 0));
let latest_events = [ let latest_events = [
&MetaEvent { &MetaEvent {
event: "resuming".to_string(), event: "resuming".to_string(),