From 8c92d1dbdc7617b88d34687115e0e69bd78b332c Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 26 Oct 2020 16:46:04 -0700 Subject: [PATCH] tests: Wait explicitly for the guest vm to boot Instead of blindly waiting for 20-40s for the guest VM to boot, this patch waits explicitly for a notification from the guest VM by using a simple TcpListener on the host and a custom systemd service in the guest. This patch also ports a few tests to this new mechanism; more tests remain to be ported. Signed-off-by: Bo Chen --- Cargo.lock | 1 + Cargo.toml | 1 + test_data/cloud-init/ubuntu/user-data | 28 +++++ tests/integration.rs | 151 +++++++++++++++++++++++--- 4 files changed, 167 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 70b4f9eee..0ecbbf54d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -217,6 +217,7 @@ dependencies = [ "clap", "credibility", "dirs", + "epoll", "hypervisor", "lazy_static", "libc", diff --git a/Cargo.toml b/Cargo.toml index 2e52f0cd9..194947b45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ lto = true anyhow = "1.0" api_client = { path = "api_client" } clap = { version = "2.33.3", features = ["wrap_help"] } +epoll = ">=4.0.1" hypervisor = { path = "hypervisor" } libc = "0.2.80" log = { version = "0.4.11", features = ["std"] } diff --git a/test_data/cloud-init/ubuntu/user-data b/test_data/cloud-init/ubuntu/user-data index 86ebff765..fefa3770f 100644 --- a/test_data/cloud-init/ubuntu/user-data +++ b/test_data/cloud-init/ubuntu/user-data @@ -9,6 +9,11 @@ users: ssh_pwauth: True +runcmd: + - [ systemctl, daemon-reload] + - [ systemctl, enable, notify-booted.service] + - [ systemctl, start, --no-block, notify-booted.service ] + write_files: - path: /etc/systemd/system/vfio.service @@ -39,3 +44,26 @@ write_files: echo 512 | sudo tee /proc/sys/vm/nr_hugepages sudo chmod a+rwX /dev/hugepages /mnt/cloud-hypervisor --kernel /mnt/vmlinux --cmdline "console=hvc0 reboot=k panic=1 nomodules i8042.noaux i8042.nomux i8042.nopnp 
i8042.dumbkbd root=/dev/vda1 VFIOTAG" --disk path=/mnt/focal-server-cloudimg-amd64-custom.qcow2 path=/mnt/cloudinit.img --cpus boot=1 --memory size=512M,hotplug_size=1G,hugepages=on --device path=/sys/bus/pci/devices/0000:00:06.0/ path=/sys/bus/pci/devices/0000:00:07.0/ --api-socket /tmp/ch_api.sock + + - + path: /etc/systemd/system/notify-booted.service + permissions: 0644 + content: | + [Unit] + Description=Notify the tcp listener on the host that the guest is booted + After=sshd.service + + [Service] + Type=simple + ExecStart=/usr/bin/cloud-hypervisor-notify-booted.sh + + [Install] + WantedBy=multi-user.target + + - + path: /usr/bin/cloud-hypervisor-notify-booted.sh + permissions: 0755 + content: | + #!/bin/bash + + echo -n "@DEFAULT_TCP_LISTENER_MESSAGE" | nc -w0 @HOST_IP @TCP_LISTENER_PORT diff --git a/tests/integration.rs b/tests/integration.rs index a4aa0147d..9ba33076c 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -3,8 +3,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#[cfg(test)] -#[cfg(feature = "integration_tests")] #[cfg(test)] #[cfg(feature = "integration_tests")] #[macro_use] @@ -23,10 +21,11 @@ mod tests { use std::io; use std::io::BufRead; use std::io::{Read, Write}; - use std::net::TcpStream; + use std::net::{TcpListener, TcpStream}; use std::os::unix::io::AsRawFd; use std::path::{Path, PathBuf}; use std::process::{Child, Command, Stdio}; + use std::str::FromStr; use std::string::String; use std::sync::Mutex; use std::thread; @@ -47,8 +46,13 @@ mod tests { l2_guest_mac1: String, l2_guest_mac2: String, l2_guest_mac3: String, + tcp_listener_port: u16, } + const DEFAULT_TCP_LISTENER_MESSAGE: &str = "booted"; + const DEFAULT_TCP_LISTENER_PORT: u16 = 8000; + const DEFAULT_TCP_LISTENER_TIMEOUT: i32 = 40; + struct Guest<'a> { tmp_dir: TempDir, disk_config: &'a dyn DiskConfig, @@ -205,11 +209,29 @@ mod tests { .join("cloud-init") .join("ubuntu"); - vec!["meta-data", "user-data"].iter().for_each(|x| { + 
vec!["meta-data"].iter().for_each(|x| { rate_limited_copy(source_file_dir.join(x), cloud_init_directory.join(x)) .expect("Expect copying cloud-init meta-data to succeed"); }); + let mut user_data_string = String::new(); + fs::File::open(source_file_dir.join("user-data")) + .unwrap() + .read_to_string(&mut user_data_string) + .expect("Expected reading user-data file in to succeed"); + user_data_string = user_data_string.replace( + "@DEFAULT_TCP_LISTENER_MESSAGE", + &DEFAULT_TCP_LISTENER_MESSAGE, + ); + user_data_string = user_data_string.replace("@HOST_IP", &network.host_ip); + user_data_string = user_data_string + .replace("@TCP_LISTENER_PORT", &network.tcp_listener_port.to_string()); + + fs::File::create(cloud_init_directory.join("user-data")) + .unwrap() + .write_all(&user_data_string.as_bytes()) + .expect("Expected writing out user-data to succeed"); + let mut network_config_string = String::new(); fs::File::open(source_file_dir.join("network-config")) @@ -615,6 +637,12 @@ mod tests { ChannelSession(ssh2::Error), Command(ssh2::Error), Parsing(std::num::ParseIntError), + EpollWait(std::io::Error), + EpollWaitTimeout, + ReadToString(std::io::Error), + SetReadTimeout(std::io::Error), + WrongGuestAddr, + WrongGuestMsg, } impl std::error::Error for Error {} @@ -648,6 +676,7 @@ mod tests { l2_guest_mac1: format!("de:ad:be:ef:12:{:02x}", id), l2_guest_mac2: format!("de:ad:be:ef:34:{:02x}", id), l2_guest_mac3: format!("de:ad:be:ef:56:{:02x}", id), + tcp_listener_port: DEFAULT_TCP_LISTENER_PORT + id as u16, }; disk_config.prepare_files(&tmp_dir, &network); @@ -798,6 +827,108 @@ mod tests { .map_err(Error::Parsing)?) 
} + /// Waits for the guest to notify the host that it has finished booting. + /// + /// Listens on the per-guest `tcp_listener_port`, waits up to + /// `DEFAULT_TCP_LISTENER_TIMEOUT` seconds for a connection, and checks that it + /// comes from `guest_ip` and carries `DEFAULT_TCP_LISTENER_MESSAGE`. On failure + /// a diagnostic report is printed to stderr and the error is returned. + fn wait_vm_boot(&self) -> Result<(), Error> { + let start = std::time::Instant::now(); + // The 'port' is unique per 'GUEST' and listening to wild-card ip avoids retrying on 'TcpListener::bind()' + let listen_addr = format!("0.0.0.0:{}", self.network.tcp_listener_port); + let expected_guest_addr = self.network.guest_ip.as_str(); + let mut s = String::new(); + + match (|| -> Result<(), Error> { + let listener = + TcpListener::bind(&listen_addr.as_str()).map_err(Error::Connection)?; + listener + .set_nonblocking(true) + .expect("Cannot set non-blocking for tcp listener"); + + // Rely on epoll w/ timeout to wait for guest connections faithfully + let epoll_fd = epoll::create(true).expect("Cannot create epoll fd"); + // Hand 'epoll_fd' to a 'File' so that it is closed on every exit path (early + // return, error or panic); without an owner the fd would leak on each call. + // SAFETY: 'epoll_fd' was just created above and has no other owner. + let _epoll_file = + unsafe { <fs::File as std::os::unix::io::FromRawFd>::from_raw_fd(epoll_fd) }; + epoll::ctl( + epoll_fd, + epoll::ControlOptions::EPOLL_CTL_ADD, + listener.as_raw_fd(), + epoll::Event::new(epoll::Events::EPOLLIN, 0), + ) + .expect("Cannot add 'tcp_listener' event to epoll"); + let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); 1]; + let num_events = epoll::wait( + epoll_fd, + DEFAULT_TCP_LISTENER_TIMEOUT * 1000, + &mut events[..], + ) + .map_err(Error::EpollWait)?; + if num_events == 0 { + return Err(Error::EpollWaitTimeout); + } + + match listener.accept() { + Ok((mut stream, addr)) => { + // Make sure the connection is from the expected 'guest_addr' + if addr.ip() != std::net::IpAddr::from_str(expected_guest_addr).unwrap() { + s = format!( + "Expecting the guest ip '{}' while being connected with ip '{}'", + expected_guest_addr, + addr.ip() + ); + return Err(Error::WrongGuestAddr); + } + + // Make sure the right message is to notify the guest VM is booted + let mut data = String::new(); + stream + .set_read_timeout(Some(std::time::Duration::new( + DEFAULT_TCP_LISTENER_TIMEOUT as u64, + 0, + ))) + .map_err(Error::SetReadTimeout)?; + stream + .read_to_string(&mut data) + .map_err(Error::ReadToString)?; + if data != DEFAULT_TCP_LISTENER_MESSAGE { + s = format!( + "Expecting the guest message '{}' while 
receiving the message '{}'", + DEFAULT_TCP_LISTENER_MESSAGE, data + ); + return Err(Error::WrongGuestMsg); + }; + + Ok(()) + } + Err(e) => { + s = "TcpListener::accept() failed".to_string(); + Err(Error::Connection(e)) + } + } + })() { + Err(e) => { + let duration = start.elapsed(); + eprintln!( + "\n\n==== Start 'wait_vm_boot' (FAILED) ====\n\n\ + duration =\"{:?}\"\n\ + listen_addr=\"{}\"\n\ + expected_guest_addr=\"{}\"\n\ + message =\"{}\"\n\ + \n==== End 'wait_vm_boot' outout ====\n\n", + duration, listen_addr, expected_guest_addr, s, + ); + + Err(e) + } + Ok(_) => Ok(()), + } + } + fn check_numa_node_cpus(&self, node_id: usize, cpus: Vec) -> Result { for cpu in cpus.iter() { let cmd = format!( @@ -3236,9 +3367,9 @@ mod tests { .spawn() .unwrap(); - thread::sleep(std::time::Duration::new(20, 0)); - let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot().unwrap(); + // Test that there is no ttyS0 assert_eq!( guest @@ -3277,9 +3408,9 @@ mod tests { let mut child = cmd.spawn().unwrap(); - thread::sleep(std::time::Duration::new(20, 0)); - let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot().unwrap(); + #[cfg(target_arch = "x86_64")] // Test that there is a ttyS0 assert_eq!( @@ -3328,9 +3459,9 @@ mod tests { .spawn() .unwrap(); - thread::sleep(std::time::Duration::new(20, 0)); - let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot().unwrap(); + // Test that there is a ttyS0 assert_eq!( guest @@ -3343,6 +3474,9 @@ mod tests { ); }); + // This sleep is needed to wait for the login prompt + thread::sleep(std::time::Duration::new(2, 0)); + let _ = child.kill(); let output = child.wait_with_output().unwrap(); handle_child_output(r, &output); @@ -3375,9 +3509,9 @@ mod tests { .spawn() .unwrap(); - thread::sleep(std::time::Duration::new(20, 0)); - let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot().unwrap(); + // Test that there is a ttyS0 assert_eq!( guest @@ -5305,9 +5439,9 @@ mod tests { let mut child = cmd.spawn().unwrap(); - 
thread::sleep(std::time::Duration::new(20, 0)); - let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot().unwrap(); + let orig_counters = get_counters(&api_socket); assert!(guest .ssh_command("dd if=/dev/zero of=test count=8 bs=1M")