diff --git a/scripts/run_integration_tests_vfio.sh b/scripts/run_integration_tests_vfio.sh
index 542dfe973..77db60500 100755
--- a/scripts/run_integration_tests_vfio.sh
+++ b/scripts/run_integration_tests_vfio.sh
@@ -8,6 +8,33 @@ process_common_args "$@"
 # For now these values are default for kvm
 features=""
 
+WORKLOADS_DIR="$HOME/workloads"
+FOCAL_OS_IMAGE_NAME="focal-server-cloudimg-amd64-custom-20210609-0.raw"
+FOCAL_OS_IMAGE="$WORKLOADS_DIR/$FOCAL_OS_IMAGE_NAME"
+FW="$WORKLOADS_DIR/hypervisor-fw"
+VMLINUX_IMAGE="$WORKLOADS_DIR/vmlinux"
+
+BLK_IMAGE="$WORKLOADS_DIR/blk.img"
+MNT_DIR="mount_image"
+rm -rf $BLK_IMAGE
+pushd $WORKLOADS_DIR
+fallocate -l 16M $BLK_IMAGE
+mkfs.ext4 -j $BLK_IMAGE
+mkdir $MNT_DIR
+sudo mount -t ext4 $BLK_IMAGE $MNT_DIR
+sudo bash -c "echo bar > $MNT_DIR/foo" || exit 1
+sudo umount $BLK_IMAGE
+rm -r $MNT_DIR
+popd
+
+VFIO_DIR="$WORKLOADS_DIR/vfio"
+VFIO_DISK_IMAGE="$WORKLOADS_DIR/vfio.img"
+rm -rf $VFIO_DIR $VFIO_DISK_IMAGE
+mkdir -p $VFIO_DIR
+cp $FOCAL_OS_IMAGE $VFIO_DIR
+cp $FW $VFIO_DIR
+cp $VMLINUX_IMAGE $VFIO_DIR || exit 1
+
 BUILD_TARGET="$(uname -m)-unknown-linux-${CH_LIBC}"
 CFLAGS=""
 TARGET_CC=""
@@ -18,10 +45,23 @@ fi
 
 cargo build --all --release $features --target $BUILD_TARGET
 strip target/$BUILD_TARGET/release/cloud-hypervisor
+strip target/$BUILD_TARGET/release/ch-remote
+
+# We always copy a fresh version of our binaries for our L2 guest.
+cp target/$BUILD_TARGET/release/cloud-hypervisor $VFIO_DIR
+cp target/$BUILD_TARGET/release/ch-remote $VFIO_DIR
+
+# test_vfio relies on hugepages
+echo 6144 | sudo tee /proc/sys/vm/nr_hugepages
+sudo chmod a+rwX /dev/hugepages
 
 export RUST_BACKTRACE=1
-
-time cargo test $features "vfio::$test_filter" -- --test-threads=1 ${test_binary_args[*]}
+time cargo test $features "vfio::test_vfio" -- ${test_binary_args[*]}
 RES=$?
 
+if [ $RES -eq 0 ]; then
+    time cargo test $features "vfio::test_nvidia" -- --test-threads=1 ${test_binary_args[*]}
+    RES=$?
+fi
+
 exit $RES
diff --git a/tests/integration.rs b/tests/integration.rs
index 9b3385685..8c1c7be13 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -7681,6 +7681,269 @@ mod sgx {
 mod vfio {
     use crate::*;
 
+    #[test]
+    // The VFIO integration test starts a cloud-hypervisor guest with four
+    // TAP-backed network interfaces, bound through a simple bridge on the
+    // host. Three of them are flagged iommu=on so they can be passed through
+    // to the nested (L2) cloud-hypervisor guest. If the nested guest
+    // succeeds in getting a directly assigned interface from its
+    // cloud-hypervisor host, we should be able to ssh into it and verify
+    // that it is running with the right kernel command line (we tag the
+    // command line from cloud-hypervisor for that purpose). The third
+    // passed-through device validates that hotplug works correctly, since it
+    // is added to the L2 VM through the hotplugging mechanism.
+    // We also pass through a virtio-blk device to the L2 VM to test the
+    // 32-bit VFIO device support.
+    fn test_vfio() {
+        setup_vfio_network_interfaces();
+
+        let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string());
+        let guest = Guest::new_from_ip_range(Box::new(focal), "172.18", 0);
+
+        let mut workload_path = dirs::home_dir().unwrap();
+        workload_path.push("workloads");
+
+        let kernel_path = direct_kernel_boot_path();
+
+        let mut vfio_path = workload_path.clone();
+        vfio_path.push("vfio");
+
+        let mut cloud_init_vfio_base_path = vfio_path.clone();
+        cloud_init_vfio_base_path.push("cloudinit.img");
+
+        // We copy our cloudinit into the vfio mount point, for the nested
+        // cloud-hypervisor guest to use.
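+        // Note: rate_limited_copy() is a shared test-infra helper; as its
+        // name suggests, it rate-limits the copy, presumably so that large
+        // image copies don't saturate disk I/O on busy CI workers.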
+        rate_limited_copy(
+            &guest.disk_config.disk(DiskType::CloudInit).unwrap(),
+            &cloud_init_vfio_base_path,
+        )
+        .expect("copying of cloud-init disk failed");
+
+        let mut vfio_disk_path = workload_path.clone();
+        vfio_disk_path.push("vfio.img");
+
+        // Create the vfio disk image
+        let output = Command::new("mkfs.ext4")
+            .arg("-d")
+            .arg(vfio_path.to_str().unwrap())
+            .arg(vfio_disk_path.to_str().unwrap())
+            .arg("2g")
+            .output()
+            .unwrap();
+        if !output.status.success() {
+            eprintln!("{}", String::from_utf8_lossy(&output.stderr));
+            panic!("mkfs.ext4 command generated an error");
+        }
+
+        let mut blk_file_path = workload_path;
+        blk_file_path.push("blk.img");
+
+        let vfio_tap0 = "vfio-tap0";
+        let vfio_tap1 = "vfio-tap1";
+        let vfio_tap2 = "vfio-tap2";
+        let vfio_tap3 = "vfio-tap3";
+
+        let mut child = GuestCommand::new(&guest)
+            .args(&["--cpus", "boot=4"])
+            .args(&["--memory", "size=2G,hugepages=on,shared=on"])
+            .args(&["--kernel", kernel_path.to_str().unwrap()])
+            .args(&[
+                "--disk",
+                format!(
+                    "path={}",
+                    guest.disk_config.disk(DiskType::OperatingSystem).unwrap()
+                )
+                .as_str(),
+                format!(
+                    "path={}",
+                    guest.disk_config.disk(DiskType::CloudInit).unwrap()
+                )
+                .as_str(),
+                format!("path={}", vfio_disk_path.to_str().unwrap()).as_str(),
+                format!("path={},iommu=on", blk_file_path.to_str().unwrap()).as_str(),
+            ])
+            .args(&[
+                "--cmdline",
+                format!(
+                    "{} kvm-intel.nested=1 vfio_iommu_type1.allow_unsafe_interrupts",
+                    DIRECT_KERNEL_BOOT_CMDLINE
+                )
+                .as_str(),
+            ])
+            .args(&[
+                "--net",
+                format!("tap={},mac={}", vfio_tap0, guest.network.guest_mac).as_str(),
+                format!(
+                    "tap={},mac={},iommu=on",
+                    vfio_tap1, guest.network.l2_guest_mac1
+                )
+                .as_str(),
+                format!(
+                    "tap={},mac={},iommu=on",
+                    vfio_tap2, guest.network.l2_guest_mac2
+                )
+                .as_str(),
+                format!(
+                    "tap={},mac={},iommu=on",
+                    vfio_tap3, guest.network.l2_guest_mac3
+                )
+                .as_str(),
+            ])
+            .capture_output()
+            .spawn()
+            .unwrap();
+
+        thread::sleep(std::time::Duration::new(30, 0));
+
+        let r = std::panic::catch_unwind(|| {
+            guest.ssh_command_l1("sudo systemctl start vfio").unwrap();
+            thread::sleep(std::time::Duration::new(120, 0));
+
+            // We booted our cloud-hypervisor L2 guest with a "VFIOTAG" tag
+            // added to its kernel command line.
+            // Let's ssh into it and verify that it's there. If it is, it
+            // means we're in the right guest (the L2 one), because the L1
+            // guest does not have this command line tag.
+            assert_eq!(
+                guest
+                    .ssh_command_l2_1("grep -c VFIOTAG /proc/cmdline")
+                    .unwrap()
+                    .trim()
+                    .parse::<u32>()
+                    .unwrap_or_default(),
+                1
+            );
+
+            // Let's also verify from the second virtio-net device passed to
+            // the L2 VM.
+            assert_eq!(
+                guest
+                    .ssh_command_l2_2("grep -c VFIOTAG /proc/cmdline")
+                    .unwrap()
+                    .trim()
+                    .parse::<u32>()
+                    .unwrap_or_default(),
+                1
+            );
+
+            // Check the number of PCI devices appearing in the L2 VM.
+            assert_eq!(
+                guest
+                    .ssh_command_l2_1("ls /sys/bus/pci/devices | wc -l")
+                    .unwrap()
+                    .trim()
+                    .parse::<u32>()
+                    .unwrap_or_default(),
+                8,
+            );
+
+            // Check both that /dev/vdc exists and that its size is 16M in
+            // the L2 VM.
+            assert_eq!(
+                guest
+                    .ssh_command_l2_1("lsblk | grep vdc | grep -c 16M")
+                    .unwrap()
+                    .trim()
+                    .parse::<u32>()
+                    .unwrap_or_default(),
+                1
+            );
+
+            // Hotplug an extra virtio-net device into the L2 VM.
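+            // The device first has to switch host drivers: unbind it from
+            // its current driver, then bind it to vfio-pci so it can be
+            // assigned to the L2 VM. Writing the BDF directly to the
+            // vfio-pci "bind" file assumes vfio-pci already matches this
+            // device (e.g. via new_id/driver_override, presumably set up by
+            // the "vfio" systemd unit started above).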
+            guest
+                .ssh_command_l1(
+                    "echo 0000:00:09.0 | sudo tee /sys/bus/pci/devices/0000:00:09.0/driver/unbind",
+                )
+                .unwrap();
+            guest
+                .ssh_command_l1("echo 0000:00:09.0 | sudo tee /sys/bus/pci/drivers/vfio-pci/bind")
+                .unwrap();
+            let vfio_hotplug_output = guest
+                .ssh_command_l1(
+                    "sudo /mnt/ch-remote \
+                     --api-socket=/tmp/ch_api.sock \
+                     add-device path=/sys/bus/pci/devices/0000:00:09.0,id=vfio123",
+                )
+                .unwrap();
+            assert!(vfio_hotplug_output.contains("{\"id\":\"vfio123\",\"bdf\":\"0000:00:08.0\"}"));
+
+            thread::sleep(std::time::Duration::new(10, 0));
+
+            // Let's also verify from the third virtio-net device passed to
+            // the L2 VM. This third device has been hotplugged into the L2
+            // VM, so this is our way to validate hotplug works for VFIO PCI.
+            assert_eq!(
+                guest
+                    .ssh_command_l2_3("grep -c VFIOTAG /proc/cmdline")
+                    .unwrap()
+                    .trim()
+                    .parse::<u32>()
+                    .unwrap_or_default(),
+                1
+            );
+
+            // Check the number of PCI devices appearing in the L2 VM.
+            // There should be one more device than before, raising the count
+            // up to 9 PCI devices.
+            assert_eq!(
+                guest
+                    .ssh_command_l2_1("ls /sys/bus/pci/devices | wc -l")
+                    .unwrap()
+                    .trim()
+                    .parse::<u32>()
+                    .unwrap_or_default(),
+                9,
+            );
+
+            // Let's now verify that we can correctly remove the virtio-net
+            // device through the "remove-device" command responsible for
+            // unplugging VFIO devices.
+            guest
+                .ssh_command_l1(
+                    "sudo /mnt/ch-remote \
+                     --api-socket=/tmp/ch_api.sock \
+                     remove-device vfio123",
+                )
+                .unwrap();
+            thread::sleep(std::time::Duration::new(10, 0));
+
+            // Check that the number of PCI devices in the L2 VM is back down
+            // to 8 devices.
+            assert_eq!(
+                guest
+                    .ssh_command_l2_1("ls /sys/bus/pci/devices | wc -l")
+                    .unwrap()
+                    .trim()
+                    .parse::<u32>()
+                    .unwrap_or_default(),
+                8,
+            );
+
+            // Perform memory hotplug in L2 and validate the memory is showing
+            // up as expected. To check this, we use the virtio-net device
+            // already passed through to the L2 VM as a VFIO device, which
+            // verifies that VFIO devices remain functional across memory
+            // hotplug.
+            assert!(guest.get_total_memory_l2().unwrap_or_default() > 480_000);
+            guest
+                .ssh_command_l2_1(
+                    "sudo bash -c 'echo online > /sys/devices/system/memory/auto_online_blocks'",
+                )
+                .unwrap();
+            guest
+                .ssh_command_l1(
+                    "sudo /mnt/ch-remote \
+                     --api-socket=/tmp/ch_api.sock \
+                     resize --memory=1073741824",
+                )
+                .unwrap();
+            assert!(guest.get_total_memory_l2().unwrap_or_default() > 960_000);
+        });
+
+        let _ = child.kill();
+        let output = child.wait_with_output().unwrap();
+
+        cleanup_vfio_network_interfaces();
+
+        handle_child_output(r, &output);
+    }
+
     fn test_nvidia_card_memory_hotplug(hotplug_method: &str) {
         let hirsute = UbuntuDiskConfig::new(HIRSUTE_NVIDIA_IMAGE_NAME.to_string());
         let guest = Guest::new(Box::new(hirsute));
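
Note on the helpers used above: setup_vfio_network_interfaces() and
cleanup_vfio_network_interfaces() come from the shared test infrastructure and
are not part of this diff. A minimal sketch of the host-side plumbing they are
assumed to perform (the vfio-br0/vfio-tap* names follow the taps used by the
test; the exact commands are illustrative, not the repository's
implementation):

    # Assumed setup: one bridge carrying the 172.18.0.0/16 test subnet,
    # with the four TAPs used by the test enslaved to it.
    sudo ip link add name vfio-br0 type bridge
    sudo ip link set vfio-br0 up
    sudo ip addr add 172.18.0.1/16 dev vfio-br0
    for i in 0 1 2 3; do
        sudo ip tuntap add vfio-tap$i mode tap
        sudo ip link set vfio-tap$i master vfio-br0
        sudo ip link set vfio-tap$i up
    done

    # Assumed cleanup: tear everything down again.
    for i in 0 1 2 3; do
        sudo ip link del vfio-tap$i
    done
    sudo ip link del vfio-br0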