From 04dc49680809c258070e304004a82f636b21576c Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Tue, 16 Mar 2021 16:11:00 +0100 Subject: [PATCH] ci: Enable baremetal VFIO integration tests Relying on an NVIDIA Tesla T4 card present in the SGX machine, this patch enables baremetal VFIO testing, validated by running several NVIDIA tools in the guest. The guest image has been prepared to include all the software needed to run these tests. Signed-off-by: Sebastien Boeuf --- Jenkinsfile | 33 ++++++++++++++ scripts/dev_cli.sh | 22 ++++++++++ scripts/run_integration_tests_vfio.sh | 28 ++++++++++++ tests/integration.rs | 63 +++++++++++++++++++++++++++ 4 files changed, 146 insertions(+) create mode 100755 scripts/run_integration_tests_vfio.sh diff --git a/Jenkinsfile b/Jenkinsfile index 37008649e..8facceace 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -136,6 +136,39 @@ pipeline{ } } } + stage ('Worker build VFIO') { + agent { node { label 'bionic-vfio' } } + when { branch 'master' } + stages { + stage ('Checkout') { + steps { + checkout scm + } + } + stage ('Run VFIO integration tests') { + options { + timeout(time: 1, unit: 'HOURS') + } + steps { + sh "scripts/dev_cli.sh tests --integration-vfio" + } + } + stage ('Run VFIO integration tests for musl') { + options { + timeout(time: 1, unit: 'HOURS') + } + steps { + sh "scripts/dev_cli.sh tests --integration-vfio --libc musl" + } + } + } + post { + always { + sh "sudo chown -R jenkins.jenkins ${WORKSPACE}" + deleteDir() + } + } + } stage ('Worker build - Windows guest') { agent { node { label 'groovy-win' } } stages { diff --git a/scripts/dev_cli.sh b/scripts/dev_cli.sh index fd26c6acd..5b31f3f03 100755 --- a/scripts/dev_cli.sh +++ b/scripts/dev_cli.sh @@ -182,6 +182,7 @@ cmd_help() { echo " --cargo Run the cargo tests." echo " --integration Run the integration tests." echo " --integration-sgx Run the SGX integration tests." + echo " --integration-vfio Run the VFIO integration tests."
echo " --integration-windows Run the Windows guest integration tests." echo " --libc Select the C library Cloud Hypervisor will be built against. Default is gnu" echo " --volumes Hash separated volumes to be exported. Example --volumes /mnt:/mnt#/myvol:/myvol" @@ -298,6 +299,7 @@ cmd_tests() { cargo=false integration=false integration_sgx=false + integration_vfio=false integration_windows=false libc="gnu" arg_vols="" @@ -310,6 +312,7 @@ cmd_tests() { "--cargo") { cargo=true; } ;; "--integration") { integration=true; } ;; "--integration-sgx") { integration_sgx=true; } ;; + "--integration-vfio") { integration_vfio=true; } ;; "--integration-windows") { integration_windows=true; } ;; "--libc") shift @@ -419,6 +422,25 @@ cmd_tests() { ./scripts/run_integration_tests_sgx.sh "$@" || fix_dir_perms $? || exit $? fi + if [ "$integration_vfio" = true ] ; then + say "Running VFIO integration tests for $target..." + $DOCKER_RUNTIME run \ + --workdir "$CTR_CLH_ROOT_DIR" \ + --rm \ + --privileged \ + --security-opt seccomp=unconfined \ + --ipc=host \ + --net="$CTR_CLH_NET" \ + --mount type=tmpfs,destination=/tmp \ + --volume /dev:/dev \ + --volume "$CLH_ROOT_DIR:$CTR_CLH_ROOT_DIR" $exported_volumes \ + --volume "$CLH_INTEGRATION_WORKLOADS:$CTR_CLH_INTEGRATION_WORKLOADS" \ + --env USER="root" \ + --env CH_LIBC="${libc}" \ + "$CTR_IMAGE" \ + ./scripts/run_integration_tests_vfio.sh "$@" || fix_dir_perms $? || exit $? + fi + if [ "$integration_windows" = true ] ; then say "Running Windows integration tests for $target..." 
$DOCKER_RUNTIME run \ diff --git a/scripts/run_integration_tests_vfio.sh b/scripts/run_integration_tests_vfio.sh new file mode 100755 index 000000000..424b82dbf --- /dev/null +++ b/scripts/run_integration_tests_vfio.sh @@ -0,0 +1,28 @@ +#!/bin/bash +set -x + +source $HOME/.cargo/env +source $(dirname "$0")/test-util.sh + +process_common_args "$@" +# For now these values are default for kvm +features_build="" +features_test="--features integration_tests" + +BUILD_TARGET="$(uname -m)-unknown-linux-${CH_LIBC}" +CFLAGS="" +TARGET_CC="" +if [[ "${BUILD_TARGET}" == "x86_64-unknown-linux-musl" ]]; then +TARGET_CC="musl-gcc" +CFLAGS="-I /usr/include/x86_64-linux-musl/ -idirafter /usr/include/" +fi + +cargo build --all --release $features_build --target $BUILD_TARGET +strip target/$BUILD_TARGET/release/cloud-hypervisor + +export RUST_BACKTRACE=1 + +time cargo test $features_test "tests::vfio::$test_filter" +RES=$? + +exit $RES diff --git a/tests/integration.rs b/tests/integration.rs index b8764e6ce..a39c98b3c 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -88,6 +88,8 @@ mod tests { const FOCAL_IMAGE_NAME: &str = "focal-server-cloudimg-amd64-custom-20210106-1.raw"; #[cfg(target_arch = "x86_64")] const FOCAL_SGX_IMAGE_NAME: &str = "focal-server-cloudimg-amd64-sgx.raw"; + #[cfg(target_arch = "x86_64")] + const FOCAL_NVIDIA_IMAGE_NAME: &str = "focal-server-cloudimg-amd64-nvidia.raw"; #[cfg(target_arch = "aarch64")] const BIONIC_IMAGE_NAME: &str = "bionic-server-cloudimg-arm64.raw"; #[cfg(target_arch = "aarch64")] @@ -6323,4 +6325,65 @@ mod tests { handle_child_output(r, &output); } } + + #[cfg(target_arch = "x86_64")] + mod vfio { + use crate::tests::*; + + #[test] + fn test_nvidia_card() { + let mut focal = UbuntuDiskConfig::new(FOCAL_NVIDIA_IMAGE_NAME.to_string()); + let guest = Guest::new(&mut focal); + + let mut child = GuestCommand::new(&guest) + .args(&["--cpus", "boot=4"]) + .args(&["--memory", "size=4G"]) + .args(&["--kernel", guest.fw_path.as_str()]) +
.args(&["--device", "path=/sys/bus/pci/devices/0000:31:00.0/"]) + .default_disks() + .default_net() + .capture_output() + .spawn() + .unwrap(); + + let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot(None).unwrap(); + + // Run CUDA sample to validate it can find the device + let device_query_result = guest + .ssh_command( + "sudo /root/NVIDIA_CUDA-11.2_Samples/bin/x86_64/linux/release/deviceQuery", + ) + .unwrap(); + assert!(device_query_result.contains("Detected 1 CUDA Capable device")); + assert!(device_query_result.contains("Device 0: \"Tesla T4\"")); + assert!(device_query_result.contains("Result = PASS")); + + // Run NVIDIA DCGM Diagnostics to validate the device is functional + assert_eq!( + guest + .ssh_command("sudo nv-hostengine && echo ok") + .unwrap() + .trim(), + "ok" + ); + assert!(guest + .ssh_command("sudo dcgmi discovery -l") + .unwrap() + .contains("Name: Tesla T4")); + assert_eq!( + guest + .ssh_command("sudo dcgmi diag -r 'diagnostic' | grep Pass | wc -l") + .unwrap() + .trim(), + "10" + ); + }); + + let _ = child.kill(); + let output = child.wait_with_output().unwrap(); + + handle_child_output(r, &output); + } + } }