diff --git a/contrib/kata-containers/0001-virtcontainers-agent-Add-passt-networking-model-and-.patch b/contrib/kata-containers/0001-virtcontainers-agent-Add-passt-networking-model-and-.patch new file mode 100644 index 0000000..e0dffa5 --- /dev/null +++ b/contrib/kata-containers/0001-virtcontainers-agent-Add-passt-networking-model-and-.patch @@ -0,0 +1,462 @@ +From e1b250fc0b5e377285db5d90476fdd2d63501191 Mon Sep 17 00:00:00 2001 +From: Stefano Brivio +Date: Fri, 28 Jan 2022 01:09:23 +0100 +Subject: [PATCH] virtcontainers, agent: Add passt networking model and + endpoint + +This implements a draft support for user-mode networking using +passt (https://passt.top), the corresponding networking model +can be enabled via: + + internetworking_model=passt + +in the [runtime] section of the TOML configuration file. + +The networking endpoint does essentially nothing, other than +starting and stopping passt as needed: no interfaces are configured, +qemu connects to passt via UNIX domain socket, the corresponding +command line option is appended if this networking model is +selected. + +The passt instance started by the endpoint takes care of forwarding +traffic back and forth, translating between the L2 frames qemu-side +and native L4 sockets on the host. + +This network setup doesn't need elevated privileges or any kind of +capability. However, this patch doesn't implement privileges drop +as the containerd interface allows only runtimes running as the +same user to connect to its own UNIX domain socket interface, +typically root (at least in the case of CRI-O), and root privileges +might anyway be needed for other purposes (block devices, etc.) 
+ +Signed-off-by: Stefano Brivio +--- +SPDX-FileCopyrightText: 2021-2022 Red Hat GmbH +SPDX-License-Identifier: Apache-2.0 + + src/agent/src/netlink.rs | 3 +- + .../kata-containers/govmm/qemu/qemu.go | 23 ++- + src/runtime/virtcontainers/endpoint.go | 7 + + src/runtime/virtcontainers/network.go | 24 +++ + src/runtime/virtcontainers/passt_endpoint.go | 156 ++++++++++++++++++ + .../virtcontainers/persist/api/network.go | 5 + + src/runtime/virtcontainers/qemu_arch_base.go | 11 ++ + 7 files changed, 226 insertions(+), 3 deletions(-) + create mode 100644 src/runtime/virtcontainers/passt_endpoint.go + +diff --git a/src/agent/src/netlink.rs b/src/agent/src/netlink.rs +index ed071b60..34c6df96 100644 +--- a/src/agent/src/netlink.rs ++++ b/src/agent/src/netlink.rs +@@ -312,7 +312,8 @@ impl Handle { + let list = a.iter().chain(&b); + + for route in list { +- let link = self.find_link(LinkFilter::Name(&route.device)).await?; ++ // TODO: "eth0" hardcoded for passt networking model ++ let link = self.find_link(LinkFilter::Name("eth0")).await?; + + const MAIN_TABLE: u8 = packet::constants::RT_TABLE_MAIN; + const UNICAST: u8 = packet::constants::RTN_UNICAST; +diff --git a/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go b/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go +index e57a4b26..1756bdfd 100644 +--- a/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go ++++ b/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go +@@ -682,6 +682,8 @@ const ( + + // VHOSTUSER is a vhost-user port (socket) + VHOSTUSER NetDeviceType = "vhostuser" ++ ++ PASST NetDeviceType = "passt" + ) + + // QemuNetdevParam converts to the QEMU -netdev parameter notation +@@ -709,6 +711,8 @@ func (n NetDeviceType) QemuNetdevParam(netdev *NetDevice, config *Config) string + log.Fatal("vhost-user devices are not supported on IBM Z") + } + return "vhost-user" // -netdev type=vhost-user (no device) ++ case PASST: ++ return "socket" // -netdev 
type=socket,connect=... + default: + return "" + +@@ -742,6 +746,8 @@ func (n NetDeviceType) QemuDeviceParam(netdev *NetDevice, config *Config) Device + log.Fatal("vhost-user devices are not supported on IBM Z") + } + return "" // -netdev type=vhost-user (no device) ++ case PASST: ++ device = "virtio-net" + default: + return "" + } +@@ -806,6 +812,8 @@ type NetDevice struct { + + // Transport is the virtio transport for this device. + Transport VirtioTransport ++ ++ SocketPath string + } + + // VirtioNetTransport is a map of the virtio-net device name that corresponds +@@ -818,6 +826,10 @@ var VirtioNetTransport = map[VirtioTransport]string{ + + // Valid returns true if the NetDevice structure is valid and complete. + func (netdev NetDevice) Valid() bool { ++ if netdev.Type == PASST { ++ return true ++ } ++ + if netdev.ID == "" || netdev.IFName == "" { + return false + } +@@ -867,7 +879,9 @@ func (netdev NetDevice) QemuDeviceParams(config *Config) []string { + + deviceParams = append(deviceParams, fmt.Sprintf("driver=%s", driver)) + deviceParams = append(deviceParams, fmt.Sprintf("netdev=%s", netdev.ID)) +- deviceParams = append(deviceParams, fmt.Sprintf("mac=%s", netdev.MACAddress)) ++ if netdev.MACAddress != "" { ++ deviceParams = append(deviceParams, fmt.Sprintf("mac=%s", netdev.MACAddress)) ++ } + + if netdev.Bus != "" { + deviceParams = append(deviceParams, fmt.Sprintf("bus=%s", netdev.Bus)) +@@ -937,7 +951,12 @@ func (netdev NetDevice) QemuNetdevParams(config *Config) []string { + netdevParams = append(netdevParams, fmt.Sprintf("fds=%s", strings.Join(fdParams, ":"))) + + } else { +- netdevParams = append(netdevParams, fmt.Sprintf("ifname=%s", netdev.IFName)) ++ if netdev.IFName != "" { ++ netdevParams = append(netdevParams, fmt.Sprintf("ifname=%s", netdev.IFName)) ++ } ++ if netdev.SocketPath != "" { ++ netdevParams = append(netdevParams, fmt.Sprintf("connect=%s", netdev.SocketPath)) ++ } + if netdev.DownScript != "" { + netdevParams = append(netdevParams, 
fmt.Sprintf("downscript=%s", netdev.DownScript)) + } +diff --git a/src/runtime/virtcontainers/endpoint.go b/src/runtime/virtcontainers/endpoint.go +index 7786bb3e..e167304a 100644 +--- a/src/runtime/virtcontainers/endpoint.go ++++ b/src/runtime/virtcontainers/endpoint.go +@@ -65,6 +65,8 @@ const ( + + // IPVlanEndpointType is ipvlan network interface. + IPVlanEndpointType EndpointType = "ipvlan" ++ ++ PasstEndpointType EndpointType = "passt" + ) + + // Set sets an endpoint type based on the input string. +@@ -94,6 +96,9 @@ func (endpointType *EndpointType) Set(value string) error { + case "ipvlan": + *endpointType = IPVlanEndpointType + return nil ++ case "passt": ++ *endpointType = PasstEndpointType ++ return nil + default: + return fmt.Errorf("Unknown endpoint type %s", value) + } +@@ -118,6 +123,8 @@ func (endpointType *EndpointType) String() string { + return string(TuntapEndpointType) + case IPVlanEndpointType: + return string(IPVlanEndpointType) ++ case PasstEndpointType: ++ return string(PasstEndpointType) + default: + return "" + } +diff --git a/src/runtime/virtcontainers/network.go b/src/runtime/virtcontainers/network.go +index e6c681da..2de692fe 100644 +--- a/src/runtime/virtcontainers/network.go ++++ b/src/runtime/virtcontainers/network.go +@@ -57,6 +57,9 @@ const ( + // NetXConnectNoneModel can be used when the VM is in the host network namespace + NetXConnectNoneModel + ++ // passt in namespace connecting hypervisor via host sockets ++ NetXConnectPasstModel ++ + // NetXConnectInvalidModel is the last item to Check valid values by IsValid() + NetXConnectInvalidModel + ) +@@ -73,6 +76,8 @@ const ( + + tcFilterNetModelStr = "tcfilter" + ++ passtNetModelStr = "passt" ++ + noneNetModelStr = "none" + ) + +@@ -85,6 +90,8 @@ func (n *NetInterworkingModel) GetModel() string { + return macvtapNetModelStr + case NetXConnectTCFilterModel: + return tcFilterNetModelStr ++ case NetXConnectPasstModel: ++ return passtNetModelStr + case NetXConnectNoneModel: + return 
noneNetModelStr + } +@@ -103,6 +110,9 @@ func (n *NetInterworkingModel) SetModel(modelName string) error { + case tcFilterNetModelStr: + *n = NetXConnectTCFilterModel + return nil ++ case passtNetModelStr: ++ *n = NetXConnectPasstModel ++ return nil + case noneNetModelStr: + *n = NetXConnectNoneModel + return nil +@@ -254,6 +264,8 @@ func getLinkForEndpoint(endpoint Endpoint, netHandle *netlink.Handle) (netlink.L + link = &netlink.IPVlan{} + case *TuntapEndpoint: + link = &netlink.Tuntap{} ++ case *PasstEndpoint: ++ return nil, nil + default: + return nil, fmt.Errorf("Unexpected endpointType %s", ep.Type()) + } +@@ -302,6 +314,11 @@ func xConnectVMNetwork(ctx context.Context, endpoint Endpoint, h Hypervisor) err + span, ctx := networkTrace(ctx, "xConnectVMNetwork", endpoint) + defer closeSpan(span, err) + ++ if endpoint.Type() == PasstEndpointType { ++ networkLogger().Info("VM network via passt user-mode networking") ++ return nil ++ } ++ + netPair := endpoint.NetworkPair() + + queues := 0 +@@ -347,6 +364,7 @@ func xDisconnectVMNetwork(ctx context.Context, endpoint Endpoint) error { + err = untapNetworkPair(ctx, endpoint) + case NetXConnectTCFilterModel: + err = removeTCFiltering(ctx, endpoint) ++ case NetXConnectPasstModel: + default: + err = fmt.Errorf("Invalid internetworking model") + } +@@ -1095,6 +1113,12 @@ func createEndpoint(netInfo NetworkInfo, idx int, model NetInterworkingModel, li + // an appropriate EndPoint based on interface type + // This should be a switch + ++ if model == NetXConnectPasstModel { ++ networkLogger().Info("creating passt endpoint") ++ endpoint, err := createPasstNetworkEndpoint(idx) ++ return endpoint, err ++ } ++ + // Check if interface is a physical interface. Do not create + // tap interface/bridge if it is. 
+ isPhysical, err := isPhysicalIface(netInfo.Iface.Name) +diff --git a/src/runtime/virtcontainers/passt_endpoint.go b/src/runtime/virtcontainers/passt_endpoint.go +new file mode 100644 +index 00000000..7f40135a +--- /dev/null ++++ b/src/runtime/virtcontainers/passt_endpoint.go +@@ -0,0 +1,156 @@ ++// SPDX-License-Identifier: Apache-2.0 ++// ++// passt_endpoint.go - passt endpoint for Kata Containers: start and stop passt ++// ++// Copyright (c) 2021-2022 Red Hat GmbH ++// Author: Stefano Brivio ++ ++package virtcontainers ++ ++import ( ++ "context" ++ "fmt" ++ "os" ++ "os/exec" ++ "syscall" ++ ++ persistapi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/api" ++ vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" ++) ++ ++type PasstEndpoint struct { ++ EndpointType EndpointType ++ EndpointProperties NetworkInfo ++ PCIPath vcTypes.PciPath ++ PasstPID int ++} ++ ++func createPasstNetworkEndpoint(idx int) (*PasstEndpoint, error) { ++ if idx < 0 { ++ return &PasstEndpoint{}, fmt.Errorf("invalid network endpoint index: %d", idx) ++ } ++ ++ cmd := exec.Command("passt", ++ "-P", fmt.Sprintf("/tmp/kata-passt-%d.pid", idx), ++ "-s", fmt.Sprintf("/tmp/kata-passt-%d.socket", idx)) ++ err := cmd.Run() ++ if err != nil { ++ return &PasstEndpoint{}, fmt.Errorf("passt failed to start: %v", err) ++ } ++ ++ in, err := os.Open(fmt.Sprintf("/tmp/kata-passt-%d.pid", idx)) ++ if err != nil { ++ return &PasstEndpoint{}, fmt.Errorf("Failed to read passt PID: %v", err) ++ } ++ defer in.Close() ++ ++ var pid int ++ _, err = fmt.Fscanf(in, "%d", &pid) ++ if err != nil { ++ return &PasstEndpoint{}, fmt.Errorf("Failed to read passt pid: %v", err) ++ } ++ ++ endpoint := &PasstEndpoint{ ++ EndpointType: PasstEndpointType, ++ PasstPID: pid, ++ } ++ ++ return endpoint, nil ++} ++ ++func (endpoint *PasstEndpoint) Properties() NetworkInfo { ++ return endpoint.EndpointProperties ++} ++ ++func (endpoint *PasstEndpoint) Type() 
EndpointType { ++ return endpoint.EndpointType ++} ++ ++// unsupported ++func (endpoint *PasstEndpoint) HardwareAddr() string { ++ return "00:11:22:33:44:55" ++} ++ ++// unsupported ++func (endpoint *PasstEndpoint) Name() string { ++ return "" ++} ++ ++// unsupported ++func (endpoint *PasstEndpoint) NetworkPair() *NetworkInterfacePair { ++ return nil ++} ++ ++// PciPath returns the PCI path of the endpoint. ++func (endpoint *PasstEndpoint) PciPath() vcTypes.PciPath { ++ return endpoint.PCIPath ++} ++ ++// useless ++func (endpoint *PasstEndpoint) SetPciPath(pciPath vcTypes.PciPath) { ++ endpoint.PCIPath = pciPath ++} ++ ++func (endpoint *PasstEndpoint) SetProperties(properties NetworkInfo) { ++ endpoint.EndpointProperties = properties ++} ++ ++func (endpoint *PasstEndpoint) Attach(ctx context.Context, s *Sandbox) error { ++ h := s.hypervisor ++ if err := xConnectVMNetwork(ctx, endpoint, h); err != nil { ++ networkLogger().WithError(err).Error("Error attaching passt endpoint") ++ return err ++ } ++ ++ return h.AddDevice(ctx, endpoint, NetDev) ++} ++ ++func (endpoint *PasstEndpoint) Detach(ctx context.Context, netNsCreated bool, netNsPath string) error { ++ syscall.Kill(endpoint.PasstPID, syscall.SIGQUIT) ++ ++ return nil ++} ++ ++func (endpoint *PasstEndpoint) HotAttach(ctx context.Context, h Hypervisor) error { ++ return fmt.Errorf("HotAttach not supported by PasstEndpoint") ++} ++ ++func (endpoint *PasstEndpoint) HotDetach(ctx context.Context, h Hypervisor, netNsCreated bool, netNsPath string) error { ++ return fmt.Errorf("HotDetatch not supported by PasstEndpoint") ++} ++ ++func (endpoint *PasstEndpoint) save() persistapi.NetworkEndpoint { ++ return persistapi.NetworkEndpoint{ ++ Type: string(endpoint.Type()), ++ ++ Passt: &persistapi.PasstEndpoint{ ++ PasstPID: endpoint.PasstPID, ++ }, ++ } ++} ++ ++func (endpoint *PasstEndpoint) load(s persistapi.NetworkEndpoint) { ++ endpoint.EndpointType = PasstEndpointType ++ ++ if s.Passt != nil { ++ endpoint.PasstPID = 
s.Passt.PasstPID ++ } ++} ++ ++// unsupported ++func (endpoint *PasstEndpoint) GetRxRateLimiter() bool { ++ return false ++} ++ ++func (endpoint *PasstEndpoint) SetRxRateLimiter() error { ++ return fmt.Errorf("rx rate limiter is unsupported for physical endpoint") ++} ++ ++// unsupported ++func (endpoint *PasstEndpoint) GetTxRateLimiter() bool { ++ return false ++} ++ ++func (endpoint *PasstEndpoint) SetTxRateLimiter() error { ++ return fmt.Errorf("tx rate limiter is unsupported for physical endpoint") ++} +diff --git a/src/runtime/virtcontainers/persist/api/network.go b/src/runtime/virtcontainers/persist/api/network.go +index 51c3aac6..79d77cd9 100644 +--- a/src/runtime/virtcontainers/persist/api/network.go ++++ b/src/runtime/virtcontainers/persist/api/network.go +@@ -79,6 +79,10 @@ type VhostUserEndpoint struct { + PCIPath vcTypes.PciPath + } + ++type PasstEndpoint struct { ++ PasstPID int ++} ++ + // NetworkEndpoint contains network interface information + type NetworkEndpoint struct { + // One and only one of these below are not nil according to Type. 
+@@ -90,6 +94,7 @@ type NetworkEndpoint struct { + Tap *TapEndpoint `json:",omitempty"` + IPVlan *IPVlanEndpoint `json:",omitempty"` + Tuntap *TuntapEndpoint `json:",omitempty"` ++ Passt *PasstEndpoint `json:",omitempty"` + + Type string + } +diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go +index 97cd6eb8..9ace0ace 100644 +--- a/src/runtime/virtcontainers/qemu_arch_base.go ++++ b/src/runtime/virtcontainers/qemu_arch_base.go +@@ -615,6 +615,17 @@ func genericNetwork(endpoint Endpoint, vhost, nestedRun bool, index int) (govmmQ + FDs: netPair.VMFds, + VhostFDs: netPair.VhostFds, + } ++ case *PasstEndpoint: ++ d = govmmQemu.NetDevice{ ++ Type: govmmQemu.PASST, ++ Driver: govmmQemu.VirtioNet, ++ ID: fmt.Sprintf("network-%d", index), ++ // TODO: Drop hardcoded MAC address, passt endpoint ++ // doesn't need to know it ++ MACAddress: "00:11:22:33:44:55", ++ DisableModern: nestedRun, ++ SocketPath: fmt.Sprintf("/tmp/kata-passt-%d.socket", index), ++ } + default: + return govmmQemu.NetDevice{}, fmt.Errorf("Unknown type for endpoint") + } +-- +2.28.0 + diff --git a/contrib/kata-containers/README.md b/contrib/kata-containers/README.md new file mode 100644 index 0000000..96acd5f --- /dev/null +++ b/contrib/kata-containers/README.md @@ -0,0 +1,302 @@ +This document shows how to set up a Kata Containers environment using passt to +implement user-mode networking: contrary to other networking models currently +implemented, this kind of setup requires no elevated privileges or capabilities +as far as networking is concerned. + +This proof-of-concept uses CRI-O as implementation container runtime, which is +controlled directly without resorting to a full Kubernetes environment. + +# Pre-requisites + +* Go and rust toolchains, typically provided by distribution packages +* the usual tools, such as git, make, etc. 
+* a 4.x qemu version, or more recent, with a working virtiofsd executable + (provided at least by Debian, Ubuntu, Fedora packages) + +# Fetch and prepare components + +## CRI-O + +CRI-O is the container runtime. It implements the Kubernetes CRI (Container +Runtime Interface) on one side -- and we'll handle that part manually with +`crictl` here, and on the other side it supports OCI (Open Container Initiative) +runtimes -- Kata Containers is one of them. + +### Fetch + + git clone https://github.com/cri-o/cri-o.git + +### Build + + cd cri-o + make + +### Install + +As root: + + make install + +### Configure + +Configuration is now at `/etc/crio/crio.conf`. This would also be the case for +distribution packages. Some specific configuration items for Kata Containers +are: + + # Cgroup management implementation used for the runtime. + cgroup_manager = "cgroupfs" + + # manage_ns_lifecycle determines whether we pin and remove namespaces + # and manage their lifecycle + manage_ns_lifecycle = true + +and the following section, that can be added at the end, defines a special type +of runtime, the `vm` type. This is needed to run the Kata Containers runtime +instead of the default `crun` choice: + + [crio.runtime.runtimes.kata] + runtime_path = "/usr/local/bin/containerd-shim-kata-v2" + runtime_type = "vm" + runtime_root = "/run/vc" + +Note that we don't have a containerd-shim-kata-v2 binary yet, we'll deal with +that in the next steps. + +## CNI plugins + +CNI plugins are actually binaries, run by CRI-O, used to configure networking on +the host as well as on the pod side. A few network topologies are offered, with +very limited capabilities. 
+ +### Fetch + + git clone https://github.com/containernetworking/plugins + +### Build + + cd plugins + ./build_linux.sh + +### Install + +As root: + + mkdir -p /opt/cni/bin + cp bin/* /opt/cni/bin/ + + +### Configure + +The path where CNI configurations are located is configurable in +`/etc/crio/crio.conf`, see the `network_dir` parameter there. Assuming the +default value, we need to provide at least one configuration under +`/etc/cni/net.d/`. For example: + + # cat /etc/cni/net.d/50-kata-sandbox.conf + { + "cniVersion": "0.3.0", + "name": "crio-bridge", + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "ipMasq": true, + "ipam": { + "type": "host-local", + "subnet": "10.88.0.0/16", + "routes": [ + { "dst": "0.0.0.0/0" } + ] + } + } + +## crictl + +`crictl` is needed to control CRI-O in lieu of Kubernetes. + +### Fetch + + git clone https://github.com/kubernetes-sigs/cri-tools.git + +### Build + + cd cri-tools + make + +### Install + +As root: + + make install + +## mbuto + +We'll use `mbuto` to build a minimal virtual machine image for usage with the +Kata Containers runtime. + +### Fetch + + git clone https://mbuto.lameexcu.se/mbuto + +## Kata Containers + +### Fetch + + git clone https://github.com/kata-containers/kata-containers + +### Patch + +The current upstream version doesn't support the _passt_ networking model yet, +use the patch from this directory to add it: + + patch -p1 < 0001-virtcontainers-agent-Add-passt-networking-model-and-.patch + +### Build + + make -C src/runtime + make -C src/agent LIBC=gnu + +### Install + +As root: + + make -C src/runtime install + cp src/agent/target/x86_64-unknown-linux-gnu/release/kata-agent /usr/libexec/ + chmod 755 /usr/libexec/kata-agent + +### Build the Virtual Machine image + + cd mbuto + ./mbuto -f /tmp/kata.img + +See `mbuto -h` for additional parameters, such as choice of kernel version, +kernel modules, program add-ons, etc. 
`mbuto` will print some configuration +parameters to be used in the configuration of the Kata Containers runtime below. +For example: + + $ ./mbuto -c lz4 -f /tmp/kata.img + Not running as root, won't keep cpio mounted + Size: bin 12M lib 59M kmod 1.4M total 70M compressed 33M + Kata Containers [hypervisor.qemu] configuration: + + kernel = "/boot/vmlinuz-5.10.0-6-amd64" + initrd = "/tmp/kata.img" + +### Configure + +The configuration file at this point is located at +`/usr/share/defaults/kata-containers/configuration-qemu.toml`. Some parameters of general interest are: + + [hypervisor.qemu] + kernel = "/boot/vmlinuz-5.10.0-6-amd64" + initrd = "/tmp/kata.img" + +where we can use the values indicated earlier by `mbuto`. Currently, the default +path for the `virtiofsd` daemon doesn't work for all distributions: ensure that +it matches. For example, on Debian: + + virtio_fs_daemon = "/usr/lib/qemu/virtiofsd" + +we'll then need to enable the `passt` networking model for the runtime. In the +`[runtime]` section: + + internetworking_model = "passt" + +# Run an example container + +## Fetch + +We'll now need an image of a container to run as an example. 
With `podman` +installed via distribution package, we can import one: + + podman pull docker.io/i386/busybox + +## Configure + +Now we can define configuration files for the pod and the container we want to create +and start: + + $ cat pod-config.json + { + "metadata": { + "name": "kata-sandbox", + "namespace": "default", + "attempt": 1, + "uid": "hdishd83djaidwnduwk28bcsb" + }, + "logDirectory": "/tmp", + "linux": { + } + } + + $ cat container-busybox.json + { + "metadata": { + "name": "kata-busybox" + }, + "image": { + "image": "docker.io/i386/busybox" + }, + "command": [ + "sleep", "6000" + ], + "log_path":"kata-busybox.log", + "linux": { + } + } + +## Run the container workload + +Assuming we have `pod-config.json` and `container-busybox.json` defined above, +we can now: + +### start CRI-O + + crio -l debug + +### create the pod and run a container inside it + + c=$(crictl start $(crictl create $(crictl runp --runtime=kata pod-config.json) container-busybox.json pod-config.json)) + +### verify that addresses are properly configured + + crictl exec $c ip ad sh + +## Enable support for ICMP/ICMPv6 Echo Request + +_passt_ can replicate ICMP Echo Requests sent by the workload, and propagate the +replies back. However, as it's not running as root, we need to enable so-called +_ping_ sockets for unprivileged users. From the namespace created by CRI-O for +this container: + + sysctl -w net.ipv4.ping_group_range="0 2147483647" + +# Troubleshooting + +## Redirect qemu's console output to file + +Agent errors and kernel messages should be accessible via named UNIX domain +socket at `/run/vc/vm/*/console.sock`, provided `agent.debug_console` is enabled +in `kernel_params` of `configuration.toml`, but this won't work if the agent +doesn't start. 
In order to get those, we can wrap `qemu` and get, additionally, +all the output piped to a file: + + $ cat /usr/local/bin/qemu.sh + #!/bin/sh + + /usr/bin/qemu-system-x86_64 "$@" -serial file:/tmp/qemu.log 2>/tmp/qemu_err.log + +now, use this as path for `qemu` in `configuration.toml`: + + [hypervisor.qemu] + path = "/usr/local/bin/qemu.sh" + +and don't forget to add `console=ttyS0` to the kernel parameters, so that kernel +messages will also be included: + + kernel_params = "... console=ttyS0" + +## Debug console + +See the `kata-console` script in the +[kata-vfio-tools repository](https://github.com/dgibson/kata-vfio-tools) for a +convenient helper to access the debug console provided by the agent.