From 6597cc25a1f00a466293074c95c65c44817bc9e6 Mon Sep 17 00:00:00 2001 From: Laine Stump Date: Mon, 6 May 2013 15:43:56 -0400 Subject: [PATCH] qemu: allocate network connections sooner during domain startup VFIO device assignment requires a cgroup ACL to be setup for access to the /dev/vfio/nn "group" device for any devices that will be assigned to a guest. In the case of a host device that is allocated from a pool, it was being allocated during qemuBuildCommandLine(), which is called by qemuProcessStart() *after* the all-encompassing qemuSetupCgroup() was called, meaning that the standard Cgroup ACL setup wasn't creating ACLs for these devices allocated from pools. One possible solution was to manually add a single ACL down inside qemuBuildCommandLine() when networkAllocateActualDevice() is called, but that has two problems: 1) the function that adds the cgroup ACL requires a virDomainObjPtr, which isn't available in qemuBuildCommandLine(), and 2) we really shouldn't be doing network device setup inside qemuBuildCommandLine() anyway. Instead, I've created a new function called qemuNetworkPrepareDevices() which is called just before qemuPrepareHostDevices() during qemuProcessStart() (explanation of ordering in the comments), i.e. well before the call to qemuSetupCgroup(). To minimize code churn in a patch that will be backported to 1.0.5-maint, qemuNetworkPrepareDevices only does networkAllocateActualDevice() and the bare amount of setup required for type='hostdev network devices, but it eventually should do *all* device setup for guest network devices. Note that some of the code that was previously needed in qemuBuildCommandLine() is no longer required when networkAllocateActualDevice() is called earlier: * qemuAssignDeviceHostdevAlias() is already done further down in qemuProcessStart(). * qemuPrepareHostdevPCIDevices() is called by qemuPrepareHostDevices() which is called after qemuNetworkPrepareDevices() in qemuProcessStart(). As hinted above, this new function should be moved into a separate qemu_network.c (or similarly named) file along with qemuPhysIfaceConnect(), qemuNetworkIfaceConnect(), and qemuOpenVhostNet(), and expanded to call those functions as well, then the nnets loop in qemuBuildCommandLine() should be reduced to only build the commandline string (which itself can be in a separate qemuInterfaceBuilldCommandLine() function as suggested by Michal). However, this will require storing away an array of tapfd and vhostfd that are needed for the commandline, so I would rather do that in a separate patch and leave this patch at the minimum to fix the bug. (cherry picked from commit 8cd40e7e0d92a0edbe08941fdf728a81c2e6cf15) --- src/qemu/qemu_command.c | 108 ++++++++++++++++++++++------------------ src/qemu/qemu_command.h | 4 +- src/qemu/qemu_process.c | 8 +++ 3 files changed, 71 insertions(+), 49 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 7d851e3786..c81579998a 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -458,6 +458,58 @@ qemuOpenVhostNet(virDomainDefPtr def, return 0; } +int +qemuNetworkPrepareDevices(virDomainDefPtr def) +{ + int ret = -1; + int ii; + + for (ii = 0; ii < def->nnets; ii++) { + virDomainNetDefPtr net = def->nets[ii]; + int actualType; + + /* If appropriate, grab a physical device from the configured + * network's pool of devices, or resolve bridge device name + * to the one defined in the network definition. + */ + if (networkAllocateActualDevice(net) < 0) + goto cleanup; + + actualType = virDomainNetGetActualType(net); + if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV && + net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { + /* Each type='hostdev' network device must also have a + * corresponding entry in the hostdevs array. For netdevs + * that are hardcoded as type='hostdev', this is already + * done by the parser, but for those allocated from a + * network / determined at runtime, we need to do it + * separately. + */ + virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net); + + if (virDomainHostdevFind(def, hostdev, NULL) >= 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("PCI device %04x:%02x:%02x.%x " + "allocated from network %s is already " + "in use by domain %s"), + hostdev->source.subsys.u.pci.addr.domain, + hostdev->source.subsys.u.pci.addr.bus, + hostdev->source.subsys.u.pci.addr.slot, + hostdev->source.subsys.u.pci.addr.function, + net->data.network.name, + def->name); + goto cleanup; + } + if (virDomainHostdevInsert(def, hostdev) < 0) { + virReportOOMError(); + goto cleanup; + } + } + } + ret = 0; +cleanup: + return ret; +} static int qemuDomainDeviceAliasIndex(virDomainDeviceInfoPtr info, const char *prefix) @@ -7107,7 +7159,14 @@ qemuBuildCommandLine(virConnectPtr conn, char vhostfd_name[50] = ""; int vlan; int bootindex = bootNet; - int actualType; + int actualType = virDomainNetGetActualType(net); + + if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) { + /* NET_TYPE_HOSTDEV devices are really hostdev devices, so + * their commandlines are constructed with other hostdevs. + */ + continue; + } bootNet = 0; if (!bootindex) @@ -7120,53 +7179,6 @@ qemuBuildCommandLine(virConnectPtr conn, else vlan = i; - /* If appropriate, grab a physical device from the configured - * network's pool of devices, or resolve bridge device name - * to the one defined in the network definition. - */ - if (networkAllocateActualDevice(net) < 0) - goto error; - - actualType = virDomainNetGetActualType(net); - if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) { - if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { - virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net); - virDomainHostdevDefPtr found; - /* For a network with , there is a need to - * add the newly minted hostdev to the hostdevs array. - */ - if (qemuAssignDeviceHostdevAlias(def, hostdev, - (def->nhostdevs-1)) < 0) { - goto error; - } - - if (virDomainHostdevFind(def, hostdev, &found) < 0) { - if (virDomainHostdevInsert(def, hostdev) < 0) { - virReportOOMError(); - goto error; - } - if (qemuPrepareHostdevPCIDevices(driver, def->name, def->uuid, - &hostdev, 1) < 0) { - goto error; - } - } - else { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("PCI device %04x:%02x:%02x.%x " - "allocated from network %s is already " - "in use by domain %s"), - hostdev->source.subsys.u.pci.addr.domain, - hostdev->source.subsys.u.pci.addr.bus, - hostdev->source.subsys.u.pci.addr.slot, - hostdev->source.subsys.u.pci.addr.function, - net->data.network.name, - def->name); - goto error; - } - } - continue; - } - if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK || actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) { int tapfd = qemuNetworkIfaceConnect(def, conn, driver, net, diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index a70694288d..e690dee051 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -1,7 +1,7 @@ /* * qemu_command.h: QEMU command generation * - * Copyright (C) 2006-2012 Red Hat, Inc. + * Copyright (C) 2006-2013 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -161,6 +161,8 @@ int qemuOpenVhostNet(virDomainDefPtr def, virQEMUCapsPtr qemuCaps, int *vhostfd); +int qemuNetworkPrepareDevices(virDomainDefPtr def); + /* * NB: def->name can be NULL upon return and the caller * *must* decide how to fill in a name in this case diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 58f64b7310..0be43b46ef 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3422,6 +3422,14 @@ int qemuProcessStart(virConnectPtr conn, goto cleanup; } + /* network devices must be "prepared" before hostdevs, because + * setting up a network device might create a new hostdev that + * will need to be setup. + */ + VIR_DEBUG("Preparing network devices"); + if (qemuNetworkPrepareDevices(vm->def) < 0) + goto cleanup; + /* Must be run before security labelling */ VIR_DEBUG("Preparing host devices"); if (qemuPrepareHostDevices(driver, vm->def, !migrateFrom) < 0)