qemu: allocate network connections sooner during domain startup

VFIO device assignment requires a cgroup ACL to be setup for access to
the /dev/vfio/nn "group" device for any devices that will be assigned
to a guest. In the case of a host device that is allocated from a
pool, it was being allocated during qemuBuildCommandLine(), which is
called by qemuProcessStart() *after* the all-encompassing
qemuSetupCgroup() was called, meaning that the standard Cgroup ACL
setup wasn't creating ACLs for these devices allocated from pools.

One possible solution was to manually add a single ACL down inside
qemuBuildCommandLine() when networkAllocateActualDevice() is called,
but that has two problems: 1) the function that adds the cgroup ACL
requires a virDomainObjPtr, which isn't available in
qemuBuildCommandLine(), and 2) we really shouldn't be doing network
device setup inside qemuBuildCommandLine() anyway.

Instead, I've created a new function called
qemuNetworkPrepareDevices() which is called just before
qemuPrepareHostDevices() during qemuProcessStart() (explanation of
ordering in the comments), i.e. well before the call to
qemuSetupCgroup(). To minimize code churn in a patch that will be
backported to 1.0.5-maint, qemuNetworkPrepareDevices only does
networkAllocateActualDevice() and the bare amount of setup required
for type='hostdev network devices, but it eventually should do *all*
device setup for guest network devices.

Note that some of the code that was previously needed in
qemuBuildCommandLine() is no longer required when
networkAllocateActualDevice() is called earlier:

 * qemuAssignDeviceHostdevAlias() is already done further down in
   qemuProcessStart().

 * qemuPrepareHostdevPCIDevices() is called by
   qemuPrepareHostDevices() which is called after
   qemuNetworkPrepareDevices() in qemuProcessStart().

As hinted above, this new function should be moved into a separate
qemu_network.c (or similarly named) file along with
qemuPhysIfaceConnect(), qemuNetworkIfaceConnect(), and
qemuOpenVhostNet(), and expanded to call those functions as well, then
the nnets loop in qemuBuildCommandLine() should be reduced to only
build the commandline string (which itself can be in a separate
qemuInterfaceBuilldCommandLine() function as suggested by
Michal). However, this will require storing away an array of tapfd and
vhostfd that are needed for the commandline, so I would rather do that
in a separate patch and leave this patch at the minimum to fix the
bug.
This commit is contained in:
Laine Stump 2013-05-06 15:43:56 -04:00
parent 039e30805c
commit 8cd40e7e0d
3 changed files with 71 additions and 49 deletions

View File

@ -457,6 +457,58 @@ qemuOpenVhostNet(virDomainDefPtr def,
return 0;
}
int
qemuNetworkPrepareDevices(virDomainDefPtr def)
{
int ret = -1;
int ii;
for (ii = 0; ii < def->nnets; ii++) {
virDomainNetDefPtr net = def->nets[ii];
int actualType;
/* If appropriate, grab a physical device from the configured
* network's pool of devices, or resolve bridge device name
* to the one defined in the network definition.
*/
if (networkAllocateActualDevice(net) < 0)
goto cleanup;
actualType = virDomainNetGetActualType(net);
if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV &&
net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
/* Each type='hostdev' network device must also have a
* corresponding entry in the hostdevs array. For netdevs
* that are hardcoded as type='hostdev', this is already
* done by the parser, but for those allocated from a
* network / determined at runtime, we need to do it
* separately.
*/
virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net);
if (virDomainHostdevFind(def, hostdev, NULL) >= 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("PCI device %04x:%02x:%02x.%x "
"allocated from network %s is already "
"in use by domain %s"),
hostdev->source.subsys.u.pci.addr.domain,
hostdev->source.subsys.u.pci.addr.bus,
hostdev->source.subsys.u.pci.addr.slot,
hostdev->source.subsys.u.pci.addr.function,
net->data.network.name,
def->name);
goto cleanup;
}
if (virDomainHostdevInsert(def, hostdev) < 0) {
virReportOOMError();
goto cleanup;
}
}
}
ret = 0;
cleanup:
return ret;
}
static int qemuDomainDeviceAliasIndex(virDomainDeviceInfoPtr info,
const char *prefix)
@ -7106,7 +7158,14 @@ qemuBuildCommandLine(virConnectPtr conn,
char vhostfd_name[50] = "";
int vlan;
int bootindex = bootNet;
int actualType;
int actualType = virDomainNetGetActualType(net);
if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
/* NET_TYPE_HOSTDEV devices are really hostdev devices, so
* their commandlines are constructed with other hostdevs.
*/
continue;
}
bootNet = 0;
if (!bootindex)
@ -7119,53 +7178,6 @@ qemuBuildCommandLine(virConnectPtr conn,
else
vlan = i;
/* If appropriate, grab a physical device from the configured
* network's pool of devices, or resolve bridge device name
* to the one defined in the network definition.
*/
if (networkAllocateActualDevice(net) < 0)
goto error;
actualType = virDomainNetGetActualType(net);
if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net);
virDomainHostdevDefPtr found;
/* For a network with <forward mode='hostdev'>, there is a need to
* add the newly minted hostdev to the hostdevs array.
*/
if (qemuAssignDeviceHostdevAlias(def, hostdev,
(def->nhostdevs-1)) < 0) {
goto error;
}
if (virDomainHostdevFind(def, hostdev, &found) < 0) {
if (virDomainHostdevInsert(def, hostdev) < 0) {
virReportOOMError();
goto error;
}
if (qemuPrepareHostdevPCIDevices(driver, def->name, def->uuid,
&hostdev, 1) < 0) {
goto error;
}
}
else {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("PCI device %04x:%02x:%02x.%x "
"allocated from network %s is already "
"in use by domain %s"),
hostdev->source.subsys.u.pci.addr.domain,
hostdev->source.subsys.u.pci.addr.bus,
hostdev->source.subsys.u.pci.addr.slot,
hostdev->source.subsys.u.pci.addr.function,
net->data.network.name,
def->name);
goto error;
}
}
continue;
}
if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK ||
actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) {
int tapfd = qemuNetworkIfaceConnect(def, conn, driver, net,

View File

@ -1,7 +1,7 @@
/*
* qemu_command.h: QEMU command generation
*
* Copyright (C) 2006-2012 Red Hat, Inc.
* Copyright (C) 2006-2013 Red Hat, Inc.
* Copyright (C) 2006 Daniel P. Berrange
*
* This library is free software; you can redistribute it and/or
@ -161,6 +161,8 @@ int qemuOpenVhostNet(virDomainDefPtr def,
virQEMUCapsPtr qemuCaps,
int *vhostfd);
int qemuNetworkPrepareDevices(virDomainDefPtr def);
/*
* NB: def->name can be NULL upon return and the caller
* *must* decide how to fill in a name in this case

View File

@ -3422,6 +3422,14 @@ int qemuProcessStart(virConnectPtr conn,
goto cleanup;
}
/* network devices must be "prepared" before hostdevs, because
* setting up a network device might create a new hostdev that
* will need to be setup.
*/
VIR_DEBUG("Preparing network devices");
if (qemuNetworkPrepareDevices(vm->def) < 0)
goto cleanup;
/* Must be run before security labelling */
VIR_DEBUG("Preparing host devices");
if (qemuPrepareHostDevices(driver, vm->def, !migrateFrom) < 0)