libvirt/src/lxc/lxc_process.c

1743 lines
54 KiB
C
Raw Normal View History

/*
* Copyright (C) 2010-2016 Red Hat, Inc.
* Copyright IBM Corp. 2008
*
* lxc_process.c: LXC process lifecycle management
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include "lxc_process.h"
#include "lxc_domain.h"
#include "lxc_container.h"
#include "lxc_cgroup.h"
#include "lxc_fuse.h"
#include "datatypes.h"
#include "virfile.h"
#include "virpidfile.h"
#include "virnetdev.h"
#include "virnetdevveth.h"
#include "virnetdevbridge.h"
#include "virnetdevopenvswitch.h"
#include "virtime.h"
#include "domain_nwfilter.h"
2012-12-12 18:06:53 +00:00
#include "viralloc.h"
#include "domain_audit.h"
#include "virerror.h"
2012-12-12 17:59:27 +00:00
#include "virlog.h"
#include "vircommand.h"
#include "lxc_hostdev.h"
2012-12-12 17:00:34 +00:00
#include "virhook.h"
#include "virstring.h"
#include "viratomic.h"
#include "virprocess.h"
#include "virsystemd.h"
#include "netdev_bandwidth_conf.h"
#define VIR_FROM_THIS VIR_FROM_LXC
VIR_LOG_INIT("lxc.lxc_process");
#define START_POSTFIX ": starting up\n"
static void
2013-07-15 19:08:11 +02:00
lxcProcessAutoDestroy(virDomainObjPtr dom,
virConnectPtr conn,
void *opaque)
{
2013-07-15 19:08:11 +02:00
virLXCDriverPtr driver = opaque;
virObjectEventPtr event = NULL;
virLXCDomainObjPrivatePtr priv;
2013-07-15 19:08:11 +02:00
VIR_DEBUG("driver=%p dom=%s conn=%p", driver, dom->def->name, conn);
priv = dom->privateData;
VIR_DEBUG("Killing domain");
2013-07-15 19:08:11 +02:00
virLXCProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED);
virDomainAuditStop(dom, "destroyed");
event = virDomainEventLifecycleNewFromObj(dom,
VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
priv->doneStopEvent = true;
if (!dom->persistent)
2013-07-15 19:08:11 +02:00
virDomainObjListRemove(driver->domains, dom);
virObjectEventStateQueue(driver->domainEventState, event);
}
/*
* Precondition: driver is locked
*/
static int
virLXCProcessReboot(virLXCDriverPtr driver,
virDomainObjPtr vm)
{
2013-07-15 19:08:11 +02:00
virConnectPtr conn = virCloseCallbacksGetConn(driver->closeCallbacks, vm);
int reason = vm->state.reason;
bool autodestroy = false;
int ret = -1;
virDomainDefPtr savedDef;
Fix deadlock in handling EOF in LXC monitor Depending on the scenario in which LXC containers exit, it is possible for the EOF callback of the LXC monitor to deadlock the driver. #0 0x00000038a0a0de4d in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00000038a0a09ca6 in _L_lock_840 () from /lib64/libpthread.so.0 #2 0x00000038a0a09ba8 in pthread_mutex_lock () from /lib64/libpthread.so.0 #3 0x00007f4bd9579d55 in virMutexLock (m=<optimized out>) at util/threads-pthread.c:85 #4 0x00007f4bcacc7597 in lxcDriverLock (driver=0x7f4bc40c8290) at lxc/lxc_conf.h:81 #5 virLXCProcessMonitorEOFNotify (mon=<optimized out>, vm=0x7f4bb4000b00) at lxc/lxc_process.c:581 #6 0x00007f4bd9645c91 in virNetClientCloseLocked (client=client@entry=0x7f4bb4009e60) at rpc/virnetclient.c:554 #7 0x00007f4bd96460f8 in virNetClientIOEventLoopPassTheBuck (thiscall=0x0, client=0x7f4bb4009e60) at rpc/virnetclient.c:1306 #8 virNetClientIOEventLoopPassTheBuck (client=0x7f4bb4009e60, thiscall=0x0) at rpc/virnetclient.c:1287 #9 0x00007f4bd96467a2 in virNetClientCloseInternal (reason=3, client=0x7f4bb4009e60) at rpc/virnetclient.c:589 #10 virNetClientCloseInternal (client=0x7f4bb4009e60, reason=3) at rpc/virnetclient.c:561 #11 0x00007f4bcacc4a82 in virLXCMonitorClose (mon=0x7f4bb4000a00) at lxc/lxc_monitor.c:201 #12 0x00007f4bcacc55ac in virLXCProcessCleanup (reason=<optimized out>, vm=0x7f4bb4000b00, driver=0x7f4bc40c8290) at lxc/lxc_process.c:240 #13 virLXCProcessStop (driver=0x7f4bc40c8290, vm=vm@entry=0x7f4bb4000b00, reason=reason@entry=VIR_DOMAIN_SHUTOFF_DESTROYED) at lxc/lxc_process.c:735 #14 0x00007f4bcacc5bd2 in virLXCProcessAutoDestroyDom (payload=<optimized out>, name=0x7f4bb4003c80, opaque=0x7fff41af2df0) at lxc/lxc_process.c:94 #15 0x00007f4bd9586649 in virHashForEach (table=0x7f4bc409b270, iter=iter@entry=0x7f4bcacc5ab0 <virLXCProcessAutoDestroyDom>, data=data@entry=0x7fff41af2df0) at util/virhash.c:514 #16 0x00007f4bcacc52d7 in virLXCProcessAutoDestroyRun (driver=driver@entry=0x7f4bc40c8290, conn=conn@entry=0x7f4bb8000ab0) at lxc/lxc_process.c:120 #17 0x00007f4bcacca628 in lxcClose (conn=0x7f4bb8000ab0) at lxc/lxc_driver.c:128 #18 0x00007f4bd95e67ab in virReleaseConnect (conn=conn@entry=0x7f4bb8000ab0) at datatypes.c:114 When the driver calls virLXCMonitorClose, there is really no need for the EOF callback to be invoked in this case, since the caller can easily handle events itself. In changing this, the monitor needs to take a deep copy of the callback list, not merely a reference. Also adds debug statements in various places to aid troubleshooting Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-09-26 15:08:20 +01:00
VIR_DEBUG("Faking reboot");
if (conn) {
virObjectRef(conn);
autodestroy = true;
} else {
conn = virConnectOpen("lxc:///system");
/* Ignoring NULL conn which is mostly harmless here */
}
/* In a reboot scenario, we need to make sure we continue
* to use the current 'def', and not switch to 'newDef'.
* So temporarily hide the newDef and then reinstate it
*/
savedDef = vm->newDef;
vm->newDef = NULL;
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
vm->newDef = savedDef;
if (virLXCProcessStart(conn, driver, vm,
0, NULL, autodestroy, reason) < 0) {
VIR_WARN("Unable to handle reboot of vm %s",
vm->def->name);
goto cleanup;
}
ret = 0;
cleanup:
virObjectUnref(conn);
return ret;
}
static void
lxcProcessRemoveDomainStatus(virLXCDriverConfigPtr cfg,
virDomainObjPtr vm)
{
char ebuf[1024];
char *file = NULL;
file = g_strdup_printf("%s/%s.xml", cfg->stateDir, vm->def->name);
if (unlink(file) < 0 && errno != ENOENT && errno != ENOTDIR)
VIR_WARN("Failed to remove domain XML for %s: %s",
vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf)));
VIR_FREE(file);
}
/**
* virLXCProcessCleanup:
* @driver: pointer to driver structure
* @vm: pointer to VM to clean up
* @reason: reason for switching the VM to shutoff state
*
* Cleanout resources associated with the now dead VM
*
*/
static void virLXCProcessCleanup(virLXCDriverPtr driver,
virDomainObjPtr vm,
virDomainShutoffReason reason)
{
size_t i;
virLXCDomainObjPrivatePtr priv = vm->privateData;
const virNetDevVPortProfile *vport = NULL;
virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
virConnectPtr conn = NULL;
VIR_DEBUG("Cleanup VM name=%s pid=%d reason=%d",
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
vm->def->name, (int)vm->pid, (int)reason);
/* now that we know it's stopped call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, driver->xmlopt, 0);
/* we can't stop the operation even if the script raised an error */
virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_STOPPED, VIR_HOOK_SUBOP_END,
NULL, xml, NULL);
VIR_FREE(xml);
}
virSecurityManagerRestoreAllLabel(driver->securityManager,
vm->def, false, false);
virSecurityManagerReleaseLabel(driver->securityManager, vm->def);
/* Clear out dynamically assigned labels */
if (vm->def->nseclabels &&
vm->def->seclabels[0]->type == VIR_DOMAIN_SECLABEL_DYNAMIC) {
VIR_FREE(vm->def->seclabels[0]->model);
VIR_FREE(vm->def->seclabels[0]->label);
VIR_FREE(vm->def->seclabels[0]->imagelabel);
}
/* Stop autodestroy in case guest is restarted */
2013-07-15 19:08:11 +02:00
virCloseCallbacksUnset(driver->closeCallbacks, vm,
lxcProcessAutoDestroy);
if (priv->monitor) {
virLXCMonitorClose(priv->monitor);
virObjectUnref(priv->monitor);
priv->monitor = NULL;
}
virPidFileDelete(cfg->stateDir, vm->def->name);
lxcProcessRemoveDomainStatus(cfg, vm);
virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
vm->pid = -1;
vm->def->id = -1;
if (virAtomicIntDecAndTest(&driver->nactive) && driver->inhibitCallback)
driver->inhibitCallback(false, driver->inhibitOpaque);
virLXCDomainReAttachHostDevices(driver, vm->def);
for (i = 0; i < vm->def->nnets; i++) {
virDomainNetDefPtr iface = vm->def->nets[i];
vport = virDomainNetGetActualVirtPortProfile(iface);
if (iface->ifname) {
if (vport &&
vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH)
ignore_value(virNetDevOpenvswitchRemovePort(
virDomainNetGetActualBridgeName(iface),
iface->ifname));
ignore_value(virNetDevVethDelete(iface->ifname));
}
if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
if (conn || (conn = virGetConnectNetwork()))
virDomainNetReleaseActualDevice(conn, vm->def, iface);
else
VIR_WARN("Unable to release network device '%s'", NULLSTR(iface->ifname));
}
}
virDomainConfVMNWFilterTeardown(vm);
if (priv->cgroup) {
virCgroupRemove(priv->cgroup);
virCgroupFree(&priv->cgroup);
}
/* Get machined to terminate the machine as it may not have cleaned it
* properly. See https://bugs.freedesktop.org/show_bug.cgi?id=68370 for
* the bug we are working around here.
*/
systemd: Modernize machine naming So, systemd-machined has this philosophy that machine names are like hostnames and hence should follow the same rules. But we always allowed international characters in domain names. Thus we need to modify the machine name we are passing to systemd. In order to change some machine names that we will be passing to systemd, we also need to call TerminateMachine at the end of a lifetime of a domain. Even for domains that were started with older libvirt. That can be achieved thanks to virSystemdGetMachineNameByPID(). And because we can change machine names, we can get rid of the inconsistent and pointless escaping of domain names when creating machine names. So this patch modifies the naming in the following way. It creates the name as <drivername>-<id>-<name> where invalid hostname characters are stripped out of the name and if the resulting name is longer, it truncates it to 64 characters. That way we can start domains we couldn't start before. Well, at least on systemd. To make it work all together, the machineName (which is needed only with systemd) is saved in domain's private data. That way the generation is moved to the driver and we don't need to pass various unnecessary arguments to cgroup functions. The only thing this complicates a bit is the scope generation when validating a cgroup where we must check both old and new naming, so a slight modification was needed there. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1282846 Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
2016-02-01 16:50:54 +01:00
virCgroupTerminateMachine(priv->machineName);
VIR_FREE(priv->machineName);
/* The "release" hook cleans up additional resources */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, driver->xmlopt, 0);
/* we can't stop the operation even if the script raised an error */
virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_RELEASE, VIR_HOOK_SUBOP_END,
NULL, xml, NULL);
VIR_FREE(xml);
}
virDomainObjRemoveTransientDef(vm);
virObjectUnref(cfg);
virObjectUnref(conn);
}
int
virLXCProcessValidateInterface(virDomainNetDefPtr net)
{
if (net->script) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("scripts are not supported on LXC network interfaces"));
return -1;
}
return 0;
}
char *
virLXCProcessSetupInterfaceTap(virDomainDefPtr vm,
virDomainNetDefPtr net,
const char *brname)
{
char *parentVeth;
char *containerVeth = NULL;
const virNetDevVPortProfile *vport = virDomainNetGetActualVirtPortProfile(net);
VIR_DEBUG("calling vethCreate()");
parentVeth = net->ifname;
if (virNetDevVethCreate(&parentVeth, &containerVeth) < 0)
return NULL;
VIR_DEBUG("parentVeth: %s, containerVeth: %s", parentVeth, containerVeth);
if (net->ifname == NULL)
net->ifname = parentVeth;
if (virNetDevSetMAC(containerVeth, &net->mac) < 0)
return NULL;
if (brname) {
if (vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH) {
if (virNetDevOpenvswitchAddPort(brname, parentVeth, &net->mac, vm->uuid,
vport, virDomainNetGetActualVlan(net)) < 0)
return NULL;
} else {
if (virNetDevBridgeAddPort(brname, parentVeth) < 0)
return NULL;
}
}
if (virNetDevSetOnline(parentVeth, true) < 0)
return NULL;
if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_ETHERNET) {
/* Set IP info for the host side, but only if the type is
* 'ethernet'.
*/
if (virNetDevIPInfoAddToDev(parentVeth, &net->hostIP) < 0)
return NULL;
}
if (net->filter &&
virDomainConfNWFilterInstantiate(vm->name, vm->uuid, net, false) < 0)
return NULL;
return containerVeth;
}
char *
virLXCProcessSetupInterfaceDirect(virLXCDriverPtr driver,
virDomainDefPtr def,
virDomainNetDefPtr net)
{
char *ret = NULL;
char *res_ifname = NULL;
const virNetDevBandwidth *bw;
const virNetDevVPortProfile *prof;
virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
const char *linkdev = virDomainNetGetActualDirectDev(net);
unsigned int macvlan_create_flags = VIR_NETDEV_MACVLAN_CREATE_IFUP;
/* XXX how todo bandwidth controls ?
* Since the 'net-ifname' is about to be moved to a different
* namespace & renamed, there will be no host side visible
* interface for the container to attach rules to
*/
bw = virDomainNetGetActualBandwidth(net);
if (bw) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Unable to set network bandwidth on direct interfaces"));
return NULL;
}
/* XXX how todo port profiles ?
* Although we can do the association during container
* startup, at shutdown we are unable to disassociate
* because the macvlan device was moved to the container
* and automagically dies when the container dies. So
* we have no dev to perform disassociation with.
*/
prof = virDomainNetGetActualVirtPortProfile(net);
if (prof) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Unable to set port profile on direct interfaces"));
return NULL;
}
if (virNetDevMacVLanCreateWithVPortProfile(
net->ifname, &net->mac,
linkdev,
virDomainNetGetActualDirectMode(net),
virDomainNetGetActualVlan(net),
def->uuid,
prof,
&res_ifname,
VIR_NETDEV_VPORT_PROFILE_OP_CREATE,
cfg->stateDir,
NULL, 0,
macvlan_create_flags) < 0)
goto cleanup;
ret = res_ifname;
cleanup:
virObjectUnref(cfg);
return ret;
}
static const char *nsInfoLocal[VIR_LXC_DOMAIN_NAMESPACE_LAST] = {
[VIR_LXC_DOMAIN_NAMESPACE_SHARENET] = "net",
[VIR_LXC_DOMAIN_NAMESPACE_SHAREIPC] = "ipc",
[VIR_LXC_DOMAIN_NAMESPACE_SHAREUTS] = "uts",
};
static int virLXCProcessSetupNamespaceName(virLXCDriverPtr driver,
int ns_type,
const char *name)
{
int fd = -1;
virDomainObjPtr vm;
virLXCDomainObjPrivatePtr priv;
char *path;
vm = virDomainObjListFindByName(driver->domains, name);
if (!vm) {
virReportError(VIR_ERR_NO_DOMAIN,
_("No domain with matching name '%s'"), name);
return -1;
}
priv = vm->privateData;
if (!priv->initpid) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("Init pid is not yet available"));
goto cleanup;
}
path = g_strdup_printf("/proc/%lld/ns/%s", (long long int)priv->initpid,
nsInfoLocal[ns_type]);
if ((fd = open(path, O_RDONLY)) < 0) {
virReportSystemError(errno,
_("failed to open ns %s"),
virLXCDomainNamespaceTypeToString(ns_type));
goto cleanup;
}
cleanup:
VIR_FREE(path);
virDomainObjEndAPI(&vm);
return fd;
}
static int virLXCProcessSetupNamespacePID(int ns_type, const char *name)
{
int fd;
char *path;
path = g_strdup_printf("/proc/%s/ns/%s", name, nsInfoLocal[ns_type]);
fd = open(path, O_RDONLY);
VIR_FREE(path);
if (fd < 0) {
virReportSystemError(errno,
_("failed to open ns %s"),
virLXCDomainNamespaceTypeToString(ns_type));
return -1;
}
return fd;
}
static int virLXCProcessSetupNamespaceNet(int ns_type, const char *name)
{
char *path;
int fd;
if (ns_type != VIR_LXC_DOMAIN_NAMESPACE_SHARENET) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("'netns' namespace source can only be "
"used with sharenet"));
return -1;
}
path = g_strdup_printf("%s/netns/%s", RUNSTATEDIR, name);
fd = open(path, O_RDONLY);
VIR_FREE(path);
if (fd < 0) {
virReportSystemError(errno,
_("failed to open netns %s"), name);
return -1;
}
return fd;
}
/**
* virLXCProcessSetupNamespaces:
* @driver: pointer to driver structure
* @def: pointer to virtual machines namespaceData
* @nsFDs: out parameter to store the namespace FD
*
* Opens the specified namespace that needs to be shared and
* will moved into the container namespace later after clone has been called.
*
* Returns 0 on success or -1 in case of error
*/
static int
virLXCProcessSetupNamespaces(virLXCDriverPtr driver,
lxcDomainDefPtr lxcDef,
int *nsFDs)
{
size_t i;
for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++)
nsFDs[i] = -1;
/*If there are no namespace to be opened just return success*/
if (lxcDef == NULL)
return 0;
for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) {
switch (lxcDef->ns_source[i]) {
case VIR_LXC_DOMAIN_NAMESPACE_SOURCE_NONE:
continue;
case VIR_LXC_DOMAIN_NAMESPACE_SOURCE_NAME:
if ((nsFDs[i] = virLXCProcessSetupNamespaceName(driver, i,
lxcDef->ns_val[i])) < 0)
return -1;
break;
case VIR_LXC_DOMAIN_NAMESPACE_SOURCE_PID:
if ((nsFDs[i] = virLXCProcessSetupNamespacePID(i, lxcDef->ns_val[i])) < 0)
return -1;
break;
case VIR_LXC_DOMAIN_NAMESPACE_SOURCE_NETNS:
if ((nsFDs[i] = virLXCProcessSetupNamespaceNet(i, lxcDef->ns_val[i])) < 0)
return -1;
break;
}
}
return 0;
}
/**
* virLXCProcessSetupInterfaces:
* @driver: pointer to driver structure
* @def: pointer to virtual machine structure
* @veths: string list of interface names
*
* Sets up the container interfaces by creating the veth device pairs and
* attaching the parent end to the appropriate bridge. The container end
* will moved into the container namespace later after clone has been called.
*
* Returns 0 on success or -1 in case of error
*/
static int
virLXCProcessSetupInterfaces(virLXCDriverPtr driver,
virDomainDefPtr def,
char ***veths)
{
int ret = -1;
size_t i;
size_t niface = 0;
virDomainNetDefPtr net;
virDomainNetType type;
virConnectPtr netconn = NULL;
virErrorPtr save_err = NULL;
if (VIR_ALLOC_N(*veths, def->nnets + 1) < 0)
return -1;
for (i = 0; i < def->nnets; i++) {
char *veth = NULL;
const virNetDevBandwidth *actualBandwidth;
/* If appropriate, grab a physical device from the configured
* network's pool of devices, or resolve bridge device name
* to the one defined in the network definition.
*/
net = def->nets[i];
if (virLXCProcessValidateInterface(net) < 0)
goto cleanup;
if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
if (!netconn && !(netconn = virGetConnectNetwork()))
goto cleanup;
if (virDomainNetAllocateActualDevice(netconn, def, net) < 0)
goto cleanup;
}
conf: add hypervisor agnostic, domain start-time, validation function for NetDef <interface> devices (virDomainNetDef) are a bit different from other types of devices in that their actual type may come from a network (in the form of a port connection), and that doesn't happen until the domain is started. This means that any validation of an <interface> at parse time needs to be a bit liberal in what it accepts - when type='network', you could think that something is/isn't allowed, but once the domain is started and a port is created by the configured network, the opposite might be true. To solve this problem hypervisor drivers need to do an extra validation step when the domain is being started. I recently (commit 3cff23f7, libvirt 5.7.0) added a function to peform such validation for all interfaces to the QEMU driver - qemuDomainValidateActualNetDef() - but while that function is a good single point to call for the multiple places that need to "start" an interface (domain startup, device hotplug, device update), it can't be called by the other hypervisor drivers, since 1) it's in the QEMU driver, and 2) it contains some checks specific to QEMU. For validation that applies to network devices on *all* hypervisors, we need yet another interface validation function that can be called by any hypervisor driver (not just QEMU) right after its network port has been created during domain startup or hotplug. This patch adds that function - virDomainActualNetDefValidate(), in the conf directory, and calls it in appropriate places in the QEMU, lxc, and libxl drivers. This new function is the place to put all network device validation that 1) is hypervisor agnostic, and 2) can't be done until we know the "actual type" of an interface. There is no framework for validation at domain startup as there is for post-parse validation, but I don't want to create a whole elaborate system that will only be used by one type of device. For that reason, I just made a single function that should be called directly from the hypervisors, when they are initializing interfaces to start a domain, right after conditionally allocating the network port (and regardless of whether or not that was actually needed). In the case of the QEMU driver, qemuDomainValidateActualNetDef() is already called in all the appropriate places, so we can just call the new function from there. In the case of the other hypervisors, we search for virDomainNetAllocateActualDevice() (which is the hypervisor-agnostic function that calls virNetworkPortCreateXML()), and add the call to our new function right after that. The new function itself could be plunked down into many places in the code, but we already have 3 validation functions for network devices in 2 different places (not counting any basic validation done in virDomainNetDefParseXML() itself): 1) post-parse hypervisor-agnostic (virDomainNetDefValidate() - domain_conf.c:6145) 2) post-parse hypervisor-specific (qemuDomainDeviceDefValidateNetwork() - qemu_domain.c:5498) 3) domain-start hypervisor-specific (qemuDomainValidateActualNetDef() - qemu_domain.c:5390) I placed (3) right next to (2) when I added it, specifically to avoid spreading validation all over the code. For the same reason, I decided to put this new function right next to (1) - this way if someone needs to add validation specific to qemu, they go to one location, and if they need to add validation applying to everyone, they go to the other. It looks a bit strange to have a public function in between a bunch of statics, but I think it's better than the alternative of further fragmentation. (I'm open to other ideas though, of course.) Signed-off-by: Laine Stump <laine@redhat.com> Reviewed-by: Cole Robinson <crobinso@redhat.com>
2019-10-18 15:48:13 -04:00
/* final validation now that actual type is known */
if (virDomainActualNetDefValidate(net) < 0)
return -1;
type = virDomainNetGetActualType(net);
switch (type) {
case VIR_DOMAIN_NET_TYPE_NETWORK:
case VIR_DOMAIN_NET_TYPE_BRIDGE: {
const char *brname = virDomainNetGetActualBridgeName(net);
if (!brname) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("No bridge name specified"));
goto cleanup;
}
if (!(veth = virLXCProcessSetupInterfaceTap(def, net, brname)))
goto cleanup;
} break;
case VIR_DOMAIN_NET_TYPE_ETHERNET:
if (!(veth = virLXCProcessSetupInterfaceTap(def, net, NULL)))
goto cleanup;
break;
case VIR_DOMAIN_NET_TYPE_DIRECT:
if (!(veth = virLXCProcessSetupInterfaceDirect(driver, def, net)))
goto cleanup;
break;
case VIR_DOMAIN_NET_TYPE_USER:
case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
case VIR_DOMAIN_NET_TYPE_SERVER:
case VIR_DOMAIN_NET_TYPE_CLIENT:
case VIR_DOMAIN_NET_TYPE_MCAST:
case VIR_DOMAIN_NET_TYPE_UDP:
case VIR_DOMAIN_NET_TYPE_INTERNAL:
case VIR_DOMAIN_NET_TYPE_LAST:
case VIR_DOMAIN_NET_TYPE_HOSTDEV:
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unsupported network type %s"),
virDomainNetTypeToString(type));
goto cleanup;
}
/* Set bandwidth or warn if requested and not supported. */
actualBandwidth = virDomainNetGetActualBandwidth(net);
if (actualBandwidth) {
if (virNetDevSupportBandwidth(type)) {
if (virNetDevBandwidthSet(net->ifname, actualBandwidth, false,
!virDomainNetTypeSharesHostView(net)) < 0)
goto cleanup;
} else {
VIR_WARN("setting bandwidth on interfaces of "
"type '%s' is not implemented yet",
virDomainNetTypeToString(type));
}
}
(*veths)[i] = veth;
def->nets[i]->ifname_guest_actual = g_strdup(veth);
/* Make sure all net definitions will have a name in the container */
if (!net->ifname_guest) {
net->ifname_guest = g_strdup_printf("eth%zu", niface);
niface++;
}
}
ret = 0;
cleanup:
if (ret < 0) {
virErrorPreserveLast(&save_err);
for (i = 0; i < def->nnets; i++) {
virDomainNetDefPtr iface = def->nets[i];
const virNetDevVPortProfile *vport = virDomainNetGetActualVirtPortProfile(iface);
if (vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH)
ignore_value(virNetDevOpenvswitchRemovePort(
virDomainNetGetActualBridgeName(iface),
iface->ifname));
if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK && netconn)
virDomainNetReleaseActualDevice(netconn, def, iface);
}
virErrorRestore(&save_err);
}
virObjectUnref(netconn);
return ret;
}
static void
virLXCProcessCleanInterfaces(virDomainDefPtr def)
{
size_t i;
for (i = 0; i < def->nnets; i++) {
VIR_FREE(def->nets[i]->ifname_guest_actual);
VIR_DEBUG("Cleared net names: %s", def->nets[i]->ifname_guest);
}
}
extern virLXCDriverPtr lxc_driver;
Fix deadlock in handling EOF in LXC monitor Depending on the scenario in which LXC containers exit, it is possible for the EOF callback of the LXC monitor to deadlock the driver. #0 0x00000038a0a0de4d in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00000038a0a09ca6 in _L_lock_840 () from /lib64/libpthread.so.0 #2 0x00000038a0a09ba8 in pthread_mutex_lock () from /lib64/libpthread.so.0 #3 0x00007f4bd9579d55 in virMutexLock (m=<optimized out>) at util/threads-pthread.c:85 #4 0x00007f4bcacc7597 in lxcDriverLock (driver=0x7f4bc40c8290) at lxc/lxc_conf.h:81 #5 virLXCProcessMonitorEOFNotify (mon=<optimized out>, vm=0x7f4bb4000b00) at lxc/lxc_process.c:581 #6 0x00007f4bd9645c91 in virNetClientCloseLocked (client=client@entry=0x7f4bb4009e60) at rpc/virnetclient.c:554 #7 0x00007f4bd96460f8 in virNetClientIOEventLoopPassTheBuck (thiscall=0x0, client=0x7f4bb4009e60) at rpc/virnetclient.c:1306 #8 virNetClientIOEventLoopPassTheBuck (client=0x7f4bb4009e60, thiscall=0x0) at rpc/virnetclient.c:1287 #9 0x00007f4bd96467a2 in virNetClientCloseInternal (reason=3, client=0x7f4bb4009e60) at rpc/virnetclient.c:589 #10 virNetClientCloseInternal (client=0x7f4bb4009e60, reason=3) at rpc/virnetclient.c:561 #11 0x00007f4bcacc4a82 in virLXCMonitorClose (mon=0x7f4bb4000a00) at lxc/lxc_monitor.c:201 #12 0x00007f4bcacc55ac in virLXCProcessCleanup (reason=<optimized out>, vm=0x7f4bb4000b00, driver=0x7f4bc40c8290) at lxc/lxc_process.c:240 #13 virLXCProcessStop (driver=0x7f4bc40c8290, vm=vm@entry=0x7f4bb4000b00, reason=reason@entry=VIR_DOMAIN_SHUTOFF_DESTROYED) at lxc/lxc_process.c:735 #14 0x00007f4bcacc5bd2 in virLXCProcessAutoDestroyDom (payload=<optimized out>, name=0x7f4bb4003c80, opaque=0x7fff41af2df0) at lxc/lxc_process.c:94 #15 0x00007f4bd9586649 in virHashForEach (table=0x7f4bc409b270, iter=iter@entry=0x7f4bcacc5ab0 <virLXCProcessAutoDestroyDom>, data=data@entry=0x7fff41af2df0) at util/virhash.c:514 #16 0x00007f4bcacc52d7 in virLXCProcessAutoDestroyRun (driver=driver@entry=0x7f4bc40c8290, conn=conn@entry=0x7f4bb8000ab0) at lxc/lxc_process.c:120 #17 0x00007f4bcacca628 in lxcClose (conn=0x7f4bb8000ab0) at lxc/lxc_driver.c:128 #18 0x00007f4bd95e67ab in virReleaseConnect (conn=conn@entry=0x7f4bb8000ab0) at datatypes.c:114 When the driver calls virLXCMonitorClose, there is really no need for the EOF callback to be invoked in this case, since the caller can easily handle events itself. In changing this, the monitor needs to take a deep copy of the callback list, not merely a reference. Also adds debug statements in various places to aid troubleshooting Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-09-26 15:08:20 +01:00
static void virLXCProcessMonitorEOFNotify(virLXCMonitorPtr mon,
virDomainObjPtr vm)
{
virLXCDriverPtr driver = lxc_driver;
virObjectEventPtr event = NULL;
virLXCDomainObjPrivatePtr priv;
Fix deadlock in handling EOF in LXC monitor Depending on the scenario in which LXC containers exit, it is possible for the EOF callback of the LXC monitor to deadlock the driver. #0 0x00000038a0a0de4d in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00000038a0a09ca6 in _L_lock_840 () from /lib64/libpthread.so.0 #2 0x00000038a0a09ba8 in pthread_mutex_lock () from /lib64/libpthread.so.0 #3 0x00007f4bd9579d55 in virMutexLock (m=<optimized out>) at util/threads-pthread.c:85 #4 0x00007f4bcacc7597 in lxcDriverLock (driver=0x7f4bc40c8290) at lxc/lxc_conf.h:81 #5 virLXCProcessMonitorEOFNotify (mon=<optimized out>, vm=0x7f4bb4000b00) at lxc/lxc_process.c:581 #6 0x00007f4bd9645c91 in virNetClientCloseLocked (client=client@entry=0x7f4bb4009e60) at rpc/virnetclient.c:554 #7 0x00007f4bd96460f8 in virNetClientIOEventLoopPassTheBuck (thiscall=0x0, client=0x7f4bb4009e60) at rpc/virnetclient.c:1306 #8 virNetClientIOEventLoopPassTheBuck (client=0x7f4bb4009e60, thiscall=0x0) at rpc/virnetclient.c:1287 #9 0x00007f4bd96467a2 in virNetClientCloseInternal (reason=3, client=0x7f4bb4009e60) at rpc/virnetclient.c:589 #10 virNetClientCloseInternal (client=0x7f4bb4009e60, reason=3) at rpc/virnetclient.c:561 #11 0x00007f4bcacc4a82 in virLXCMonitorClose (mon=0x7f4bb4000a00) at lxc/lxc_monitor.c:201 #12 0x00007f4bcacc55ac in virLXCProcessCleanup (reason=<optimized out>, vm=0x7f4bb4000b00, driver=0x7f4bc40c8290) at lxc/lxc_process.c:240 #13 virLXCProcessStop (driver=0x7f4bc40c8290, vm=vm@entry=0x7f4bb4000b00, reason=reason@entry=VIR_DOMAIN_SHUTOFF_DESTROYED) at lxc/lxc_process.c:735 #14 0x00007f4bcacc5bd2 in virLXCProcessAutoDestroyDom (payload=<optimized out>, name=0x7f4bb4003c80, opaque=0x7fff41af2df0) at lxc/lxc_process.c:94 #15 0x00007f4bd9586649 in virHashForEach (table=0x7f4bc409b270, iter=iter@entry=0x7f4bcacc5ab0 <virLXCProcessAutoDestroyDom>, data=data@entry=0x7fff41af2df0) at util/virhash.c:514 #16 0x00007f4bcacc52d7 in virLXCProcessAutoDestroyRun (driver=driver@entry=0x7f4bc40c8290, conn=conn@entry=0x7f4bb8000ab0) at lxc/lxc_process.c:120 #17 0x00007f4bcacca628 in lxcClose (conn=0x7f4bb8000ab0) at lxc/lxc_driver.c:128 #18 0x00007f4bd95e67ab in virReleaseConnect (conn=conn@entry=0x7f4bb8000ab0) at datatypes.c:114 When the driver calls virLXCMonitorClose, there is really no need for the EOF callback to be invoked in this case, since the caller can easily handle events itself. In changing this, the monitor needs to take a deep copy of the callback list, not merely a reference. Also adds debug statements in various places to aid troubleshooting Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-09-26 15:08:20 +01:00
VIR_DEBUG("mon=%p vm=%p", mon, vm);
virObjectLock(vm);
priv = vm->privateData;
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
if (!priv->wantReboot) {
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
if (!priv->doneStopEvent) {
event = virDomainEventLifecycleNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
priv->stopReason);
virDomainAuditStop(vm, "shutdown");
} else {
VIR_DEBUG("Stop event has already been sent");
}
conf: Clean up object referencing for Add and Remove When adding a new object to the domain object list, there should have been 2 virObjectRef calls made one for each list into which the object was placed to match the 2 virObjectUnref calls that would occur during Remove as part of virHashRemoveEntry when virObjectFreeHashData is called when the element is removed from the hash table as set up in virDomainObjListNew. Some drivers (libxl, lxc, qemu, and vz) handled this inconsistency by calling virObjectRef upon successful return from virDomainObjListAdd in order to use virDomainObjEndAPI when done with the returned @vm. While others (bhyve, openvz, test, and vmware) handled this via only calling virObjectUnlock upon successful return from virDomainObjListAdd. This patch will "unify" the approach to use virDomainObjEndAPI for any @vm successfully returned from virDomainObjListAdd. Because list removal is so tightly coupled with list addition, this patch fixes the list removal algorithm to return the object as entered - "locked and reffed". This way, the callers can then decide how to uniformly handle add/remove success and failure. This removes the onus on the caller to "specially handle" the @vm during removal processing. The Add/Remove logic allows for some logic simplification such as in libxl where we can Remove the @vm directly rather than needing to set a @remove_dom boolean and removing after the libxlDomainObjEndJob completes as the @vm is locked/reffed. Signed-off-by: John Ferlan <jferlan@redhat.com> Reviewed-by: Erik Skultety <eskultet@redhat.com>
2018-04-23 10:40:48 -04:00
if (!vm->persistent)
virDomainObjListRemove(driver->domains, vm);
} else {
int ret = virLXCProcessReboot(driver, vm);
virDomainAuditStop(vm, "reboot");
virDomainAuditStart(vm, "reboot", ret == 0);
if (ret == 0) {
event = virDomainEventRebootNewFromObj(vm);
} else {
event = virDomainEventLifecycleNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
priv->stopReason);
conf: Clean up object referencing for Add and Remove When adding a new object to the domain object list, there should have been 2 virObjectRef calls made one for each list into which the object was placed to match the 2 virObjectUnref calls that would occur during Remove as part of virHashRemoveEntry when virObjectFreeHashData is called when the element is removed from the hash table as set up in virDomainObjListNew. Some drivers (libxl, lxc, qemu, and vz) handled this inconsistency by calling virObjectRef upon successful return from virDomainObjListAdd in order to use virDomainObjEndAPI when done with the returned @vm. While others (bhyve, openvz, test, and vmware) handled this via only calling virObjectUnlock upon successful return from virDomainObjListAdd. This patch will "unify" the approach to use virDomainObjEndAPI for any @vm successfully returned from virDomainObjListAdd. Because list removal is so tightly coupled with list addition, this patch fixes the list removal algorithm to return the object as entered - "locked and reffed". This way, the callers can then decide how to uniformly handle add/remove success and failure. This removes the onus on the caller to "specially handle" the @vm during removal processing. The Add/Remove logic allows for some logic simplification such as in libxl where we can Remove the @vm directly rather than needing to set a @remove_dom boolean and removing after the libxlDomainObjEndJob completes as the @vm is locked/reffed. Signed-off-by: John Ferlan <jferlan@redhat.com> Reviewed-by: Erik Skultety <eskultet@redhat.com>
2018-04-23 10:40:48 -04:00
if (!vm->persistent)
virDomainObjListRemove(driver->domains, vm);
}
}
conf: Clean up object referencing for Add and Remove When adding a new object to the domain object list, there should have been 2 virObjectRef calls made one for each list into which the object was placed to match the 2 virObjectUnref calls that would occur during Remove as part of virHashRemoveEntry when virObjectFreeHashData is called when the element is removed from the hash table as set up in virDomainObjListNew. Some drivers (libxl, lxc, qemu, and vz) handled this inconsistency by calling virObjectRef upon successful return from virDomainObjListAdd in order to use virDomainObjEndAPI when done with the returned @vm. While others (bhyve, openvz, test, and vmware) handled this via only calling virObjectUnlock upon successful return from virDomainObjListAdd. This patch will "unify" the approach to use virDomainObjEndAPI for any @vm successfully returned from virDomainObjListAdd. Because list removal is so tightly coupled with list addition, this patch fixes the list removal algorithm to return the object as entered - "locked and reffed". This way, the callers can then decide how to uniformly handle add/remove success and failure. This removes the onus on the caller to "specially handle" the @vm during removal processing. The Add/Remove logic allows for some logic simplification such as in libxl where we can Remove the @vm directly rather than needing to set a @remove_dom boolean and removing after the libxlDomainObjEndJob completes as the @vm is locked/reffed. Signed-off-by: John Ferlan <jferlan@redhat.com> Reviewed-by: Erik Skultety <eskultet@redhat.com>
2018-04-23 10:40:48 -04:00
/* NB: virLXCProcessConnectMonitor will perform the virObjectRef(vm)
* before adding monitorCallbacks. Since we are now done with the @vm
* we can Unref/Unlock */
virDomainObjEndAPI(&vm);
virObjectEventStateQueue(driver->domainEventState, event);
}
static void virLXCProcessMonitorExitNotify(virLXCMonitorPtr mon G_GNUC_UNUSED,
virLXCMonitorExitStatus status,
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
virDomainObjPtr vm)
{
virLXCDomainObjPrivatePtr priv = vm->privateData;
virObjectLock(vm);
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
switch (status) {
case VIR_LXC_MONITOR_EXIT_STATUS_SHUTDOWN:
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN;
break;
case VIR_LXC_MONITOR_EXIT_STATUS_ERROR:
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
break;
case VIR_LXC_MONITOR_EXIT_STATUS_REBOOT:
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN;
priv->wantReboot = true;
break;
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
default:
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
break;
}
VIR_DEBUG("Domain shutoff reason %d (from status %d)",
priv->stopReason, status);
virObjectUnlock(vm);
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
}
static int
virLXCProcessGetNsInode(pid_t pid,
const char *nsname,
ino_t *inode)
{
char *path = NULL;
struct stat sb;
int ret = -1;
path = g_strdup_printf("/proc/%lld/ns/%s", (long long)pid, nsname);
if (stat(path, &sb) < 0) {
virReportSystemError(errno,
_("Unable to stat %s"), path);
goto cleanup;
}
*inode = sb.st_ino;
ret = 0;
cleanup:
VIR_FREE(path);
return ret;
}
/* XXX a little evil */
extern virLXCDriverPtr lxc_driver;
static void virLXCProcessMonitorInitNotify(virLXCMonitorPtr mon G_GNUC_UNUSED,
pid_t initpid,
virDomainObjPtr vm)
{
virLXCDriverPtr driver = lxc_driver;
virLXCDomainObjPrivatePtr priv;
virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
ino_t inode = 0;
virObjectLock(vm);
priv = vm->privateData;
priv->initpid = initpid;
if (virLXCProcessGetNsInode(initpid, "pid", &inode) < 0) {
VIR_WARN("Cannot obtain pid NS inode for %lld: %s",
(long long)initpid,
virGetLastErrorMessage());
virResetLastError();
}
virDomainAuditInit(vm, initpid, inode);
if (virDomainObjSave(vm, lxc_driver->xmlopt, cfg->stateDir) < 0)
VIR_WARN("Cannot update XML with PID for LXC %s", vm->def->name);
virObjectUnlock(vm);
virObjectUnref(cfg);
}
static virLXCMonitorCallbacks monitorCallbacks = {
.eofNotify = virLXCProcessMonitorEOFNotify,
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
.exitNotify = virLXCProcessMonitorExitNotify,
.initNotify = virLXCProcessMonitorInitNotify,
};
static virLXCMonitorPtr virLXCProcessConnectMonitor(virLXCDriverPtr driver,
virDomainObjPtr vm)
{
virLXCMonitorPtr monitor = NULL;
virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
if (virSecurityManagerSetSocketLabel(driver->securityManager, vm->def) < 0)
goto cleanup;
/* Hold an extra reference because we can't allow 'vm' to be
conf: Clean up object referencing for Add and Remove When adding a new object to the domain object list, there should have been 2 virObjectRef calls made one for each list into which the object was placed to match the 2 virObjectUnref calls that would occur during Remove as part of virHashRemoveEntry when virObjectFreeHashData is called when the element is removed from the hash table as set up in virDomainObjListNew. Some drivers (libxl, lxc, qemu, and vz) handled this inconsistency by calling virObjectRef upon successful return from virDomainObjListAdd in order to use virDomainObjEndAPI when done with the returned @vm. While others (bhyve, openvz, test, and vmware) handled this via only calling virObjectUnlock upon successful return from virDomainObjListAdd. This patch will "unify" the approach to use virDomainObjEndAPI for any @vm successfully returned from virDomainObjListAdd. Because list removal is so tightly coupled with list addition, this patch fixes the list removal algorithm to return the object as entered - "locked and reffed". This way, the callers can then decide how to uniformly handle add/remove success and failure. This removes the onus on the caller to "specially handle" the @vm during removal processing. The Add/Remove logic allows for some logic simplification such as in libxl where we can Remove the @vm directly rather than needing to set a @remove_dom boolean and removing after the libxlDomainObjEndJob completes as the @vm is locked/reffed. Signed-off-by: John Ferlan <jferlan@redhat.com> Reviewed-by: Erik Skultety <eskultet@redhat.com>
2018-04-23 10:40:48 -04:00
* deleted while the monitor is active. This will be unreffed
* during EOFNotify processing. */
virObjectRef(vm);
monitor = virLXCMonitorNew(vm, cfg->stateDir, &monitorCallbacks);
if (monitor == NULL)
virObjectUnref(vm);
if (virSecurityManagerClearSocketLabel(driver->securityManager, vm->def) < 0) {
if (monitor) {
virObjectUnref(monitor);
monitor = NULL;
}
goto cleanup;
}
cleanup:
virObjectUnref(cfg);
return monitor;
}
int virLXCProcessStop(virLXCDriverPtr driver,
virDomainObjPtr vm,
virDomainShutoffReason reason)
{
int rc;
virLXCDomainObjPrivatePtr priv;
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
VIR_DEBUG("Stopping VM name=%s pid=%d reason=%d",
vm->def->name, (int)vm->pid, (int)reason);
if (!virDomainObjIsActive(vm)) {
VIR_DEBUG("VM '%s' not active", vm->def->name);
return 0;
}
priv = vm->privateData;
/* If the LXC domain is suspended we send all processes a SIGKILL
* and thaw them. Upon wakeup the process sees the pending signal
* and dies immediately. It is guaranteed that priv->cgroup != NULL
* here because the domain has aleady been suspended using the
* freezer cgroup.
*/
if (reason == VIR_DOMAIN_SHUTOFF_DESTROYED &&
virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
if (virCgroupKillRecursive(priv->cgroup, SIGKILL) <= 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to kill all processes"));
return -1;
}
if (virCgroupSetFreezerState(priv->cgroup, "THAWED") < 0) {
virReportError(VIR_ERR_OPERATION_FAILED, "%s",
_("Unable to thaw all processes"));
return -1;
}
goto cleanup;
}
if (priv->cgroup) {
rc = virCgroupKillPainfully(priv->cgroup);
if (rc < 0)
return -1;
if (rc > 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Some processes refused to die"));
return -1;
}
} else if (vm->pid > 0) {
/* If cgroup doesn't exist, just try cleaning up the
* libvirt_lxc process */
if (virProcessKillPainfully(vm->pid, true) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Processes %d refused to die"), (int)vm->pid);
return -1;
}
}
cleanup:
virLXCProcessCleanup(driver, vm, reason);
return 0;
}
static virCommandPtr
virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
virDomainObjPtr vm,
char **veths,
int *ttyFDs,
size_t nttyFDs,
int *nsInheritFDs,
int *files,
size_t nfiles,
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
int handshakefd,
lxc: Don't pass a local variable address randomly So, recently I was testing the LXC driver. You know, startup some domains. But to my surprise, I was not able to start a single one: virsh # start --console test error: Reconnected to the hypervisor error: Failed to start domain test error: internal error: guest failed to start: unexpected exit status 125 So I've start digging. It turns out, that in virExec(), when I printed out the @cmd, I got strange values: *(cmd->outfdptr) was certainly not valid FD number: it has random value of several millions. This obviously made prepareStdFd(childout, STDOUT_FILENO) fail (line 611). But outfdptr is set in virCommandSetOutputFD(). The only place within LXC driver where the function is called is in virLXCProcessBuildControllerCmd(). If you take a closer look at the function it looks like this: static virCommandPtr virLXCProcessBuildControllerCmd(virLXCDriverPtr driver, .. int logfd, const char *pidfile) { ... virCommandSetOutputFD(cmd, &logfd); virCommandSetErrorFD(cmd, &logfd); ... } Yes, you guessed it. @logfd is passed into the function by value. However, in the function we try to get its address (an address of a local variable) which is no longer valid once function is finished and stack is cleaned. Therefore when cmd->outfdptr is evaluated at any point after this function, we may get a random number, depending on what's currently on the stack. Of course, this may work sometimes too - it depends on the compiler how it arranges the code, when the stack is wiped out. In order to fix this, lets pass a pointer to @logfd instead of figuring out (wrong) its value in a function. The bug was introduced in e1de5521. Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
2015-07-01 17:21:28 +02:00
int * const logfd,
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
const char *pidfile)
{
size_t i;
char *filterstr;
char *outputstr;
virCommandPtr cmd;
virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
cmd = virCommandNew(vm->def->emulator);
/* The controller may call ip command, so we have to retain PATH. */
virCommandAddEnvPass(cmd, "PATH");
virCommandAddEnvFormat(cmd, "LIBVIRT_DEBUG=%d",
virLogGetDefaultPriority());
if (virLogGetNbFilters() > 0) {
filterstr = virLogGetFilters();
if (!filterstr) {
virReportOOMError();
goto error;
}
virCommandAddEnvPair(cmd, "LIBVIRT_LOG_FILTERS", filterstr);
VIR_FREE(filterstr);
}
if (cfg->log_libvirtd) {
if (virLogGetNbOutputs() > 0) {
outputstr = virLogGetOutputs();
if (!outputstr) {
virReportOOMError();
goto error;
}
virCommandAddEnvPair(cmd, "LIBVIRT_LOG_OUTPUTS", outputstr);
VIR_FREE(outputstr);
}
} else {
virCommandAddEnvFormat(cmd,
"LIBVIRT_LOG_OUTPUTS=%d:stderr",
virLogGetDefaultPriority());
}
virCommandAddArgList(cmd, "--name", vm->def->name, NULL);
for (i = 0; i < nttyFDs; i++) {
virCommandAddArg(cmd, "--console");
virCommandAddArgFormat(cmd, "%d", ttyFDs[i]);
virCommandPassFD(cmd, ttyFDs[i], 0);
}
for (i = 0; i < nfiles; i++) {
virCommandAddArg(cmd, "--passfd");
virCommandAddArgFormat(cmd, "%d", files[i]);
virCommandPassFD(cmd, files[i], 0);
}
for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) {
if (nsInheritFDs[i] > 0) {
char *tmp = NULL;
tmp = g_strdup_printf("--share-%s", nsInfoLocal[i]);
virCommandAddArg(cmd, tmp);
virCommandAddArgFormat(cmd, "%d", nsInheritFDs[i]);
virCommandPassFD(cmd, nsInheritFDs[i], 0);
VIR_FREE(tmp);
}
}
virCommandAddArgPair(cmd, "--security",
virSecurityManagerGetModel(driver->securityManager));
virCommandAddArg(cmd, "--handshake");
virCommandAddArgFormat(cmd, "%d", handshakefd);
for (i = 0; veths && veths[i]; i++)
virCommandAddArgList(cmd, "--veth", veths[i], NULL);
virCommandPassFD(cmd, handshakefd, 0);
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
virCommandDaemonize(cmd);
virCommandSetPidFile(cmd, pidfile);
lxc: Don't pass a local variable address randomly So, recently I was testing the LXC driver. You know, startup some domains. But to my surprise, I was not able to start a single one: virsh # start --console test error: Reconnected to the hypervisor error: Failed to start domain test error: internal error: guest failed to start: unexpected exit status 125 So I've start digging. It turns out, that in virExec(), when I printed out the @cmd, I got strange values: *(cmd->outfdptr) was certainly not valid FD number: it has random value of several millions. This obviously made prepareStdFd(childout, STDOUT_FILENO) fail (line 611). But outfdptr is set in virCommandSetOutputFD(). The only place within LXC driver where the function is called is in virLXCProcessBuildControllerCmd(). If you take a closer look at the function it looks like this: static virCommandPtr virLXCProcessBuildControllerCmd(virLXCDriverPtr driver, .. int logfd, const char *pidfile) { ... virCommandSetOutputFD(cmd, &logfd); virCommandSetErrorFD(cmd, &logfd); ... } Yes, you guessed it. @logfd is passed into the function by value. However, in the function we try to get its address (an address of a local variable) which is no longer valid once function is finished and stack is cleaned. Therefore when cmd->outfdptr is evaluated at any point after this function, we may get a random number, depending on what's currently on the stack. Of course, this may work sometimes too - it depends on the compiler how it arranges the code, when the stack is wiped out. In order to fix this, lets pass a pointer to @logfd instead of figuring out (wrong) its value in a function. The bug was introduced in e1de5521. Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
2015-07-01 17:21:28 +02:00
virCommandSetOutputFD(cmd, logfd);
virCommandSetErrorFD(cmd, logfd);
/* So we can pause before exec'ing the controller to
* write the live domain status XML with the PID */
virCommandRequireHandshake(cmd);
cleanup:
virObjectUnref(cfg);
return cmd;
error:
virCommandFree(cmd);
cmd = NULL;
goto cleanup;
}
static bool
virLXCProcessIgnorableLogLine(const char *str)
{
if (virLogProbablyLogMessage(str))
return true;
if (strstr(str, "PATH="))
return true;
if (strstr(str, "error receiving signal from container"))
return true;
if (STREQ(str, ""))
return true;
return false;
}
static int
virLXCProcessReadLogOutputData(virDomainObjPtr vm,
int fd,
char *buf,
size_t buflen)
{
int retries = 10;
int got = 0;
char *filter_next = buf;
buf[0] = '\0';
while (retries) {
ssize_t bytes;
bool isdead = false;
char *eol;
if (vm->pid <= 0 ||
(kill(vm->pid, 0) == -1 && errno == ESRCH))
isdead = true;
/* Any failures should be detected before we read the log, so we
* always have something useful to report on failure. */
bytes = saferead(fd, buf+got, buflen-got-1);
if (bytes < 0) {
virReportSystemError(errno, "%s",
_("Failure while reading log output"));
return -1;
}
got += bytes;
buf[got] = '\0';
/* Filter out debug messages from intermediate libvirt process */
while ((eol = strchr(filter_next, '\n'))) {
*eol = '\0';
if (virLXCProcessIgnorableLogLine(filter_next)) {
memmove(filter_next, eol + 1, got - (eol - buf));
got -= eol + 1 - filter_next;
} else {
filter_next = eol + 1;
*eol = '\n';
}
}
if (got == buflen-1) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Out of space while reading log output: %s"),
buf);
return -1;
}
if (isdead)
return got;
g_usleep(100*1000);
retries--;
}
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Timed out while reading log output: %s"),
buf);
return -1;
}
static int
virLXCProcessReadLogOutput(virDomainObjPtr vm,
char *logfile,
off_t pos,
char *buf,
size_t buflen)
{
int fd = -1;
int ret;
if ((fd = open(logfile, O_RDONLY)) < 0) {
virReportSystemError(errno,
_("Unable to open log file %s"),
logfile);
return -1;
}
if (lseek(fd, pos, SEEK_SET) < 0) {
virReportSystemError(errno,
_("Unable to seek log file %s to %llu"),
logfile, (unsigned long long)pos);
VIR_FORCE_CLOSE(fd);
return -1;
}
ret = virLXCProcessReadLogOutputData(vm,
fd,
buf,
buflen);
VIR_FORCE_CLOSE(fd);
return ret;
}
static int
virLXCProcessEnsureRootFS(virDomainObjPtr vm)
{
virDomainFSDefPtr root = virDomainGetFilesystemForTarget(vm->def, "/");
if (root)
return 0;
if (!(root = virDomainFSDefNew(NULL)))
goto error;
root->type = VIR_DOMAIN_FS_TYPE_MOUNT;
root->src->path = g_strdup("/");
root->dst = g_strdup("/");
if (VIR_INSERT_ELEMENT(vm->def->fss,
0,
vm->def->nfss,
root) < 0)
goto error;
return 0;
error:
virDomainFSDefFree(root);
return -1;
}
/**
* virLXCProcessStart:
* @conn: pointer to connection
* @driver: pointer to driver structure
* @vm: pointer to virtual machine structure
* @autoDestroy: mark the domain for auto destruction
* @reason: reason for switching vm to running state
*
* Starts a vm
*
* Returns 0 on success or -1 in case of error
*/
int virLXCProcessStart(virConnectPtr conn,
virLXCDriverPtr driver,
virDomainObjPtr vm,
unsigned int nfiles, int *files,
bool autoDestroy,
virDomainRunningReason reason)
{
int rc = -1, r;
size_t nttyFDs = 0;
int *ttyFDs = NULL;
size_t i;
char *logfile = NULL;
int logfd = -1;
VIR_AUTOSTRINGLIST veths = NULL;
int handshakefds[2] = { -1, -1 };
off_t pos = -1;
char ebuf[1024];
char *timestamp;
int nsInheritFDs[VIR_LXC_DOMAIN_NAMESPACE_LAST];
virCommandPtr cmd = NULL;
virLXCDomainObjPrivatePtr priv = vm->privateData;
virCapsPtr caps = NULL;
virErrorPtr err = NULL;
virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
virCgroupPtr selfcgroup;
int status;
char *pidfile = NULL;
if (virCgroupNewSelf(&selfcgroup) < 0)
return -1;
if (!virCgroupHasController(selfcgroup,
VIR_CGROUP_CONTROLLER_CPUACCT)) {
virCgroupFree(&selfcgroup);
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find 'cpuacct' cgroups controller mount"));
return -1;
}
if (!virCgroupHasController(selfcgroup,
VIR_CGROUP_CONTROLLER_DEVICES)) {
virCgroupFree(&selfcgroup);
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find 'devices' cgroups controller mount"));
return -1;
}
if (!virCgroupHasController(selfcgroup,
VIR_CGROUP_CONTROLLER_MEMORY)) {
virCgroupFree(&selfcgroup);
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find 'memory' cgroups controller mount"));
return -1;
}
virCgroupFree(&selfcgroup);
if (vm->def->nconsoles == 0) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("At least one PTY console is required"));
return -1;
}
for (i = 0; i < vm->def->nconsoles; i++) {
if (vm->def->consoles[i]->source->type != VIR_DOMAIN_CHR_TYPE_PTY) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Only PTY console types are supported"));
return -1;
}
}
if (virFileMakePath(cfg->logDir) < 0) {
virReportSystemError(errno,
_("Cannot create log directory '%s'"),
cfg->logDir);
return -1;
}
if (!vm->def->resource) {
virDomainResourceDefPtr res;
if (VIR_ALLOC(res) < 0)
goto cleanup;
res->partition = g_strdup("/machine");
vm->def->resource = res;
}
logfile = g_strdup_printf("%s/%s.log", cfg->logDir, vm->def->name);
if (!(pidfile = virPidFileBuildPath(cfg->stateDir, vm->def->name)))
goto cleanup;
if (!(caps = virLXCDriverGetCapabilities(driver, false)))
goto cleanup;
/* Do this up front, so any part of the startup process can add
* runtime state to vm->def that won't be persisted. This let's us
* report implicit runtime defaults in the XML, like vnc listen/socket
*/
VIR_DEBUG("Setting current domain def as transient");
if (virDomainObjSetDefTransient(driver->xmlopt, vm, NULL) < 0)
goto cleanup;
/* Run an early hook to set-up missing devices */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, driver->xmlopt, 0);
int hookret;
hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_PREPARE, VIR_HOOK_SUBOP_BEGIN,
NULL, xml, NULL);
VIR_FREE(xml);
/*
* If the script raised an error abort the launch
*/
if (hookret < 0)
goto cleanup;
}
if (virLXCProcessEnsureRootFS(vm) < 0)
goto cleanup;
/* Must be run before security labelling */
VIR_DEBUG("Preparing host devices");
if (virLXCPrepareHostDevices(driver, vm->def) < 0)
goto cleanup;
/* Here we open all the PTYs we need on the host OS side.
* The LXC controller will open the guest OS side PTYs
* and forward I/O between them.
*/
nttyFDs = vm->def->nconsoles;
if (VIR_ALLOC_N(ttyFDs, nttyFDs) < 0)
goto cleanup;
for (i = 0; i < vm->def->nconsoles; i++)
ttyFDs[i] = -1;
/* If you are using a SecurityDriver with dynamic labelling,
then generate a security label for isolation */
VIR_DEBUG("Generating domain security label (if required)");
if (vm->def->nseclabels &&
vm->def->seclabels[0]->type == VIR_DOMAIN_SECLABEL_DEFAULT)
vm->def->seclabels[0]->type = VIR_DOMAIN_SECLABEL_NONE;
if (virSecurityManagerCheckAllLabel(driver->securityManager, vm->def) < 0)
goto cleanup;
if (virSecurityManagerGenLabel(driver->securityManager, vm->def) < 0) {
virDomainAuditSecurityLabel(vm, false);
goto cleanup;
}
virDomainAuditSecurityLabel(vm, true);
VIR_DEBUG("Setting domain security labels");
if (virSecurityManagerSetAllLabel(driver->securityManager,
vm->def, NULL, false, false) < 0)
goto cleanup;
VIR_DEBUG("Setting up consoles");
for (i = 0; i < vm->def->nconsoles; i++) {
char *ttyPath;
if (virFileOpenTty(&ttyFDs[i], &ttyPath, 1) < 0) {
virReportSystemError(errno, "%s",
_("Failed to allocate tty"));
goto cleanup;
}
VIR_FREE(vm->def->consoles[i]->source->data.file.path);
vm->def->consoles[i]->source->data.file.path = ttyPath;
VIR_FREE(vm->def->consoles[i]->info.alias);
vm->def->consoles[i]->info.alias = g_strdup_printf("console%zu", i);
}
VIR_DEBUG("Setting up Interfaces");
if (virLXCProcessSetupInterfaces(driver, vm->def, &veths) < 0)
goto cleanup;
VIR_DEBUG("Setting up namespaces if any");
if (virLXCProcessSetupNamespaces(driver, vm->def->namespaceData, nsInheritFDs) < 0)
goto cleanup;
VIR_DEBUG("Preparing to launch");
if ((logfd = open(logfile, O_WRONLY | O_APPEND | O_CREAT,
S_IRUSR|S_IWUSR)) < 0) {
virReportSystemError(errno,
_("Failed to open '%s'"),
logfile);
goto cleanup;
}
if (pipe(handshakefds) < 0) {
virReportSystemError(errno, "%s",
_("Unable to create pipe"));
goto cleanup;
}
if (!(cmd = virLXCProcessBuildControllerCmd(driver,
vm,
veths,
ttyFDs, nttyFDs,
nsInheritFDs,
files, nfiles,
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
handshakefds[1],
lxc: Don't pass a local variable address randomly So, recently I was testing the LXC driver. You know, startup some domains. But to my surprise, I was not able to start a single one: virsh # start --console test error: Reconnected to the hypervisor error: Failed to start domain test error: internal error: guest failed to start: unexpected exit status 125 So I've start digging. It turns out, that in virExec(), when I printed out the @cmd, I got strange values: *(cmd->outfdptr) was certainly not valid FD number: it has random value of several millions. This obviously made prepareStdFd(childout, STDOUT_FILENO) fail (line 611). But outfdptr is set in virCommandSetOutputFD(). The only place within LXC driver where the function is called is in virLXCProcessBuildControllerCmd(). If you take a closer look at the function it looks like this: static virCommandPtr virLXCProcessBuildControllerCmd(virLXCDriverPtr driver, .. int logfd, const char *pidfile) { ... virCommandSetOutputFD(cmd, &logfd); virCommandSetErrorFD(cmd, &logfd); ... } Yes, you guessed it. @logfd is passed into the function by value. However, in the function we try to get its address (an address of a local variable) which is no longer valid once function is finished and stack is cleaned. Therefore when cmd->outfdptr is evaluated at any point after this function, we may get a random number, depending on what's currently on the stack. Of course, this may work sometimes too - it depends on the compiler how it arranges the code, when the stack is wiped out. In order to fix this, lets pass a pointer to @logfd instead of figuring out (wrong) its value in a function. The bug was introduced in e1de5521. Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
2015-07-01 17:21:28 +02:00
&logfd,
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
pidfile)))
goto cleanup;
/* now that we know it is about to start call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, driver->xmlopt, 0);
int hookret;
hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_START, VIR_HOOK_SUBOP_BEGIN,
NULL, xml, NULL);
VIR_FREE(xml);
/*
* If the script raised an error abort the launch
*/
if (hookret < 0)
goto cleanup;
}
/* Log timestamp */
if ((timestamp = virTimeStringNow()) == NULL)
goto cleanup;
if (safewrite(logfd, timestamp, strlen(timestamp)) < 0 ||
safewrite(logfd, START_POSTFIX, strlen(START_POSTFIX)) < 0) {
VIR_WARN("Unable to write timestamp to logfile: %s",
virStrerror(errno, ebuf, sizeof(ebuf)));
}
VIR_FREE(timestamp);
/* Log generated command line */
virCommandWriteArgLog(cmd, logfd);
if ((pos = lseek(logfd, 0, SEEK_END)) < 0)
VIR_WARN("Unable to seek to end of logfile: %s",
virStrerror(errno, ebuf, sizeof(ebuf)));
VIR_DEBUG("Launching container");
util: make it easier to grab only regular command exit Auditing all callers of virCommandRun and virCommandWait that passed a non-NULL pointer for exit status turned up some interesting observations. Many callers were merely passing a pointer to avoid the overall command dying, but without caring what the exit status was - but these callers would be better off treating a child death by signal as an abnormal exit. Other callers were actually acting on the status, but not all of them remembered to filter by WIFEXITED and convert with WEXITSTATUS; depending on the platform, this can result in a status being reported as 256 times too big. And among those that correctly parse the output, it gets rather verbose. Finally, there were the callers that explicitly checked that the status was 0, and gave their own message, but with fewer details than what virCommand gives for free. So the best idea is to move the complexity out of callers and into virCommand - by default, we return the actual exit status already cleaned through WEXITSTATUS and treat signals as a failed command; but the few callers that care can ask for raw status and act on it themselves. * src/util/vircommand.h (virCommandRawStatus): New prototype. * src/libvirt_private.syms (util/command.h): Export it. * docs/internals/command.html.in: Document it. * src/util/vircommand.c (virCommandRawStatus): New function. (virCommandWait): Adjust semantics. * tests/commandtest.c (test1): Test it. * daemon/remote.c (remoteDispatchAuthPolkit): Adjust callers. * src/access/viraccessdriverpolkit.c (virAccessDriverPolkitCheck): Likewise. * src/fdstream.c (virFDStreamCloseInt): Likewise. * src/lxc/lxc_process.c (virLXCProcessStart): Likewise. * src/qemu/qemu_command.c (qemuCreateInBridgePortWithHelper): Likewise. * src/xen/xen_driver.c (xenUnifiedXendProbe): Simplify. * tests/reconnect.c (mymain): Likewise. * tests/statstest.c (mymain): Likewise. * src/bhyve/bhyve_process.c (virBhyveProcessStart) (virBhyveProcessStop): Don't overwrite virCommand error. * src/libvirt.c (virConnectAuthGainPolkit): Likewise. * src/openvz/openvz_driver.c (openvzDomainGetBarrierLimit) (openvzDomainSetBarrierLimit): Likewise. * src/util/virebtables.c (virEbTablesOnceInit): Likewise. * src/util/viriptables.c (virIpTablesOnceInit): Likewise. * src/util/virnetdevveth.c (virNetDevVethCreate): Fix debug message. * src/qemu/qemu_capabilities.c (virQEMUCapsInitQMP): Add comment. * src/storage/storage_backend_iscsi.c (virStorageBackendISCSINodeUpdate): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-02-19 17:32:19 -07:00
virCommandRawStatus(cmd);
if (virCommandRun(cmd, &status) < 0)
goto cleanup;
if (status != 0) {
util: make it easier to grab only regular command exit Auditing all callers of virCommandRun and virCommandWait that passed a non-NULL pointer for exit status turned up some interesting observations. Many callers were merely passing a pointer to avoid the overall command dying, but without caring what the exit status was - but these callers would be better off treating a child death by signal as an abnormal exit. Other callers were actually acting on the status, but not all of them remembered to filter by WIFEXITED and convert with WEXITSTATUS; depending on the platform, this can result in a status being reported as 256 times too big. And among those that correctly parse the output, it gets rather verbose. Finally, there were the callers that explicitly checked that the status was 0, and gave their own message, but with fewer details than what virCommand gives for free. So the best idea is to move the complexity out of callers and into virCommand - by default, we return the actual exit status already cleaned through WEXITSTATUS and treat signals as a failed command; but the few callers that care can ask for raw status and act on it themselves. * src/util/vircommand.h (virCommandRawStatus): New prototype. * src/libvirt_private.syms (util/command.h): Export it. * docs/internals/command.html.in: Document it. * src/util/vircommand.c (virCommandRawStatus): New function. (virCommandWait): Adjust semantics. * tests/commandtest.c (test1): Test it. * daemon/remote.c (remoteDispatchAuthPolkit): Adjust callers. * src/access/viraccessdriverpolkit.c (virAccessDriverPolkitCheck): Likewise. * src/fdstream.c (virFDStreamCloseInt): Likewise. * src/lxc/lxc_process.c (virLXCProcessStart): Likewise. * src/qemu/qemu_command.c (qemuCreateInBridgePortWithHelper): Likewise. * src/xen/xen_driver.c (xenUnifiedXendProbe): Simplify. * tests/reconnect.c (mymain): Likewise. * tests/statstest.c (mymain): Likewise. * src/bhyve/bhyve_process.c (virBhyveProcessStart) (virBhyveProcessStop): Don't overwrite virCommand error. * src/libvirt.c (virConnectAuthGainPolkit): Likewise. * src/openvz/openvz_driver.c (openvzDomainGetBarrierLimit) (openvzDomainSetBarrierLimit): Likewise. * src/util/virebtables.c (virEbTablesOnceInit): Likewise. * src/util/viriptables.c (virIpTablesOnceInit): Likewise. * src/util/virnetdevveth.c (virNetDevVethCreate): Fix debug message. * src/qemu/qemu_capabilities.c (virQEMUCapsInitQMP): Add comment. * src/storage/storage_backend_iscsi.c (virStorageBackendISCSINodeUpdate): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-02-19 17:32:19 -07:00
if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf,
sizeof(ebuf)) <= 0) {
if (WIFEXITED(status))
g_snprintf(ebuf, sizeof(ebuf), _("unexpected exit status %d"),
WEXITSTATUS(status));
util: make it easier to grab only regular command exit Auditing all callers of virCommandRun and virCommandWait that passed a non-NULL pointer for exit status turned up some interesting observations. Many callers were merely passing a pointer to avoid the overall command dying, but without caring what the exit status was - but these callers would be better off treating a child death by signal as an abnormal exit. Other callers were actually acting on the status, but not all of them remembered to filter by WIFEXITED and convert with WEXITSTATUS; depending on the platform, this can result in a status being reported as 256 times too big. And among those that correctly parse the output, it gets rather verbose. Finally, there were the callers that explicitly checked that the status was 0, and gave their own message, but with fewer details than what virCommand gives for free. So the best idea is to move the complexity out of callers and into virCommand - by default, we return the actual exit status already cleaned through WEXITSTATUS and treat signals as a failed command; but the few callers that care can ask for raw status and act on it themselves. * src/util/vircommand.h (virCommandRawStatus): New prototype. * src/libvirt_private.syms (util/command.h): Export it. * docs/internals/command.html.in: Document it. * src/util/vircommand.c (virCommandRawStatus): New function. (virCommandWait): Adjust semantics. * tests/commandtest.c (test1): Test it. * daemon/remote.c (remoteDispatchAuthPolkit): Adjust callers. * src/access/viraccessdriverpolkit.c (virAccessDriverPolkitCheck): Likewise. * src/fdstream.c (virFDStreamCloseInt): Likewise. * src/lxc/lxc_process.c (virLXCProcessStart): Likewise. * src/qemu/qemu_command.c (qemuCreateInBridgePortWithHelper): Likewise. * src/xen/xen_driver.c (xenUnifiedXendProbe): Simplify. * tests/reconnect.c (mymain): Likewise. * tests/statstest.c (mymain): Likewise. * src/bhyve/bhyve_process.c (virBhyveProcessStart) (virBhyveProcessStop): Don't overwrite virCommand error. * src/libvirt.c (virConnectAuthGainPolkit): Likewise. * src/openvz/openvz_driver.c (openvzDomainGetBarrierLimit) (openvzDomainSetBarrierLimit): Likewise. * src/util/virebtables.c (virEbTablesOnceInit): Likewise. * src/util/viriptables.c (virIpTablesOnceInit): Likewise. * src/util/virnetdevveth.c (virNetDevVethCreate): Fix debug message. * src/qemu/qemu_capabilities.c (virQEMUCapsInitQMP): Add comment. * src/storage/storage_backend_iscsi.c (virStorageBackendISCSINodeUpdate): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-02-19 17:32:19 -07:00
else
g_snprintf(ebuf, sizeof(ebuf), "%s", _("terminated abnormally"));
util: make it easier to grab only regular command exit Auditing all callers of virCommandRun and virCommandWait that passed a non-NULL pointer for exit status turned up some interesting observations. Many callers were merely passing a pointer to avoid the overall command dying, but without caring what the exit status was - but these callers would be better off treating a child death by signal as an abnormal exit. Other callers were actually acting on the status, but not all of them remembered to filter by WIFEXITED and convert with WEXITSTATUS; depending on the platform, this can result in a status being reported as 256 times too big. And among those that correctly parse the output, it gets rather verbose. Finally, there were the callers that explicitly checked that the status was 0, and gave their own message, but with fewer details than what virCommand gives for free. So the best idea is to move the complexity out of callers and into virCommand - by default, we return the actual exit status already cleaned through WEXITSTATUS and treat signals as a failed command; but the few callers that care can ask for raw status and act on it themselves. * src/util/vircommand.h (virCommandRawStatus): New prototype. * src/libvirt_private.syms (util/command.h): Export it. * docs/internals/command.html.in: Document it. * src/util/vircommand.c (virCommandRawStatus): New function. (virCommandWait): Adjust semantics. * tests/commandtest.c (test1): Test it. * daemon/remote.c (remoteDispatchAuthPolkit): Adjust callers. * src/access/viraccessdriverpolkit.c (virAccessDriverPolkitCheck): Likewise. * src/fdstream.c (virFDStreamCloseInt): Likewise. * src/lxc/lxc_process.c (virLXCProcessStart): Likewise. * src/qemu/qemu_command.c (qemuCreateInBridgePortWithHelper): Likewise. * src/xen/xen_driver.c (xenUnifiedXendProbe): Simplify. * tests/reconnect.c (mymain): Likewise. * tests/statstest.c (mymain): Likewise. * src/bhyve/bhyve_process.c (virBhyveProcessStart) (virBhyveProcessStop): Don't overwrite virCommand error. * src/libvirt.c (virConnectAuthGainPolkit): Likewise. * src/openvz/openvz_driver.c (openvzDomainGetBarrierLimit) (openvzDomainSetBarrierLimit): Likewise. * src/util/virebtables.c (virEbTablesOnceInit): Likewise. * src/util/viriptables.c (virIpTablesOnceInit): Likewise. * src/util/virnetdevveth.c (virNetDevVethCreate): Fix debug message. * src/qemu/qemu_capabilities.c (virQEMUCapsInitQMP): Add comment. * src/storage/storage_backend_iscsi.c (virStorageBackendISCSINodeUpdate): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-02-19 17:32:19 -07:00
}
virReportError(VIR_ERR_INTERNAL_ERROR,
_("guest failed to start: %s"), ebuf);
goto cleanup;
}
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
/* It has started running, so get its pid */
if ((r = virPidFileReadPath(pidfile, &vm->pid)) < 0) {
if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0)
virReportError(VIR_ERR_INTERNAL_ERROR,
_("guest failed to start: %s"), ebuf);
else
virReportSystemError(-r,
_("Failed to read pid file %s"),
pidfile);
goto cleanup;
}
Run an RPC protocol over the LXC controller monitor This defines a new RPC protocol to be used between the LXC controller and the libvirtd LXC driver. There is only a single RPC message defined thus far, an asynchronous "EXIT" event that is emitted just before the LXC controller process exits. This provides the LXC driver with details about how the container shutdown - normally, or abnormally (crashed), thus allowing the driver to emit better libvirt events. Emitting the event in the LXC controller requires a few little tricks with the RPC service. Simply calling the virNetServiceClientSendMessage does not work, since this merely queues the message for asynchronous processing. In addition the main event loop is no longer running at the point the event is emitted, so no I/O is processed. Thus after invoking virNetServiceClientSendMessage it is necessary to mark the client as being in "delayed close" mode. Then the event loop is run again, until the client completes its close - this happens only after the queued message has been fully transmitted. The final complexity is that it is not safe to run virNetServerQuit() from the client close callback, since that is invoked from a context where the server is locked. Thus a zero-second timer is used to trigger shutdown of the event loop, causing the controller to finally exit. * src/Makefile.am: Add rules for generating RPC protocol files and dispatch methods * src/lxc/lxc_controller.c: Emit an RPC event immediately before exiting * src/lxc/lxc_domain.h: Record the shutdown reason given by the controller * src/lxc/lxc_monitor.c, src/lxc/lxc_monitor.h: Register RPC program and event handler. Add callback to let driver receive EXIT event. * src/lxc/lxc_process.c: Use monitor exit event to decide what kind of domain event to emit * src/lxc/lxc_protocol.x: Define wire protocol for LXC controller monitor. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2012-07-17 15:54:08 +01:00
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
priv->wantReboot = false;
vm->def->id = vm->pid;
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
priv->doneStopEvent = false;
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
if (VIR_CLOSE(handshakefds[1]) < 0) {
virReportSystemError(errno, "%s", _("could not close handshake fd"));
goto cleanup;
}
if (virCommandHandshakeWait(cmd) < 0)
goto cleanup;
/* Write domain status to disk for the controller to
* read when it starts */
if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
goto cleanup;
/* Allow the child to exec the controller */
if (virCommandHandshakeNotify(cmd) < 0)
goto cleanup;
if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback)
driver->inhibitCallback(true, driver->inhibitOpaque);
if (lxcContainerWaitForContinue(handshakefds[0]) < 0) {
char out[1024];
if (!(virLXCProcessReadLogOutput(vm, logfile, pos, out, 1024) < 0)) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("guest failed to start: %s"), out);
}
goto cleanup;
}
priv->machineName = virLXCDomainGetMachineName(vm->def, vm->pid);
if (!priv->machineName)
goto cleanup;
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
/* We know the cgroup must exist by this synchronization
* point so lets detect that first, since it gives us a
* more reliable way to kill everything off if something
* goes wrong from here onwards ... */
systemd: Modernize machine naming So, systemd-machined has this philosophy that machine names are like hostnames and hence should follow the same rules. But we always allowed international characters in domain names. Thus we need to modify the machine name we are passing to systemd. In order to change some machine names that we will be passing to systemd, we also need to call TerminateMachine at the end of a lifetime of a domain. Even for domains that were started with older libvirt. That can be achieved thanks to virSystemdGetMachineNameByPID(). And because we can change machine names, we can get rid of the inconsistent and pointless escaping of domain names when creating machine names. So this patch modifies the naming in the following way. It creates the name as <drivername>-<id>-<name> where invalid hostname characters are stripped out of the name and if the resulting name is longer, it truncates it to 64 characters. That way we can start domains we couldn't start before. Well, at least on systemd. To make it work all together, the machineName (which is needed only with systemd) is saved in domain's private data. That way the generation is moved to the driver and we don't need to pass various unnecessary arguments to cgroup functions. The only thing this complicates a bit is the scope generation when validating a cgroup where we must check both old and new naming, so a slight modification was needed there. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1282846 Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
2016-02-01 16:50:54 +01:00
if (virCgroupNewDetectMachine(vm->def->name, "lxc",
vm->pid, -1, priv->machineName,
&priv->cgroup) < 0)
goto cleanup;
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
if (!priv->cgroup) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("No valid cgroup for machine %s"),
vm->def->name);
goto cleanup;
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
}
/* And we can get the first monitor connection now too */
if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) {
/* Intentionally overwrite the real monitor error message,
* since a better one is almost always found in the logs
*/
if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0) {
virResetLastError();
virReportError(VIR_ERR_INTERNAL_ERROR,
_("guest failed to start: %s"), ebuf);
}
goto cleanup;
lxc: re-arrange startup synchronization sequence with controller Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller has backgrounded itself, setup cgroups and written its pid file, so startup was race free. The problem is that we need to delay create of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation though the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection after this synchronization point.
2015-01-16 15:03:16 +00:00
}
if (autoDestroy &&
2013-07-15 19:08:11 +02:00
virCloseCallbacksSet(driver->closeCallbacks, vm,
conn, lxcProcessAutoDestroy) < 0)
goto cleanup;
/* We don't need the temporary NIC names anymore, clear them */
virLXCProcessCleanInterfaces(vm->def);
/* finally we can call the 'started' hook script if any */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, driver->xmlopt, 0);
int hookret;
hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_STARTED, VIR_HOOK_SUBOP_BEGIN,
NULL, xml, NULL);
VIR_FREE(xml);
/*
* If the script raised an error abort the launch
*/
if (hookret < 0)
goto cleanup;
}
rc = 0;
cleanup:
if (VIR_CLOSE(logfd) < 0) {
virReportSystemError(errno, "%s", _("could not close logfile"));
rc = -1;
}
if (rc != 0) {
virErrorPreserveLast(&err);
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED);
}
virCommandFree(cmd);
for (i = 0; i < nttyFDs; i++)
VIR_FORCE_CLOSE(ttyFDs[i]);
VIR_FREE(ttyFDs);
VIR_FORCE_CLOSE(handshakefds[0]);
VIR_FORCE_CLOSE(handshakefds[1]);
VIR_FREE(pidfile);
VIR_FREE(logfile);
virObjectUnref(cfg);
virObjectUnref(caps);
virErrorRestore(&err);
return rc;
}
struct virLXCProcessAutostartData {
virLXCDriverPtr driver;
virConnectPtr conn;
};
static int
virLXCProcessAutostartDomain(virDomainObjPtr vm,
void *opaque)
{
const struct virLXCProcessAutostartData *data = opaque;
int ret = 0;
virObjectLock(vm);
if (vm->autostart &&
!virDomainObjIsActive(vm)) {
ret = virLXCProcessStart(data->conn, data->driver, vm,
0, NULL, false,
VIR_DOMAIN_RUNNING_BOOTED);
virDomainAuditStart(vm, "booted", ret >= 0);
if (ret < 0) {
VIR_ERROR(_("Failed to autostart VM '%s': %s"),
vm->def->name,
virGetLastErrorMessage());
} else {
virObjectEventPtr event =
virDomainEventLifecycleNewFromObj(vm,
VIR_DOMAIN_EVENT_STARTED,
VIR_DOMAIN_EVENT_STARTED_BOOTED);
virObjectEventStateQueue(data->driver->domainEventState, event);
}
}
virObjectUnlock(vm);
return ret;
}
void
virLXCProcessAutostartAll(virLXCDriverPtr driver)
{
/* XXX: Figure out a better way todo this. The domain
* startup code needs a connection handle in order
* to lookup the bridge associated with a virtual
* network
*/
virConnectPtr conn = virConnectOpen("lxc:///system");
/* Ignoring NULL conn which is mostly harmless here */
struct virLXCProcessAutostartData data = { driver, conn };
virDomainObjListForEach(driver->domains, false,
virLXCProcessAutostartDomain,
&data);
virObjectUnref(conn);
}
static void
virLXCProcessReconnectNotifyNets(virDomainDefPtr def)
{
size_t i;
virConnectPtr conn = NULL;
for (i = 0; i < def->nnets; i++) {
virDomainNetDefPtr net = def->nets[i];
/* keep others from trying to use the macvtap device name, but
* don't return error if this happens, since that causes the
* domain to be unceremoniously killed, which would be *very*
* impolite.
*/
if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_DIRECT)
ignore_value(virNetDevMacVLanReserveName(net->ifname, false));
if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
if (!conn && !(conn = virGetConnectNetwork()))
continue;
virDomainNetNotifyActualDevice(conn, def, net);
}
}
virObjectUnref(conn);
}
static int
virLXCProcessReconnectDomain(virDomainObjPtr vm,
void *opaque)
{
virLXCDriverPtr driver = opaque;
virLXCDomainObjPrivatePtr priv;
virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
int ret = -1;
virObjectLock(vm);
VIR_DEBUG("Reconnect id=%d pid=%d state=%d", vm->def->id, vm->pid, vm->state.state);
priv = vm->privateData;
if (vm->pid != 0) {
vm->def->id = vm->pid;
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
VIR_DOMAIN_RUNNING_UNKNOWN);
if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback)
driver->inhibitCallback(true, driver->inhibitOpaque);
if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm)))
goto error;
priv->machineName = virLXCDomainGetMachineName(vm->def, vm->pid);
if (!priv->machineName)
goto cleanup;
if (virCgroupNewDetectMachine(vm->def->name, "lxc", vm->pid, -1,
priv->machineName, &priv->cgroup) < 0)
goto error;
if (!priv->cgroup) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("No valid cgroup for machine %s"),
vm->def->name);
goto error;
}
if (virLXCUpdateActiveUSBHostdevs(driver, vm->def) < 0)
goto error;
if (virSecurityManagerReserveLabel(driver->securityManager,
vm->def, vm->pid) < 0)
goto error;
virLXCProcessReconnectNotifyNets(vm->def);
if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
VIR_WARN("Cannot update XML for running LXC guest %s", vm->def->name);
/* now that we know it's reconnected call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, driver->xmlopt, 0);
int hookret;
/* we can't stop the operation even if the script raised an error */
hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_RECONNECT, VIR_HOOK_SUBOP_BEGIN,
NULL, xml, NULL);
VIR_FREE(xml);
if (hookret < 0)
goto error;
}
} else {
vm->def->id = -1;
}
ret = 0;
cleanup:
virObjectUnref(cfg);
virObjectUnlock(vm);
return ret;
error:
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED);
virDomainAuditStop(vm, "failed");
goto cleanup;
}
int virLXCProcessReconnectAll(virLXCDriverPtr driver,
virDomainObjListPtr doms)
{
virDomainObjListForEach(doms, false, virLXCProcessReconnectDomain, driver);
return 0;
}