libvirt/src/lxc/lxc_process.c
Daniel P. Berrange ab1ef3bc6c Include pid namespace inode in LXC audit messages
To allow the efficient correlation of container audit messages
with host hosts, include the pid namespace inode in audit
messages.
2013-03-07 19:43:53 +00:00

1457 lines
45 KiB
C

/*
* Copyright (C) 2010-2012 Red Hat, Inc.
* Copyright IBM Corp. 2008
*
* lxc_process.c: LXC process lifecycle management
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include "lxc_process.h"
#include "lxc_domain.h"
#include "lxc_container.h"
#include "lxc_fuse.h"
#include "datatypes.h"
#include "virfile.h"
#include "virpidfile.h"
#include "virnetdev.h"
#include "virnetdevveth.h"
#include "virnetdevbridge.h"
#include "virtime.h"
#include "domain_nwfilter.h"
#include "network/bridge_driver.h"
#include "viralloc.h"
#include "domain_audit.h"
#include "virerror.h"
#include "virlog.h"
#include "vircommand.h"
#include "lxc_hostdev.h"
#include "virhook.h"
#define VIR_FROM_THIS VIR_FROM_LXC
#define START_POSTFIX ": starting up\n"
int virLXCProcessAutoDestroyInit(virLXCDriverPtr driver)
{
if (!(driver->autodestroy = virHashCreate(5, NULL)))
return -1;
return 0;
}
struct virLXCProcessAutoDestroyData {
virLXCDriverPtr driver;
virConnectPtr conn;
};
static void virLXCProcessAutoDestroyDom(void *payload,
const void *name,
void *opaque)
{
struct virLXCProcessAutoDestroyData *data = opaque;
virConnectPtr conn = payload;
const char *uuidstr = name;
unsigned char uuid[VIR_UUID_BUFLEN];
virDomainObjPtr dom;
virDomainEventPtr event = NULL;
virLXCDomainObjPrivatePtr priv;
VIR_DEBUG("conn=%p uuidstr=%s thisconn=%p", conn, uuidstr, data->conn);
if (data->conn != conn)
return;
if (virUUIDParse(uuidstr, uuid) < 0) {
VIR_WARN("Failed to parse %s", uuidstr);
return;
}
if (!(dom = virDomainObjListFindByUUID(data->driver->domains,
uuid))) {
VIR_DEBUG("No domain object to kill");
return;
}
priv = dom->privateData;
VIR_DEBUG("Killing domain");
virLXCProcessStop(data->driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED);
virDomainAuditStop(dom, "destroyed");
event = virDomainEventNewFromObj(dom,
VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
priv->doneStopEvent = true;
if (dom && !dom->persistent)
virDomainObjListRemove(data->driver->domains, dom);
if (dom)
virObjectUnlock(dom);
if (event)
virDomainEventStateQueue(data->driver->domainEventState, event);
virHashRemoveEntry(data->driver->autodestroy, uuidstr);
}
/*
* Precondition: driver is locked
*/
void virLXCProcessAutoDestroyRun(virLXCDriverPtr driver, virConnectPtr conn)
{
struct virLXCProcessAutoDestroyData data = {
driver, conn
};
VIR_DEBUG("conn=%p", conn);
virHashForEach(driver->autodestroy, virLXCProcessAutoDestroyDom, &data);
}
void virLXCProcessAutoDestroyShutdown(virLXCDriverPtr driver)
{
virHashFree(driver->autodestroy);
}
int virLXCProcessAutoDestroyAdd(virLXCDriverPtr driver,
virDomainObjPtr vm,
virConnectPtr conn)
{
char uuidstr[VIR_UUID_STRING_BUFLEN];
virUUIDFormat(vm->def->uuid, uuidstr);
VIR_DEBUG("vm=%s uuid=%s conn=%p", vm->def->name, uuidstr, conn);
if (virHashAddEntry(driver->autodestroy, uuidstr, conn) < 0)
return -1;
return 0;
}
int virLXCProcessAutoDestroyRemove(virLXCDriverPtr driver,
virDomainObjPtr vm)
{
char uuidstr[VIR_UUID_STRING_BUFLEN];
virUUIDFormat(vm->def->uuid, uuidstr);
VIR_DEBUG("vm=%s uuid=%s", vm->def->name, uuidstr);
if (virHashRemoveEntry(driver->autodestroy, uuidstr) < 0)
return -1;
return 0;
}
static virConnectPtr
virLXCProcessAutoDestroyGetConn(virLXCDriverPtr driver,
virDomainObjPtr vm)
{
char uuidstr[VIR_UUID_STRING_BUFLEN];
virUUIDFormat(vm->def->uuid, uuidstr);
VIR_DEBUG("vm=%s uuid=%s", vm->def->name, uuidstr);
return virHashLookup(driver->autodestroy, uuidstr);
}
static int
virLXCProcessReboot(virLXCDriverPtr driver,
virDomainObjPtr vm)
{
virConnectPtr conn = virLXCProcessAutoDestroyGetConn(driver, vm);
int reason = vm->state.reason;
bool autodestroy = false;
int ret = -1;
virDomainDefPtr savedDef;
VIR_DEBUG("Faking reboot");
if (conn) {
virConnectRef(conn);
autodestroy = true;
} else {
conn = virConnectOpen("lxc:///");
/* Ignoring NULL conn which is mostly harmless here */
}
/* In a reboot scenario, we need to make sure we continue
* to use the current 'def', and not switch to 'newDef'.
* So temporarily hide the newDef and then reinstate it
*/
savedDef = vm->newDef;
vm->newDef = NULL;
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
vm->newDef = savedDef;
if (virLXCProcessStart(conn, driver, vm, autodestroy, reason) < 0) {
VIR_WARN("Unable to handle reboot of vm %s",
vm->def->name);
goto cleanup;
}
if (conn)
virConnectClose(conn);
ret = 0;
cleanup:
return ret;
}
/**
* virLXCProcessCleanup:
* @driver: pointer to driver structure
* @vm: pointer to VM to clean up
* @reason: reason for switching the VM to shutoff state
*
* Cleanout resources associated with the now dead VM
*
*/
static void virLXCProcessCleanup(virLXCDriverPtr driver,
virDomainObjPtr vm,
virDomainShutoffReason reason)
{
virCgroupPtr cgroup;
int i;
virLXCDomainObjPrivatePtr priv = vm->privateData;
virNetDevVPortProfilePtr vport = NULL;
VIR_DEBUG("Stopping VM name=%s pid=%d reason=%d",
vm->def->name, (int)vm->pid, (int)reason);
/* now that we know it's stopped call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, 0);
/* we can't stop the operation even if the script raised an error */
virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_STOPPED, VIR_HOOK_SUBOP_END,
NULL, xml, NULL);
VIR_FREE(xml);
}
/* Stop autodestroy in case guest is restarted */
virLXCProcessAutoDestroyRemove(driver, vm);
if (priv->monitor) {
virLXCMonitorClose(priv->monitor);
virObjectUnref(priv->monitor);
priv->monitor = NULL;
}
virPidFileDelete(driver->stateDir, vm->def->name);
virDomainDeleteConfig(driver->stateDir, NULL, vm);
virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
vm->pid = -1;
vm->def->id = -1;
driver->nactive--;
if (!driver->nactive && driver->inhibitCallback)
driver->inhibitCallback(false, driver->inhibitOpaque);
virLXCDomainReAttachHostDevices(driver, vm->def);
for (i = 0 ; i < vm->def->nnets ; i++) {
virDomainNetDefPtr iface = vm->def->nets[i];
vport = virDomainNetGetActualVirtPortProfile(iface);
if (iface->ifname) {
ignore_value(virNetDevSetOnline(iface->ifname, false));
if (vport &&
vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH)
ignore_value(virNetDevOpenvswitchRemovePort(
virDomainNetGetActualBridgeName(iface),
iface->ifname));
ignore_value(virNetDevVethDelete(iface->ifname));
}
networkReleaseActualDevice(iface);
}
virDomainConfVMNWFilterTeardown(vm);
if (driver->cgroup &&
virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) == 0) {
virCgroupRemove(cgroup);
virCgroupFree(&cgroup);
}
/* now that we know it's stopped call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, 0);
/* we can't stop the operation even if the script raised an error */
virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_RELEASE, VIR_HOOK_SUBOP_END,
NULL, xml, NULL);
VIR_FREE(xml);
}
if (vm->newDef) {
virDomainDefFree(vm->def);
vm->def = vm->newDef;
vm->def->id = -1;
vm->newDef = NULL;
}
}
char *virLXCProcessSetupInterfaceBridged(virConnectPtr conn,
virDomainDefPtr vm,
virDomainNetDefPtr net,
const char *brname)
{
char *ret = NULL;
char *parentVeth;
char *containerVeth = NULL;
const virNetDevVPortProfilePtr vport = virDomainNetGetActualVirtPortProfile(net);
VIR_DEBUG("calling vethCreate()");
parentVeth = net->ifname;
if (virNetDevVethCreate(&parentVeth, &containerVeth) < 0)
goto cleanup;
VIR_DEBUG("parentVeth: %s, containerVeth: %s", parentVeth, containerVeth);
if (net->ifname == NULL)
net->ifname = parentVeth;
if (virNetDevSetMAC(containerVeth, &net->mac) < 0)
goto cleanup;
if (vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH) {
if (virNetDevOpenvswitchAddPort(brname, parentVeth, &net->mac,
vm->uuid, vport, virDomainNetGetActualVlan(net)) < 0)
goto cleanup;
} else {
if (virNetDevBridgeAddPort(brname, parentVeth) < 0)
goto cleanup;
}
if (virNetDevSetOnline(parentVeth, true) < 0)
goto cleanup;
if (virNetDevBandwidthSet(net->ifname,
virDomainNetGetActualBandwidth(net),
false) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot set bandwidth limits on %s"),
net->ifname);
goto cleanup;
}
if (net->filter &&
virDomainConfNWFilterInstantiate(conn, vm->uuid, net) < 0)
goto cleanup;
ret = containerVeth;
cleanup:
return ret;
}
char *virLXCProcessSetupInterfaceDirect(virConnectPtr conn,
virDomainDefPtr def,
virDomainNetDefPtr net)
{
char *ret = NULL;
char *res_ifname = NULL;
virLXCDriverPtr driver = conn->privateData;
virNetDevBandwidthPtr bw;
virNetDevVPortProfilePtr prof;
/* XXX how todo bandwidth controls ?
* Since the 'net-ifname' is about to be moved to a different
* namespace & renamed, there will be no host side visible
* interface for the container to attach rules to
*/
bw = virDomainNetGetActualBandwidth(net);
if (bw) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Unable to set network bandwidth on direct interfaces"));
return NULL;
}
/* XXX how todo port profiles ?
* Although we can do the association during container
* startup, at shutdown we are unable to disassociate
* because the macvlan device was moved to the container
* and automagically dies when the container dies. So
* we have no dev to perform disassociation with.
*/
prof = virDomainNetGetActualVirtPortProfile(net);
if (prof) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Unable to set port profile on direct interfaces"));
return NULL;
}
if (virNetDevMacVLanCreateWithVPortProfile(
net->ifname, &net->mac,
virDomainNetGetActualDirectDev(net),
virDomainNetGetActualDirectMode(net),
false, false, def->uuid,
virDomainNetGetActualVirtPortProfile(net),
&res_ifname,
VIR_NETDEV_VPORT_PROFILE_OP_CREATE,
driver->stateDir,
virDomainNetGetActualBandwidth(net)) < 0)
goto cleanup;
ret = res_ifname;
cleanup:
return ret;
}
/**
* virLXCProcessSetupInterfaces:
* @conn: pointer to connection
* @def: pointer to virtual machine structure
* @nveths: number of interfaces
* @veths: interface names
*
* Sets up the container interfaces by creating the veth device pairs and
* attaching the parent end to the appropriate bridge. The container end
* will moved into the container namespace later after clone has been called.
*
* Returns 0 on success or -1 in case of error
*/
static int virLXCProcessSetupInterfaces(virConnectPtr conn,
virDomainDefPtr def,
size_t *nveths,
char ***veths)
{
int ret = -1;
size_t i;
for (i = 0 ; i < def->nnets ; i++) {
char *veth = NULL;
/* If appropriate, grab a physical device from the configured
* network's pool of devices, or resolve bridge device name
* to the one defined in the network definition.
*/
if (networkAllocateActualDevice(def->nets[i]) < 0)
goto cleanup;
if (VIR_EXPAND_N(*veths, *nveths, 1) < 0) {
virReportOOMError();
goto cleanup;
}
switch (virDomainNetGetActualType(def->nets[i])) {
case VIR_DOMAIN_NET_TYPE_NETWORK: {
virNetworkPtr network;
char *brname = NULL;
bool fail = false;
int active;
virErrorPtr errobj;
if (!(network = virNetworkLookupByName(conn,
def->nets[i]->data.network.name)))
goto cleanup;
active = virNetworkIsActive(network);
if (active != 1) {
fail = true;
if (active == 0)
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Network '%s' is not active."),
def->nets[i]->data.network.name);
}
if (!fail) {
brname = virNetworkGetBridgeName(network);
if (brname == NULL)
fail = true;
}
/* Make sure any above failure is preserved */
errobj = virSaveLastError();
virNetworkFree(network);
virSetError(errobj);
virFreeError(errobj);
if (fail)
goto cleanup;
if (!(veth = virLXCProcessSetupInterfaceBridged(conn,
def,
def->nets[i],
brname))) {
VIR_FREE(brname);
goto cleanup;
}
VIR_FREE(brname);
break;
}
case VIR_DOMAIN_NET_TYPE_BRIDGE: {
const char *brname = virDomainNetGetActualBridgeName(def->nets[i]);
if (!brname) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("No bridge name specified"));
goto cleanup;
}
if (!(veth = virLXCProcessSetupInterfaceBridged(conn,
def,
def->nets[i],
brname)))
goto cleanup;
} break;
case VIR_DOMAIN_NET_TYPE_DIRECT:
if (!(veth = virLXCProcessSetupInterfaceDirect(conn,
def,
def->nets[i])))
goto cleanup;
break;
case VIR_DOMAIN_NET_TYPE_USER:
case VIR_DOMAIN_NET_TYPE_ETHERNET:
case VIR_DOMAIN_NET_TYPE_SERVER:
case VIR_DOMAIN_NET_TYPE_CLIENT:
case VIR_DOMAIN_NET_TYPE_MCAST:
case VIR_DOMAIN_NET_TYPE_INTERNAL:
case VIR_DOMAIN_NET_TYPE_LAST:
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unsupported network type %s"),
virDomainNetTypeToString(
virDomainNetGetActualType(def->nets[i])
));
goto cleanup;
}
(*veths)[(*nveths)-1] = veth;
}
ret = 0;
cleanup:
if (ret < 0) {
for (i = 0 ; i < def->nnets ; i++) {
virDomainNetDefPtr iface = def->nets[i];
virNetDevVPortProfilePtr vport = virDomainNetGetActualVirtPortProfile(iface);
if (vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH)
ignore_value(virNetDevOpenvswitchRemovePort(
virDomainNetGetActualBridgeName(iface),
iface->ifname));
networkReleaseActualDevice(iface);
}
}
return ret;
}
extern virLXCDriverPtr lxc_driver;
static void virLXCProcessMonitorEOFNotify(virLXCMonitorPtr mon,
virDomainObjPtr vm)
{
virLXCDriverPtr driver = lxc_driver;
virDomainEventPtr event = NULL;
virLXCDomainObjPrivatePtr priv;
VIR_DEBUG("mon=%p vm=%p", mon, vm);
lxcDriverLock(driver);
virObjectLock(vm);
lxcDriverUnlock(driver);
priv = vm->privateData;
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
if (!priv->wantReboot) {
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
if (!priv->doneStopEvent) {
event = virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
priv->stopReason);
virDomainAuditStop(vm, "shutdown");
} else {
VIR_DEBUG("Stop event has already been sent");
}
if (!vm->persistent) {
virDomainObjListRemove(driver->domains, vm);
vm = NULL;
}
} else {
int ret = virLXCProcessReboot(driver, vm);
virDomainAuditStop(vm, "reboot");
virDomainAuditStart(vm, "reboot", ret == 0);
if (ret == 0) {
event = virDomainEventRebootNewFromObj(vm);
} else {
event = virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
priv->stopReason);
if (!vm->persistent) {
virDomainObjListRemove(driver->domains, vm);
vm = NULL;
}
}
}
if (vm)
virObjectUnlock(vm);
if (event) {
lxcDriverLock(driver);
virDomainEventStateQueue(driver->domainEventState, event);
lxcDriverUnlock(driver);
}
}
static void virLXCProcessMonitorExitNotify(virLXCMonitorPtr mon ATTRIBUTE_UNUSED,
virLXCProtocolExitStatus status,
virDomainObjPtr vm)
{
virLXCDomainObjPrivatePtr priv = vm->privateData;
switch (status) {
case VIR_LXC_PROTOCOL_EXIT_STATUS_SHUTDOWN:
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN;
break;
case VIR_LXC_PROTOCOL_EXIT_STATUS_ERROR:
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
break;
case VIR_LXC_PROTOCOL_EXIT_STATUS_REBOOT:
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN;
priv->wantReboot = true;
break;
default:
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
break;
}
VIR_DEBUG("Domain shutoff reason %d (from status %d)",
priv->stopReason, status);
}
static int
virLXCProcessGetNsInode(pid_t pid,
const char *nsname,
ino_t *inode)
{
char *path = NULL;
struct stat sb;
int ret = -1;
if (virAsprintf(&path, "/proc/%llu/ns/%s",
(unsigned long long)pid, nsname) < 0) {
virReportOOMError();
goto cleanup;
}
if (stat(path, &sb) < 0) {
virReportSystemError(errno,
_("Unable to stat %s"), path);
goto cleanup;
}
*inode = sb.st_ino;
ret = 0;
cleanup:
VIR_FREE(path);
return ret;
}
/* XXX a little evil */
extern virLXCDriverPtr lxc_driver;
static void virLXCProcessMonitorInitNotify(virLXCMonitorPtr mon ATTRIBUTE_UNUSED,
pid_t initpid,
virDomainObjPtr vm)
{
virLXCDomainObjPrivatePtr priv = vm->privateData;
ino_t inode;
priv->initpid = initpid;
if (virLXCProcessGetNsInode(initpid, "pid", &inode) < 0) {
virErrorPtr err = virGetLastError();
VIR_WARN("Cannot obtain pid NS inode for %llu: %s",
(unsigned long long)initpid,
err && err->message ? err->message : "<unknown>");
virResetLastError();
inode = 0;
}
virDomainAuditInit(vm, initpid, inode);
if (virDomainSaveStatus(lxc_driver->caps, lxc_driver->stateDir, vm) < 0)
VIR_WARN("Cannot update XML with PID for LXC %s", vm->def->name);
}
static virLXCMonitorCallbacks monitorCallbacks = {
.eofNotify = virLXCProcessMonitorEOFNotify,
.exitNotify = virLXCProcessMonitorExitNotify,
.initNotify = virLXCProcessMonitorInitNotify,
};
static virLXCMonitorPtr virLXCProcessConnectMonitor(virLXCDriverPtr driver,
virDomainObjPtr vm)
{
virLXCMonitorPtr monitor = NULL;
if (virSecurityManagerSetSocketLabel(driver->securityManager, vm->def) < 0)
goto cleanup;
/* Hold an extra reference because we can't allow 'vm' to be
* deleted while the monitor is active */
virObjectRef(vm);
monitor = virLXCMonitorNew(vm, driver->stateDir, &monitorCallbacks);
if (monitor == NULL)
virObjectUnref(vm);
if (virSecurityManagerClearSocketLabel(driver->securityManager, vm->def) < 0) {
if (monitor) {
virObjectUnref(monitor);
monitor = NULL;
}
goto cleanup;
}
cleanup:
return monitor;
}
int virLXCProcessStop(virLXCDriverPtr driver,
virDomainObjPtr vm,
virDomainShutoffReason reason)
{
virCgroupPtr group = NULL;
int rc;
VIR_DEBUG("Stopping VM name=%s pid=%d reason=%d",
vm->def->name, (int)vm->pid, (int)reason);
if (!virDomainObjIsActive(vm)) {
VIR_DEBUG("VM '%s' not active", vm->def->name);
return 0;
}
if (vm->pid <= 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Invalid PID %d for container"), vm->pid);
return -1;
}
virSecurityManagerRestoreAllLabel(driver->securityManager,
vm->def, false);
virSecurityManagerReleaseLabel(driver->securityManager, vm->def);
/* Clear out dynamically assigned labels */
if (vm->def->nseclabels &&
vm->def->seclabels[0]->type == VIR_DOMAIN_SECLABEL_DYNAMIC) {
VIR_FREE(vm->def->seclabels[0]->model);
VIR_FREE(vm->def->seclabels[0]->label);
VIR_FREE(vm->def->seclabels[0]->imagelabel);
}
if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) == 0) {
rc = virCgroupKillPainfully(group);
if (rc < 0) {
virReportSystemError(-rc, "%s",
_("Failed to kill container PIDs"));
rc = -1;
goto cleanup;
}
if (rc == 1) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Some container PIDs refused to die"));
rc = -1;
goto cleanup;
}
} else {
/* If cgroup doesn't exist, the VM pids must have already
* died and so we're just cleaning up stale state
*/
}
virLXCProcessCleanup(driver, vm, reason);
rc = 0;
cleanup:
virCgroupFree(&group);
return rc;
}
static virCommandPtr
virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
virDomainObjPtr vm,
int nveths,
char **veths,
int *ttyFDs,
size_t nttyFDs,
int handshakefd)
{
size_t i;
char *filterstr;
char *outputstr;
virCommandPtr cmd;
cmd = virCommandNew(vm->def->emulator);
/* The controller may call ip command, so we have to retain PATH. */
virCommandAddEnvPass(cmd, "PATH");
virCommandAddEnvFormat(cmd, "LIBVIRT_DEBUG=%d",
virLogGetDefaultPriority());
if (virLogGetNbFilters() > 0) {
filterstr = virLogGetFilters();
if (!filterstr) {
virReportOOMError();
goto cleanup;
}
virCommandAddEnvPair(cmd, "LIBVIRT_LOG_FILTERS", filterstr);
VIR_FREE(filterstr);
}
if (driver->log_libvirtd) {
if (virLogGetNbOutputs() > 0) {
outputstr = virLogGetOutputs();
if (!outputstr) {
virReportOOMError();
goto cleanup;
}
virCommandAddEnvPair(cmd, "LIBVIRT_LOG_OUTPUTS", outputstr);
VIR_FREE(outputstr);
}
} else {
virCommandAddEnvFormat(cmd,
"LIBVIRT_LOG_OUTPUTS=%d:stderr",
virLogGetDefaultPriority());
}
virCommandAddArgList(cmd, "--name", vm->def->name, NULL);
for (i = 0 ; i < nttyFDs ; i++) {
virCommandAddArg(cmd, "--console");
virCommandAddArgFormat(cmd, "%d", ttyFDs[i]);
virCommandPreserveFD(cmd, ttyFDs[i]);
}
virCommandAddArgPair(cmd, "--security",
virSecurityManagerGetModel(driver->securityManager));
virCommandAddArg(cmd, "--handshake");
virCommandAddArgFormat(cmd, "%d", handshakefd);
virCommandAddArg(cmd, "--background");
for (i = 0 ; i < nveths ; i++) {
virCommandAddArgList(cmd, "--veth", veths[i], NULL);
}
virCommandPreserveFD(cmd, handshakefd);
return cmd;
cleanup:
virCommandFree(cmd);
return NULL;
}
static int
virLXCProcessReadLogOutputData(virDomainObjPtr vm,
int fd,
char *buf,
size_t buflen)
{
int retries = 10;
int got = 0;
int ret = -1;
char *filter_next = buf;
buf[0] = '\0';
while (retries) {
ssize_t bytes;
int isdead = 0;
char *eol;
if (vm->pid <= 0 ||
(kill(vm->pid, 0) == -1 && errno == ESRCH))
isdead = 1;
/* Any failures should be detected before we read the log, so we
* always have something useful to report on failure. */
bytes = saferead(fd, buf+got, buflen-got-1);
if (bytes < 0) {
virReportSystemError(errno, "%s",
_("Failure while reading log output"));
goto cleanup;
}
got += bytes;
buf[got] = '\0';
/* Filter out debug messages from intermediate libvirt process */
while ((eol = strchr(filter_next, '\n'))) {
*eol = '\0';
if (virLogProbablyLogMessage(filter_next)) {
memmove(filter_next, eol + 1, got - (eol - buf));
got -= eol + 1 - filter_next;
} else {
filter_next = eol + 1;
*eol = '\n';
}
}
if (got == buflen-1) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Out of space while reading log output: %s"),
buf);
goto cleanup;
}
if (isdead) {
ret = got;
goto cleanup;
}
usleep(100*1000);
retries--;
}
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Timed out while reading log output: %s"),
buf);
cleanup:
return ret;
}
static int
virLXCProcessReadLogOutput(virDomainObjPtr vm,
char *logfile,
off_t pos,
char *buf,
size_t buflen)
{
int fd = -1;
int ret;
if ((fd = open(logfile, O_RDONLY)) < 0) {
virReportSystemError(errno,
_("Unable to open log file %s"),
logfile);
return -1;
}
if (lseek(fd, pos, SEEK_SET) < 0) {
virReportSystemError(errno,
_("Unable to seek log file %s to %llu"),
logfile, (unsigned long long)pos);
VIR_FORCE_CLOSE(fd);
return -1;
}
ret = virLXCProcessReadLogOutputData(vm,
fd,
buf,
buflen);
VIR_FORCE_CLOSE(fd);
return ret;
}
/**
* virLXCProcessStart:
* @conn: pointer to connection
* @driver: pointer to driver structure
* @vm: pointer to virtual machine structure
* @autoDestroy: mark the domain for auto destruction
* @reason: reason for switching vm to running state
*
* Starts a vm
*
* Returns 0 on success or -1 in case of error
*/
int virLXCProcessStart(virConnectPtr conn,
virLXCDriverPtr driver,
virDomainObjPtr vm,
bool autoDestroy,
virDomainRunningReason reason)
{
int rc = -1, r;
size_t nttyFDs = 0;
int *ttyFDs = NULL;
size_t i;
char *logfile = NULL;
int logfd = -1;
size_t nveths = 0;
char **veths = NULL;
int handshakefds[2] = { -1, -1 };
off_t pos = -1;
char ebuf[1024];
char *timestamp;
virCommandPtr cmd = NULL;
virLXCDomainObjPrivatePtr priv = vm->privateData;
virErrorPtr err = NULL;
if (!lxc_driver->cgroup) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("The 'cpuacct', 'devices' & 'memory' cgroups controllers must be mounted"));
return -1;
}
if (!virCgroupMounted(lxc_driver->cgroup,
VIR_CGROUP_CONTROLLER_CPUACCT)) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find 'cpuacct' cgroups controller mount"));
return -1;
}
if (!virCgroupMounted(lxc_driver->cgroup,
VIR_CGROUP_CONTROLLER_DEVICES)) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find 'devices' cgroups controller mount"));
return -1;
}
if (!virCgroupMounted(lxc_driver->cgroup,
VIR_CGROUP_CONTROLLER_MEMORY)) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find 'memory' cgroups controller mount"));
return -1;
}
if (virFileMakePath(driver->logDir) < 0) {
virReportSystemError(errno,
_("Cannot create log directory '%s'"),
driver->logDir);
return -1;
}
if (virAsprintf(&logfile, "%s/%s.log",
driver->logDir, vm->def->name) < 0) {
virReportOOMError();
return -1;
}
/* Do this up front, so any part of the startup process can add
* runtime state to vm->def that won't be persisted. This let's us
* report implicit runtime defaults in the XML, like vnc listen/socket
*/
VIR_DEBUG("Setting current domain def as transient");
if (virDomainObjSetDefTransient(driver->caps, vm, true) < 0)
goto cleanup;
/* Run an early hook to set-up missing devices */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, 0);
int hookret;
hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_PREPARE, VIR_HOOK_SUBOP_BEGIN,
NULL, xml, NULL);
VIR_FREE(xml);
/*
* If the script raised an error abort the launch
*/
if (hookret < 0)
goto cleanup;
}
/* Must be run before security labelling */
VIR_DEBUG("Preparing host devices");
if (virLXCPrepareHostDevices(driver, vm->def) < 0)
goto cleanup;
/* Here we open all the PTYs we need on the host OS side.
* The LXC controller will open the guest OS side PTYs
* and forward I/O between them.
*/
nttyFDs = vm->def->nconsoles;
if (VIR_ALLOC_N(ttyFDs, nttyFDs) < 0) {
virReportOOMError();
goto cleanup;
}
/* If you are using a SecurityDriver with dynamic labelling,
then generate a security label for isolation */
VIR_DEBUG("Generating domain security label (if required)");
if (vm->def->nseclabels &&
vm->def->seclabels[0]->type == VIR_DOMAIN_SECLABEL_DEFAULT)
vm->def->seclabels[0]->type = VIR_DOMAIN_SECLABEL_NONE;
if (virSecurityManagerGenLabel(driver->securityManager, vm->def) < 0) {
virDomainAuditSecurityLabel(vm, false);
goto cleanup;
}
virDomainAuditSecurityLabel(vm, true);
VIR_DEBUG("Setting domain security labels");
if (virSecurityManagerSetAllLabel(driver->securityManager,
vm->def, NULL) < 0)
goto cleanup;
for (i = 0 ; i < vm->def->nconsoles ; i++)
ttyFDs[i] = -1;
for (i = 0 ; i < vm->def->nconsoles ; i++) {
char *ttyPath;
if (vm->def->consoles[i]->source.type != VIR_DOMAIN_CHR_TYPE_PTY) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Only PTY console types are supported"));
goto cleanup;
}
if (virFileOpenTty(&ttyFDs[i], &ttyPath, 1) < 0) {
virReportSystemError(errno, "%s",
_("Failed to allocate tty"));
goto cleanup;
}
VIR_FREE(vm->def->consoles[i]->source.data.file.path);
vm->def->consoles[i]->source.data.file.path = ttyPath;
VIR_FREE(vm->def->consoles[i]->info.alias);
if (virAsprintf(&vm->def->consoles[i]->info.alias, "console%zu", i) < 0) {
virReportOOMError();
goto cleanup;
}
}
if (virLXCProcessSetupInterfaces(conn, vm->def, &nveths, &veths) < 0)
goto cleanup;
/* Save the configuration for the controller */
if (virDomainSaveConfig(driver->stateDir, vm->def) < 0)
goto cleanup;
if ((logfd = open(logfile, O_WRONLY | O_APPEND | O_CREAT,
S_IRUSR|S_IWUSR)) < 0) {
virReportSystemError(errno,
_("Failed to open '%s'"),
logfile);
goto cleanup;
}
if (pipe(handshakefds) < 0) {
virReportSystemError(errno, "%s",
_("Unable to create pipe"));
goto cleanup;
}
if (!(cmd = virLXCProcessBuildControllerCmd(driver,
vm,
nveths, veths,
ttyFDs, nttyFDs,
handshakefds[1])))
goto cleanup;
virCommandSetOutputFD(cmd, &logfd);
virCommandSetErrorFD(cmd, &logfd);
/* now that we know it is about to start call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, 0);
int hookret;
hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_START, VIR_HOOK_SUBOP_BEGIN,
NULL, xml, NULL);
VIR_FREE(xml);
/*
* If the script raised an error abort the launch
*/
if (hookret < 0)
goto cleanup;
}
/* Log timestamp */
if ((timestamp = virTimeStringNow()) == NULL) {
virReportOOMError();
goto cleanup;
}
if (safewrite(logfd, timestamp, strlen(timestamp)) < 0 ||
safewrite(logfd, START_POSTFIX, strlen(START_POSTFIX)) < 0) {
VIR_WARN("Unable to write timestamp to logfile: %s",
virStrerror(errno, ebuf, sizeof(ebuf)));
}
VIR_FREE(timestamp);
/* Log generated command line */
virCommandWriteArgLog(cmd, logfd);
if ((pos = lseek(logfd, 0, SEEK_END)) < 0)
VIR_WARN("Unable to seek to end of logfile: %s",
virStrerror(errno, ebuf, sizeof(ebuf)));
if (virCommandRun(cmd, NULL) < 0)
goto cleanup;
if (VIR_CLOSE(handshakefds[1]) < 0) {
virReportSystemError(errno, "%s", _("could not close handshake fd"));
goto cleanup;
}
/* Connect to the controller as a client *first* because
* this will block until the child has written their
* pid file out to disk */
if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm)))
goto cleanup;
/* And get its pid */
if ((r = virPidFileRead(driver->stateDir, vm->def->name, &vm->pid)) < 0) {
char out[1024];
if (virLXCProcessReadLogOutput(vm, logfile, pos, out, 1024) > 0)
virReportError(VIR_ERR_INTERNAL_ERROR,
_("guest failed to start: %s"), out);
else
virReportSystemError(-r,
_("Failed to read pid file %s/%s.pid"),
driver->stateDir, vm->def->name);
goto cleanup;
}
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
priv->wantReboot = false;
vm->def->id = vm->pid;
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
priv->doneStopEvent = false;
if (!driver->nactive && driver->inhibitCallback)
driver->inhibitCallback(true, driver->inhibitOpaque);
driver->nactive++;
if (lxcContainerWaitForContinue(handshakefds[0]) < 0) {
char out[1024];
if (!(virLXCProcessReadLogOutput(vm, logfile, pos, out, 1024) < 0)) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("guest failed to start: %s"), out);
}
goto error;
}
if (autoDestroy &&
virLXCProcessAutoDestroyAdd(driver, vm, conn) < 0)
goto error;
if (virDomainObjSetDefTransient(driver->caps, vm, false) < 0)
goto error;
/* Write domain status to disk.
*
* XXX: Earlier we wrote the plain "live" domain XML to this
* location for the benefit of libvirt_lxc. We're now overwriting
* it with the live status XML instead. This is a (currently
* harmless) inconsistency we should fix one day */
if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
goto error;
/* finally we can call the 'started' hook script if any */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, 0);
int hookret;
hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_STARTED, VIR_HOOK_SUBOP_BEGIN,
NULL, xml, NULL);
VIR_FREE(xml);
/*
* If the script raised an error abort the launch
*/
if (hookret < 0)
goto error;
}
rc = 0;
cleanup:
if (rc != 0 && !err)
err = virSaveLastError();
virCommandFree(cmd);
if (VIR_CLOSE(logfd) < 0) {
virReportSystemError(errno, "%s", _("could not close logfile"));
rc = -1;
}
for (i = 0 ; i < nveths ; i++) {
if (rc != 0)
ignore_value(virNetDevVethDelete(veths[i]));
VIR_FREE(veths[i]);
}
if (rc != 0) {
if (vm->newDef) {
virDomainDefFree(vm->newDef);
vm->newDef = NULL;
}
if (priv->monitor) {
virObjectUnref(priv->monitor);
priv->monitor = NULL;
}
virDomainConfVMNWFilterTeardown(vm);
virSecurityManagerRestoreAllLabel(driver->securityManager,
vm->def, false);
virSecurityManagerReleaseLabel(driver->securityManager, vm->def);
/* Clear out dynamically assigned labels */
if (vm->def->nseclabels &&
vm->def->seclabels[0]->type == VIR_DOMAIN_SECLABEL_DYNAMIC) {
VIR_FREE(vm->def->seclabels[0]->model);
VIR_FREE(vm->def->seclabels[0]->label);
VIR_FREE(vm->def->seclabels[0]->imagelabel);
}
}
for (i = 0 ; i < nttyFDs ; i++)
VIR_FORCE_CLOSE(ttyFDs[i]);
VIR_FREE(ttyFDs);
VIR_FORCE_CLOSE(handshakefds[0]);
VIR_FORCE_CLOSE(handshakefds[1]);
VIR_FREE(logfile);
if (err) {
virSetError(err);
virFreeError(err);
}
return rc;
error:
err = virSaveLastError();
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED);
goto cleanup;
}
struct virLXCProcessAutostartData {
virLXCDriverPtr driver;
virConnectPtr conn;
};
static int
virLXCProcessAutostartDomain(virDomainObjPtr vm,
void *opaque)
{
const struct virLXCProcessAutostartData *data = opaque;
int ret = 0;
virObjectLock(vm);
if (vm->autostart &&
!virDomainObjIsActive(vm)) {
ret = virLXCProcessStart(data->conn, data->driver, vm, false,
VIR_DOMAIN_RUNNING_BOOTED);
virDomainAuditStart(vm, "booted", ret >= 0);
if (ret < 0) {
virErrorPtr err = virGetLastError();
VIR_ERROR(_("Failed to autostart VM '%s': %s"),
vm->def->name,
err ? err->message : "");
} else {
virDomainEventPtr event =
virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STARTED,
VIR_DOMAIN_EVENT_STARTED_BOOTED);
if (event)
virDomainEventStateQueue(data->driver->domainEventState, event);
}
}
virObjectUnlock(vm);
return ret;
}
void
virLXCProcessAutostartAll(virLXCDriverPtr driver)
{
/* XXX: Figure out a better way todo this. The domain
* startup code needs a connection handle in order
* to lookup the bridge associated with a virtual
* network
*/
virConnectPtr conn = virConnectOpen("lxc:///");
/* Ignoring NULL conn which is mostly harmless here */
struct virLXCProcessAutostartData data = { driver, conn };
lxcDriverLock(driver);
virDomainObjListForEach(driver->domains,
virLXCProcessAutostartDomain,
&data);
lxcDriverUnlock(driver);
if (conn)
virConnectClose(conn);
}
static int
virLXCProcessReconnectDomain(virDomainObjPtr vm,
void *opaque)
{
virLXCDriverPtr driver = opaque;
virLXCDomainObjPrivatePtr priv;
int ret = -1;
virObjectLock(vm);
VIR_DEBUG("Reconnect id=%d pid=%d state=%d", vm->def->id, vm->pid, vm->state.state);
priv = vm->privateData;
if (vm->pid != 0) {
vm->def->id = vm->pid;
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
VIR_DOMAIN_RUNNING_UNKNOWN);
if (!driver->nactive && driver->inhibitCallback)
driver->inhibitCallback(true, driver->inhibitOpaque);
driver->nactive++;
if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm)))
goto error;
if (virLXCUpdateActiveUsbHostdevs(driver, vm->def) < 0)
goto error;
if (virSecurityManagerReserveLabel(driver->securityManager,
vm->def, vm->pid) < 0)
goto error;
/* now that we know it's reconnected call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, 0);
int hookret;
/* we can't stop the operation even if the script raised an error */
hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
VIR_HOOK_LXC_OP_RECONNECT, VIR_HOOK_SUBOP_BEGIN,
NULL, xml, NULL);
VIR_FREE(xml);
if (hookret < 0)
goto error;
}
} else {
vm->def->id = -1;
}
ret = 0;
cleanup:
virObjectUnlock(vm);
return ret;
error:
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED);
virDomainAuditStop(vm, "failed");
goto cleanup;
}
int virLXCProcessReconnectAll(virLXCDriverPtr driver,
virDomainObjListPtr doms)
{
virDomainObjListForEach(doms, virLXCProcessReconnectDomain, driver);
return 0;
}