libvirt/src/qemu/qemu_domain.c

2217 lines
65 KiB
C

/*
* qemu_domain.h: QEMU domain private state
*
* Copyright (C) 2006-2013 Red Hat, Inc.
* Copyright (C) 2006 Daniel P. Berrange
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*
* Author: Daniel P. Berrange <berrange@redhat.com>
*/
#include <config.h>
#include "qemu_domain.h"
#include "qemu_command.h"
#include "qemu_capabilities.h"
#include "qemu_migration.h"
#include "viralloc.h"
#include "virlog.h"
#include "virerror.h"
#include "c-ctype.h"
#include "cpu/cpu.h"
#include "viruuid.h"
#include "virfile.h"
#include "domain_event.h"
#include "virtime.h"
#include "virstoragefile.h"
#include "virstring.h"
#include <sys/time.h>
#include <fcntl.h>
#include <libxml/xpathInternals.h>
#define VIR_FROM_THIS VIR_FROM_QEMU
#define QEMU_NAMESPACE_HREF "http://libvirt.org/schemas/domain/qemu/1.0"
VIR_ENUM_IMPL(qemuDomainJob, QEMU_JOB_LAST,
"none",
"query",
"destroy",
"suspend",
"modify",
"abort",
"migration operation",
"none", /* async job is never stored in job.active */
"async nested",
);
VIR_ENUM_IMPL(qemuDomainAsyncJob, QEMU_ASYNC_JOB_LAST,
"none",
"migration out",
"migration in",
"save",
"dump",
"snapshot",
);
const char *
qemuDomainAsyncJobPhaseToString(enum qemuDomainAsyncJob job,
int phase ATTRIBUTE_UNUSED)
{
switch (job) {
case QEMU_ASYNC_JOB_MIGRATION_OUT:
case QEMU_ASYNC_JOB_MIGRATION_IN:
return qemuMigrationJobPhaseTypeToString(phase);
case QEMU_ASYNC_JOB_SAVE:
case QEMU_ASYNC_JOB_DUMP:
case QEMU_ASYNC_JOB_SNAPSHOT:
case QEMU_ASYNC_JOB_NONE:
case QEMU_ASYNC_JOB_LAST:
; /* fall through */
}
return "none";
}
int
qemuDomainAsyncJobPhaseFromString(enum qemuDomainAsyncJob job,
const char *phase)
{
if (!phase)
return 0;
switch (job) {
case QEMU_ASYNC_JOB_MIGRATION_OUT:
case QEMU_ASYNC_JOB_MIGRATION_IN:
return qemuMigrationJobPhaseTypeFromString(phase);
case QEMU_ASYNC_JOB_SAVE:
case QEMU_ASYNC_JOB_DUMP:
case QEMU_ASYNC_JOB_SNAPSHOT:
case QEMU_ASYNC_JOB_NONE:
case QEMU_ASYNC_JOB_LAST:
; /* fall through */
}
if (STREQ(phase, "none"))
return 0;
else
return -1;
}
void qemuDomainEventQueue(virQEMUDriverPtr driver,
virDomainEventPtr event)
{
virDomainEventStateQueue(driver->domainEventState, event);
}
static int
qemuDomainObjInitJob(qemuDomainObjPrivatePtr priv)
{
memset(&priv->job, 0, sizeof(priv->job));
if (virCondInit(&priv->job.cond) < 0)
return -1;
if (virCondInit(&priv->job.asyncCond) < 0) {
virCondDestroy(&priv->job.cond);
return -1;
}
return 0;
}
static void
qemuDomainObjResetJob(qemuDomainObjPrivatePtr priv)
{
struct qemuDomainJobObj *job = &priv->job;
job->active = QEMU_JOB_NONE;
job->owner = 0;
}
static void
qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
{
struct qemuDomainJobObj *job = &priv->job;
job->asyncJob = QEMU_ASYNC_JOB_NONE;
job->asyncOwner = 0;
job->phase = 0;
job->mask = DEFAULT_JOB_MASK;
job->start = 0;
job->dump_memory_only = false;
job->asyncAbort = false;
memset(&job->status, 0, sizeof(job->status));
memset(&job->info, 0, sizeof(job->info));
}
void
qemuDomainObjRestoreJob(virDomainObjPtr obj,
struct qemuDomainJobObj *job)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
memset(job, 0, sizeof(*job));
job->active = priv->job.active;
job->owner = priv->job.owner;
job->asyncJob = priv->job.asyncJob;
job->asyncOwner = priv->job.asyncOwner;
job->phase = priv->job.phase;
qemuDomainObjResetJob(priv);
qemuDomainObjResetAsyncJob(priv);
}
void
qemuDomainObjTransferJob(virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
VIR_DEBUG("Changing job owner from %llu to %llu",
priv->job.owner, virThreadSelfID());
priv->job.owner = virThreadSelfID();
}
static void
qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv)
{
virCondDestroy(&priv->job.cond);
virCondDestroy(&priv->job.asyncCond);
}
static bool
qemuDomainTrackJob(enum qemuDomainJob job)
{
return (QEMU_DOMAIN_TRACK_JOBS & JOB_MASK(job)) != 0;
}
static void *
qemuDomainObjPrivateAlloc(void)
{
qemuDomainObjPrivatePtr priv;
if (VIR_ALLOC(priv) < 0)
return NULL;
if (qemuDomainObjInitJob(priv) < 0) {
virReportSystemError(errno, "%s",
_("Unable to init qemu driver mutexes"));
goto error;
}
if (virCondInit(&priv->unplugFinished) < 0)
goto error;
if (!(priv->devs = virChrdevAlloc()))
goto error;
priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
return priv;
error:
VIR_FREE(priv);
return NULL;
}
static void
qemuDomainObjPrivateFree(void *data)
{
qemuDomainObjPrivatePtr priv = data;
virObjectUnref(priv->qemuCaps);
virCgroupFree(&priv->cgroup);
qemuDomainPCIAddressSetFree(priv->pciaddrs);
qemuDomainCCWAddressSetFree(priv->ccwaddrs);
virDomainChrSourceDefFree(priv->monConfig);
qemuDomainObjFreeJob(priv);
VIR_FREE(priv->vcpupids);
VIR_FREE(priv->lockState);
VIR_FREE(priv->origname);
virCondDestroy(&priv->unplugFinished);
virChrdevFree(priv->devs);
/* This should never be non-NULL if we get here, but just in case... */
if (priv->mon) {
VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
qemuMonitorClose(priv->mon);
}
if (priv->agent) {
VIR_ERROR(_("Unexpected QEMU agent still active during domain deletion"));
qemuAgentClose(priv->agent);
}
VIR_FREE(priv->cleanupCallbacks);
VIR_FREE(priv);
}
static int
qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data)
{
qemuDomainObjPrivatePtr priv = data;
const char *monitorpath;
enum qemuDomainJob job;
/* priv->monitor_chr is set only for qemu */
if (priv->monConfig) {
switch (priv->monConfig->type) {
case VIR_DOMAIN_CHR_TYPE_UNIX:
monitorpath = priv->monConfig->data.nix.path;
break;
default:
case VIR_DOMAIN_CHR_TYPE_PTY:
monitorpath = priv->monConfig->data.file.path;
break;
}
virBufferEscapeString(buf, " <monitor path='%s'", monitorpath);
if (priv->monJSON)
virBufferAddLit(buf, " json='1'");
virBufferAsprintf(buf, " type='%s'/>\n",
virDomainChrTypeToString(priv->monConfig->type));
}
if (priv->nvcpupids) {
size_t i;
virBufferAddLit(buf, " <vcpus>\n");
for (i = 0; i < priv->nvcpupids; i++) {
virBufferAsprintf(buf, " <vcpu pid='%d'/>\n", priv->vcpupids[i]);
}
virBufferAddLit(buf, " </vcpus>\n");
}
if (priv->qemuCaps) {
size_t i;
virBufferAddLit(buf, " <qemuCaps>\n");
for (i = 0; i < QEMU_CAPS_LAST; i++) {
if (virQEMUCapsGet(priv->qemuCaps, i)) {
virBufferAsprintf(buf, " <flag name='%s'/>\n",
virQEMUCapsTypeToString(i));
}
}
virBufferAddLit(buf, " </qemuCaps>\n");
}
if (priv->lockState)
virBufferAsprintf(buf, " <lockstate>%s</lockstate>\n", priv->lockState);
job = priv->job.active;
if (!qemuDomainTrackJob(job))
priv->job.active = QEMU_JOB_NONE;
if (priv->job.active || priv->job.asyncJob) {
virBufferAsprintf(buf, " <job type='%s' async='%s'",
qemuDomainJobTypeToString(priv->job.active),
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
if (priv->job.phase) {
virBufferAsprintf(buf, " phase='%s'",
qemuDomainAsyncJobPhaseToString(
priv->job.asyncJob, priv->job.phase));
}
virBufferAddLit(buf, "/>\n");
}
priv->job.active = job;
if (priv->fakeReboot)
virBufferAddLit(buf, " <fakereboot/>\n");
return 0;
}
static int
qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, void *data)
{
qemuDomainObjPrivatePtr priv = data;
char *monitorpath;
char *tmp;
int n;
size_t i;
xmlNodePtr *nodes = NULL;
virQEMUCapsPtr qemuCaps = NULL;
if (VIR_ALLOC(priv->monConfig) < 0)
goto error;
if (!(monitorpath =
virXPathString("string(./monitor[1]/@path)", ctxt))) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("no monitor path"));
goto error;
}
tmp = virXPathString("string(./monitor[1]/@type)", ctxt);
if (tmp)
priv->monConfig->type = virDomainChrTypeFromString(tmp);
else
priv->monConfig->type = VIR_DOMAIN_CHR_TYPE_PTY;
VIR_FREE(tmp);
priv->monJSON = virXPathBoolean("count(./monitor[@json = '1']) > 0",
ctxt) > 0;
switch (priv->monConfig->type) {
case VIR_DOMAIN_CHR_TYPE_PTY:
priv->monConfig->data.file.path = monitorpath;
break;
case VIR_DOMAIN_CHR_TYPE_UNIX:
priv->monConfig->data.nix.path = monitorpath;
break;
default:
VIR_FREE(monitorpath);
virReportError(VIR_ERR_INTERNAL_ERROR,
_("unsupported monitor type '%s'"),
virDomainChrTypeToString(priv->monConfig->type));
goto error;
}
n = virXPathNodeSet("./vcpus/vcpu", ctxt, &nodes);
if (n < 0)
goto error;
if (n) {
priv->nvcpupids = n;
if (VIR_REALLOC_N(priv->vcpupids, priv->nvcpupids) < 0)
goto error;
for (i = 0; i < n; i++) {
char *pidstr = virXMLPropString(nodes[i], "pid");
if (!pidstr)
goto error;
if (virStrToLong_i(pidstr, NULL, 10, &(priv->vcpupids[i])) < 0) {
VIR_FREE(pidstr);
goto error;
}
VIR_FREE(pidstr);
}
VIR_FREE(nodes);
}
if ((n = virXPathNodeSet("./qemuCaps/flag", ctxt, &nodes)) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("failed to parse qemu capabilities flags"));
goto error;
}
if (n > 0) {
if (!(qemuCaps = virQEMUCapsNew()))
goto error;
for (i = 0; i < n; i++) {
char *str = virXMLPropString(nodes[i], "name");
if (str) {
int flag = virQEMUCapsTypeFromString(str);
if (flag < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unknown qemu capabilities flag %s"), str);
VIR_FREE(str);
goto error;
}
VIR_FREE(str);
virQEMUCapsSet(qemuCaps, flag);
}
}
priv->qemuCaps = qemuCaps;
}
VIR_FREE(nodes);
priv->lockState = virXPathString("string(./lockstate)", ctxt);
if ((tmp = virXPathString("string(./job[1]/@type)", ctxt))) {
int type;
if ((type = qemuDomainJobTypeFromString(tmp)) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unknown job type %s"), tmp);
VIR_FREE(tmp);
goto error;
}
VIR_FREE(tmp);
priv->job.active = type;
}
if ((tmp = virXPathString("string(./job[1]/@async)", ctxt))) {
int async;
if ((async = qemuDomainAsyncJobTypeFromString(tmp)) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unknown async job type %s"), tmp);
VIR_FREE(tmp);
goto error;
}
VIR_FREE(tmp);
priv->job.asyncJob = async;
if ((tmp = virXPathString("string(./job[1]/@phase)", ctxt))) {
priv->job.phase = qemuDomainAsyncJobPhaseFromString(async, tmp);
if (priv->job.phase < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unknown job phase %s"), tmp);
VIR_FREE(tmp);
goto error;
}
VIR_FREE(tmp);
}
}
priv->fakeReboot = virXPathBoolean("boolean(./fakereboot)", ctxt) == 1;
return 0;
error:
virDomainChrSourceDefFree(priv->monConfig);
priv->monConfig = NULL;
VIR_FREE(nodes);
virObjectUnref(qemuCaps);
return -1;
}
virDomainXMLPrivateDataCallbacks virQEMUDriverPrivateDataCallbacks = {
.alloc = qemuDomainObjPrivateAlloc,
.free = qemuDomainObjPrivateFree,
.parse = qemuDomainObjPrivateXMLParse,
.format = qemuDomainObjPrivateXMLFormat,
};
static void
qemuDomainDefNamespaceFree(void *nsdata)
{
qemuDomainCmdlineDefPtr cmd = nsdata;
size_t i;
if (!cmd)
return;
for (i = 0; i < cmd->num_args; i++)
VIR_FREE(cmd->args[i]);
for (i = 0; i < cmd->num_env; i++) {
VIR_FREE(cmd->env_name[i]);
VIR_FREE(cmd->env_value[i]);
}
VIR_FREE(cmd->args);
VIR_FREE(cmd->env_name);
VIR_FREE(cmd->env_value);
VIR_FREE(cmd);
}
static int
qemuDomainDefNamespaceParse(xmlDocPtr xml ATTRIBUTE_UNUSED,
xmlNodePtr root ATTRIBUTE_UNUSED,
xmlXPathContextPtr ctxt,
void **data)
{
qemuDomainCmdlineDefPtr cmd = NULL;
bool uses_qemu_ns = false;
xmlNodePtr *nodes = NULL;
int n;
size_t i;
if (xmlXPathRegisterNs(ctxt, BAD_CAST "qemu", BAD_CAST QEMU_NAMESPACE_HREF) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Failed to register xml namespace '%s'"),
QEMU_NAMESPACE_HREF);
return -1;
}
if (VIR_ALLOC(cmd) < 0)
return -1;
/* first handle the extra command-line arguments */
n = virXPathNodeSet("./qemu:commandline/qemu:arg", ctxt, &nodes);
if (n < 0)
goto error;
uses_qemu_ns |= n > 0;
if (n && VIR_ALLOC_N(cmd->args, n) < 0)
goto error;
for (i = 0; i < n; i++) {
cmd->args[cmd->num_args] = virXMLPropString(nodes[i], "value");
if (cmd->args[cmd->num_args] == NULL) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("No qemu command-line argument specified"));
goto error;
}
cmd->num_args++;
}
VIR_FREE(nodes);
/* now handle the extra environment variables */
n = virXPathNodeSet("./qemu:commandline/qemu:env", ctxt, &nodes);
if (n < 0)
goto error;
uses_qemu_ns |= n > 0;
if (n && VIR_ALLOC_N(cmd->env_name, n) < 0)
goto error;
if (n && VIR_ALLOC_N(cmd->env_value, n) < 0)
goto error;
for (i = 0; i < n; i++) {
char *tmp;
tmp = virXMLPropString(nodes[i], "name");
if (tmp == NULL) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("No qemu environment name specified"));
goto error;
}
if (tmp[0] == '\0') {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("Empty qemu environment name specified"));
goto error;
}
if (!c_isalpha(tmp[0]) && tmp[0] != '_') {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("Invalid environment name, it must begin with a letter or underscore"));
goto error;
}
if (strspn(tmp, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_") != strlen(tmp)) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("Invalid environment name, it must contain only alphanumerics and underscore"));
goto error;
}
cmd->env_name[cmd->num_env] = tmp;
cmd->env_value[cmd->num_env] = virXMLPropString(nodes[i], "value");
/* a NULL value for command is allowed, since it might be empty */
cmd->num_env++;
}
VIR_FREE(nodes);
if (uses_qemu_ns)
*data = cmd;
else
VIR_FREE(cmd);
return 0;
error:
VIR_FREE(nodes);
qemuDomainDefNamespaceFree(cmd);
return -1;
}
static int
qemuDomainDefNamespaceFormatXML(virBufferPtr buf,
void *nsdata)
{
qemuDomainCmdlineDefPtr cmd = nsdata;
size_t i;
if (!cmd->num_args && !cmd->num_env)
return 0;
virBufferAddLit(buf, " <qemu:commandline>\n");
for (i = 0; i < cmd->num_args; i++)
virBufferEscapeString(buf, " <qemu:arg value='%s'/>\n",
cmd->args[i]);
for (i = 0; i < cmd->num_env; i++) {
virBufferAsprintf(buf, " <qemu:env name='%s'", cmd->env_name[i]);
if (cmd->env_value[i])
virBufferEscapeString(buf, " value='%s'", cmd->env_value[i]);
virBufferAddLit(buf, "/>\n");
}
virBufferAddLit(buf, " </qemu:commandline>\n");
return 0;
}
static const char *
qemuDomainDefNamespaceHref(void)
{
return "xmlns:qemu='" QEMU_NAMESPACE_HREF "'";
}
virDomainXMLNamespace virQEMUDriverDomainXMLNamespace = {
.parse = qemuDomainDefNamespaceParse,
.free = qemuDomainDefNamespaceFree,
.format = qemuDomainDefNamespaceFormatXML,
.href = qemuDomainDefNamespaceHref,
};
static int
qemuDomainDefPostParse(virDomainDefPtr def,
virCapsPtr caps,
void *opaque ATTRIBUTE_UNUSED)
{
bool addPCIRoot = false;
/* check for emulator and create a default one if needed */
if (!def->emulator &&
!(def->emulator = virDomainDefGetDefaultEmulator(def, caps)))
return -1;
/* Add implicit PCI root controller if the machine has one */
switch (def->os.arch) {
case VIR_ARCH_I686:
case VIR_ARCH_X86_64:
if (!def->os.machine)
break;
if (STRPREFIX(def->os.machine, "pc-q35") ||
STREQ(def->os.machine, "q35") ||
STREQ(def->os.machine, "isapc"))
break;
if (!STRPREFIX(def->os.machine, "pc-0.") &&
!STRPREFIX(def->os.machine, "pc-1.") &&
!STRPREFIX(def->os.machine, "pc-i440") &&
!STREQ(def->os.machine, "pc") &&
!STRPREFIX(def->os.machine, "rhel"))
break;
addPCIRoot = true;
break;
case VIR_ARCH_ALPHA:
case VIR_ARCH_PPC:
case VIR_ARCH_PPC64:
case VIR_ARCH_PPCEMB:
case VIR_ARCH_SH4:
case VIR_ARCH_SH4EB:
addPCIRoot = true;
break;
default:
break;
}
if (addPCIRoot &&
virDomainDefMaybeAddController(
def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0,
VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) < 0)
return -1;
return 0;
}
static int
qemuDomainDeviceDefPostParse(virDomainDeviceDefPtr dev,
virDomainDefPtr def,
virCapsPtr caps ATTRIBUTE_UNUSED,
void *opaque)
{
int ret = -1;
virQEMUDriverPtr driver = opaque;
virQEMUDriverConfigPtr cfg = NULL;
if (dev->type == VIR_DOMAIN_DEVICE_NET &&
dev->data.net->type != VIR_DOMAIN_NET_TYPE_HOSTDEV &&
!dev->data.net->model) {
if (VIR_STRDUP(dev->data.net->model,
def->os.arch == VIR_ARCH_S390 ||
def->os.arch == VIR_ARCH_S390X ? "virtio" : "rtl8139") < 0)
goto cleanup;
}
/* set default disk types and drivers */
if (dev->type == VIR_DOMAIN_DEVICE_DISK) {
virDomainDiskDefPtr disk = dev->data.disk;
/* both of these require data from the driver config */
if (driver && (cfg = virQEMUDriverGetConfig(driver))) {
/* assign default storage format and driver according to config */
if (cfg->allowDiskFormatProbing) {
/* default disk format for drives */
if (disk->format == VIR_STORAGE_FILE_NONE &&
(disk->type == VIR_DOMAIN_DISK_TYPE_FILE ||
disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK))
disk->format = VIR_STORAGE_FILE_AUTO;
/* default disk format for mirrored drive */
if (disk->mirror &&
disk->mirrorFormat == VIR_STORAGE_FILE_NONE)
disk->mirrorFormat = VIR_STORAGE_FILE_AUTO;
} else {
/* default driver if probing is forbidden */
if (!disk->driverName &&
VIR_STRDUP(disk->driverName, "qemu") < 0)
goto cleanup;
/* default disk format for drives */
if (disk->format == VIR_STORAGE_FILE_NONE &&
(disk->type == VIR_DOMAIN_DISK_TYPE_FILE ||
disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK))
disk->format = VIR_STORAGE_FILE_RAW;
/* default disk format for mirrored drive */
if (disk->mirror &&
disk->mirrorFormat == VIR_STORAGE_FILE_NONE)
disk->mirrorFormat = VIR_STORAGE_FILE_RAW;
}
}
}
/* set the default console type for S390 arches */
if (dev->type == VIR_DOMAIN_DEVICE_CHR &&
dev->data.chr->deviceType == VIR_DOMAIN_CHR_DEVICE_TYPE_CONSOLE &&
dev->data.chr->targetType == VIR_DOMAIN_CHR_CONSOLE_TARGET_TYPE_NONE &&
(def->os.arch == VIR_ARCH_S390 || def->os.arch == VIR_ARCH_S390X))
dev->data.chr->targetType = VIR_DOMAIN_CHR_CONSOLE_TARGET_TYPE_VIRTIO;
/* set the default USB model to none for s390 unless an address is found */
if (dev->type == VIR_DOMAIN_DEVICE_CONTROLLER &&
dev->data.controller->type == VIR_DOMAIN_CONTROLLER_TYPE_USB &&
dev->data.controller->model == -1 &&
dev->data.controller->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE &&
(def->os.arch == VIR_ARCH_S390 || def->os.arch == VIR_ARCH_S390X))
dev->data.controller->model = VIR_DOMAIN_CONTROLLER_MODEL_USB_NONE;
/* auto generate unix socket path */
if (dev->type == VIR_DOMAIN_DEVICE_CHR &&
dev->data.chr->deviceType == VIR_DOMAIN_CHR_DEVICE_TYPE_CHANNEL &&
dev->data.chr->targetType == VIR_DOMAIN_CHR_CHANNEL_TARGET_TYPE_VIRTIO &&
dev->data.chr->source.type == VIR_DOMAIN_CHR_TYPE_UNIX &&
!dev->data.chr->source.data.nix.path &&
(driver && (cfg = virQEMUDriverGetConfig(driver)))) {
if (virAsprintf(&dev->data.chr->source.data.nix.path,
"%s/channel/target/%s.%s",
cfg->libDir, def->name,
dev->data.chr->target.name) < 0)
goto cleanup;
dev->data.chr->source.data.nix.listen = true;
}
ret = 0;
cleanup:
virObjectUnref(cfg);
return ret;
}
virDomainDefParserConfig virQEMUDriverDomainDefParserConfig = {
.devicesPostParseCallback = qemuDomainDeviceDefPostParse,
.domainPostParseCallback = qemuDomainDefPostParse,
};
static void
qemuDomainObjSaveJob(virQEMUDriverPtr driver, virDomainObjPtr obj)
{
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (virDomainObjIsActive(obj)) {
if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, obj) < 0)
VIR_WARN("Failed to save status on vm %s", obj->def->name);
}
virObjectUnref(cfg);
}
void
qemuDomainObjSetJobPhase(virQEMUDriverPtr driver,
virDomainObjPtr obj,
int phase)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
unsigned long long me = virThreadSelfID();
if (!priv->job.asyncJob)
return;
VIR_DEBUG("Setting '%s' phase to '%s'",
qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
qemuDomainAsyncJobPhaseToString(priv->job.asyncJob, phase));
if (priv->job.asyncOwner && me != priv->job.asyncOwner) {
VIR_WARN("'%s' async job is owned by thread %llu",
qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
priv->job.asyncOwner);
}
priv->job.phase = phase;
priv->job.asyncOwner = me;
qemuDomainObjSaveJob(driver, obj);
}
void
qemuDomainObjSetAsyncJobMask(virDomainObjPtr obj,
unsigned long long allowedJobs)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
if (!priv->job.asyncJob)
return;
priv->job.mask = allowedJobs | JOB_MASK(QEMU_JOB_DESTROY);
}
void
qemuDomainObjDiscardAsyncJob(virQEMUDriverPtr driver, virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
if (priv->job.active == QEMU_JOB_ASYNC_NESTED)
qemuDomainObjResetJob(priv);
qemuDomainObjResetAsyncJob(priv);
qemuDomainObjSaveJob(driver, obj);
}
void
qemuDomainObjReleaseAsyncJob(virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
VIR_DEBUG("Releasing ownership of '%s' async job",
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
if (priv->job.asyncOwner != virThreadSelfID()) {
VIR_WARN("'%s' async job is owned by thread %llu",
qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
priv->job.asyncOwner);
}
priv->job.asyncOwner = 0;
}
static bool
qemuDomainNestedJobAllowed(qemuDomainObjPrivatePtr priv, enum qemuDomainJob job)
{
return !priv->job.asyncJob || (priv->job.mask & JOB_MASK(job)) != 0;
}
bool
qemuDomainJobAllowed(qemuDomainObjPrivatePtr priv, enum qemuDomainJob job)
{
return !priv->job.active && qemuDomainNestedJobAllowed(priv, job);
}
/* Give up waiting for mutex after 30 seconds */
#define QEMU_JOB_WAIT_TIME (1000ull * 30)
/*
* obj must be locked before calling
*/
static int ATTRIBUTE_NONNULL(1)
qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
virDomainObjPtr obj,
enum qemuDomainJob job,
enum qemuDomainAsyncJob asyncJob)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
unsigned long long now;
unsigned long long then;
bool nested = job == QEMU_JOB_ASYNC_NESTED;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
priv->jobs_queued++;
if (virTimeMillisNow(&now) < 0) {
virObjectUnref(cfg);
return -1;
}
then = now + QEMU_JOB_WAIT_TIME;
virObjectRef(obj);
retry:
if (cfg->maxQueuedJobs &&
priv->jobs_queued > cfg->maxQueuedJobs) {
goto error;
}
while (!nested && !qemuDomainNestedJobAllowed(priv, job)) {
if (virCondWaitUntil(&priv->job.asyncCond, &obj->parent.lock, then) < 0)
goto error;
}
while (priv->job.active) {
if (virCondWaitUntil(&priv->job.cond, &obj->parent.lock, then) < 0)
goto error;
}
/* No job is active but a new async job could have been started while obj
* was unlocked, so we need to recheck it. */
if (!nested && !qemuDomainNestedJobAllowed(priv, job))
goto retry;
qemuDomainObjResetJob(priv);
if (job != QEMU_JOB_ASYNC) {
VIR_DEBUG("Starting job: %s (async=%s)",
qemuDomainJobTypeToString(job),
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
priv->job.active = job;
priv->job.owner = virThreadSelfID();
} else {
VIR_DEBUG("Starting async job: %s",
qemuDomainAsyncJobTypeToString(asyncJob));
qemuDomainObjResetAsyncJob(priv);
priv->job.asyncJob = asyncJob;
priv->job.asyncOwner = virThreadSelfID();
priv->job.start = now;
}
if (qemuDomainTrackJob(job))
qemuDomainObjSaveJob(driver, obj);
virObjectUnref(cfg);
return 0;
error:
VIR_WARN("Cannot start job (%s, %s) for domain %s;"
" current job is (%s, %s) owned by (%llu, %llu)",
qemuDomainJobTypeToString(job),
qemuDomainAsyncJobTypeToString(asyncJob),
obj->def->name,
qemuDomainJobTypeToString(priv->job.active),
qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
priv->job.owner, priv->job.asyncOwner);
if (errno == ETIMEDOUT)
virReportError(VIR_ERR_OPERATION_TIMEOUT,
"%s", _("cannot acquire state change lock"));
else if (cfg->maxQueuedJobs &&
priv->jobs_queued > cfg->maxQueuedJobs)
virReportError(VIR_ERR_OPERATION_FAILED,
"%s", _("cannot acquire state change lock "
"due to max_queued limit"));
else
virReportSystemError(errno,
"%s", _("cannot acquire job mutex"));
priv->jobs_queued--;
virObjectUnref(obj);
virObjectUnref(cfg);
return -1;
}
/*
* obj must be locked before calling
*
* This must be called by anything that will change the VM state
* in any way, or anything that will use the QEMU monitor.
*
* Upon successful return, the object will have its ref count increased,
* successful calls must be followed by EndJob eventually
*/
int qemuDomainObjBeginJob(virQEMUDriverPtr driver,
virDomainObjPtr obj,
enum qemuDomainJob job)
{
return qemuDomainObjBeginJobInternal(driver, obj, job,
QEMU_ASYNC_JOB_NONE);
}
int qemuDomainObjBeginAsyncJob(virQEMUDriverPtr driver,
virDomainObjPtr obj,
enum qemuDomainAsyncJob asyncJob)
{
return qemuDomainObjBeginJobInternal(driver, obj, QEMU_JOB_ASYNC,
asyncJob);
}
int
qemuDomainObjBeginNestedJob(virQEMUDriverPtr driver,
virDomainObjPtr obj,
enum qemuDomainAsyncJob asyncJob)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
if (asyncJob != priv->job.asyncJob) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("unexpected async job %d"), asyncJob);
return -1;
}
if (priv->job.asyncOwner != virThreadSelfID()) {
VIR_WARN("This thread doesn't seem to be the async job owner: %llu",
priv->job.asyncOwner);
}
return qemuDomainObjBeginJobInternal(driver, obj,
QEMU_JOB_ASYNC_NESTED,
QEMU_ASYNC_JOB_NONE);
}
/*
* obj must be locked before calling
*
* To be called after completing the work associated with the
* earlier qemuDomainBeginJob() call
*
* Returns true if @obj was still referenced, false if it was
* disposed of.
*/
bool qemuDomainObjEndJob(virQEMUDriverPtr driver, virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
enum qemuDomainJob job = priv->job.active;
priv->jobs_queued--;
VIR_DEBUG("Stopping job: %s (async=%s)",
qemuDomainJobTypeToString(job),
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
qemuDomainObjResetJob(priv);
if (qemuDomainTrackJob(job))
qemuDomainObjSaveJob(driver, obj);
virCondSignal(&priv->job.cond);
return virObjectUnref(obj);
}
bool
qemuDomainObjEndAsyncJob(virQEMUDriverPtr driver, virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
priv->jobs_queued--;
VIR_DEBUG("Stopping async job: %s",
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
qemuDomainObjResetAsyncJob(priv);
qemuDomainObjSaveJob(driver, obj);
virCondBroadcast(&priv->job.asyncCond);
return virObjectUnref(obj);
}
void
qemuDomainObjAbortAsyncJob(virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
VIR_DEBUG("Requesting abort of async job: %s",
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
priv->job.asyncAbort = true;
}
/*
* obj must be locked before calling
*
* To be called immediately before any QEMU monitor API call
* Must have already either called qemuDomainObjBeginJob() and checked
* that the VM is still active; may not be used for nested async jobs.
*
* To be followed with qemuDomainObjExitMonitor() once complete
*/
static int
qemuDomainObjEnterMonitorInternal(virQEMUDriverPtr driver,
virDomainObjPtr obj,
enum qemuDomainAsyncJob asyncJob)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
if (asyncJob != QEMU_ASYNC_JOB_NONE) {
if (qemuDomainObjBeginNestedJob(driver, obj, asyncJob) < 0)
return -1;
if (!virDomainObjIsActive(obj)) {
virReportError(VIR_ERR_OPERATION_FAILED, "%s",
_("domain is no longer running"));
/* Still referenced by the containing async job. */
ignore_value(qemuDomainObjEndJob(driver, obj));
return -1;
}
} else if (priv->job.asyncOwner == virThreadSelfID()) {
VIR_WARN("This thread seems to be the async job owner; entering"
" monitor without asking for a nested job is dangerous");
}
virObjectLock(priv->mon);
virObjectRef(priv->mon);
ignore_value(virTimeMillisNow(&priv->monStart));
virObjectUnlock(obj);
return 0;
}
static void ATTRIBUTE_NONNULL(1)
qemuDomainObjExitMonitorInternal(virQEMUDriverPtr driver,
virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
bool hasRefs;
hasRefs = virObjectUnref(priv->mon);
if (hasRefs)
virObjectUnlock(priv->mon);
virObjectLock(obj);
priv->monStart = 0;
if (!hasRefs)
priv->mon = NULL;
if (priv->job.active == QEMU_JOB_ASYNC_NESTED) {
qemuDomainObjResetJob(priv);
qemuDomainObjSaveJob(driver, obj);
virCondSignal(&priv->job.cond);
virObjectUnref(obj);
}
}
void qemuDomainObjEnterMonitor(virQEMUDriverPtr driver,
virDomainObjPtr obj)
{
ignore_value(qemuDomainObjEnterMonitorInternal(driver, obj,
QEMU_ASYNC_JOB_NONE));
}
/* obj must NOT be locked before calling
*
* Should be paired with an earlier qemuDomainObjEnterMonitor() call
*/
void qemuDomainObjExitMonitor(virQEMUDriverPtr driver,
virDomainObjPtr obj)
{
qemuDomainObjExitMonitorInternal(driver, obj);
}
/*
* obj must be locked before calling
*
* To be called immediately before any QEMU monitor API call.
* Must have already either called qemuDomainObjBeginJob()
* and checked that the VM is still active, with asyncJob of
* QEMU_ASYNC_JOB_NONE; or already called qemuDomainObjBeginAsyncJob,
* with the same asyncJob.
*
* Returns 0 if job was started, in which case this must be followed with
* qemuDomainObjExitMonitor(); or -1 if the job could not be
* started (probably because the vm exited in the meantime).
*/
int
qemuDomainObjEnterMonitorAsync(virQEMUDriverPtr driver,
virDomainObjPtr obj,
enum qemuDomainAsyncJob asyncJob)
{
return qemuDomainObjEnterMonitorInternal(driver, obj, asyncJob);
}
/*
* obj must be locked before calling
*
* To be called immediately before any QEMU agent API call.
* Must have already called qemuDomainObjBeginJob() and checked
* that the VM is still active.
*
* To be followed with qemuDomainObjExitAgent() once complete
*/
void
qemuDomainObjEnterAgent(virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
virObjectLock(priv->agent);
virObjectRef(priv->agent);
ignore_value(virTimeMillisNow(&priv->agentStart));
virObjectUnlock(obj);
}
/* obj must NOT be locked before calling
*
* Should be paired with an earlier qemuDomainObjEnterAgent() call
*/
void
qemuDomainObjExitAgent(virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
bool hasRefs;
hasRefs = virObjectUnref(priv->agent);
if (hasRefs)
virObjectUnlock(priv->agent);
virObjectLock(obj);
priv->agentStart = 0;
if (!hasRefs)
priv->agent = NULL;
}
void qemuDomainObjEnterRemote(virDomainObjPtr obj)
{
virObjectRef(obj);
virObjectUnlock(obj);
}
void qemuDomainObjExitRemote(virDomainObjPtr obj)
{
virObjectLock(obj);
virObjectUnref(obj);
}
virDomainDefPtr
qemuDomainDefCopy(virQEMUDriverPtr driver,
virDomainDefPtr src,
unsigned int flags)
{
virBuffer buf = VIR_BUFFER_INITIALIZER;
virDomainDefPtr ret = NULL;
virCapsPtr caps = NULL;
const char *xml = NULL;
if (qemuDomainDefFormatBuf(driver, src, flags, &buf) < 0)
goto cleanup;
xml = virBufferContentAndReset(&buf);
if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
goto cleanup;
if (!(ret = virDomainDefParseString(xml, caps, driver->xmlopt,
QEMU_EXPECTED_VIRT_TYPES,
VIR_DOMAIN_XML_INACTIVE)))
goto cleanup;
cleanup:
VIR_FREE(xml);
virObjectUnref(caps);
return ret;
}
int
qemuDomainDefFormatBuf(virQEMUDriverPtr driver,
virDomainDefPtr def,
unsigned int flags,
virBuffer *buf)
{
int ret = -1;
virCPUDefPtr cpu = NULL;
virCPUDefPtr def_cpu = def->cpu;
virDomainControllerDefPtr *controllers = NULL;
int ncontrollers = 0;
virCapsPtr caps = NULL;
if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
goto cleanup;
/* Update guest CPU requirements according to host CPU */
if ((flags & VIR_DOMAIN_XML_UPDATE_CPU) &&
def_cpu &&
(def_cpu->mode != VIR_CPU_MODE_CUSTOM || def_cpu->model)) {
if (!caps->host.cpu ||
!caps->host.cpu->model) {
virReportError(VIR_ERR_OPERATION_FAILED,
"%s", _("cannot get host CPU capabilities"));
goto cleanup;
}
if (!(cpu = virCPUDefCopy(def_cpu)) ||
cpuUpdate(cpu, caps->host.cpu) < 0)
goto cleanup;
def->cpu = cpu;
}
if ((flags & VIR_DOMAIN_XML_MIGRATABLE)) {
size_t i;
int toremove = 0;
virDomainControllerDefPtr usb = NULL, pci = NULL;
/* If only the default USB controller is present, we can remove it
* and make the XML compatible with older versions of libvirt which
* didn't support USB controllers in the XML but always added the
* default one to qemu anyway.
*/
for (i = 0; i < def->ncontrollers; i++) {
if (def->controllers[i]->type == VIR_DOMAIN_CONTROLLER_TYPE_USB) {
if (usb) {
usb = NULL;
break;
}
usb = def->controllers[i];
}
}
if (usb && usb->idx == 0 && usb->model == -1) {
VIR_DEBUG("Removing default USB controller from domain '%s'"
" for migration compatibility", def->name);
toremove++;
} else {
usb = NULL;
}
/* Remove the default PCI controller if there is only one present
* and its model is pci-root */
for (i = 0; i < def->ncontrollers; i++) {
if (def->controllers[i]->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI) {
if (pci) {
pci = NULL;
break;
}
pci = def->controllers[i];
}
}
if (pci && pci->idx == 0 &&
pci->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) {
VIR_DEBUG("Removing default 'pci-root' from domain '%s'"
" for migration compatibility", def->name);
toremove++;
} else {
pci = NULL;
}
if (toremove) {
controllers = def->controllers;
ncontrollers = def->ncontrollers;
if (VIR_ALLOC_N(def->controllers, ncontrollers - toremove) < 0) {
controllers = NULL;
goto cleanup;
}
def->ncontrollers = 0;
for (i = 0; i < ncontrollers; i++) {
if (controllers[i] != usb && controllers[i] != pci)
def->controllers[def->ncontrollers++] = controllers[i];
}
}
}
ret = virDomainDefFormatInternal(def, flags, buf);
cleanup:
def->cpu = def_cpu;
virCPUDefFree(cpu);
if (controllers) {
VIR_FREE(def->controllers);
def->controllers = controllers;
def->ncontrollers = ncontrollers;
}
virObjectUnref(caps);
return ret;
}
char *qemuDomainDefFormatXML(virQEMUDriverPtr driver,
virDomainDefPtr def,
unsigned int flags)
{
virBuffer buf = VIR_BUFFER_INITIALIZER;
if (qemuDomainDefFormatBuf(driver, def, flags, &buf) < 0) {
virBufferFreeAndReset(&buf);
return NULL;
}
if (virBufferError(&buf)) {
virReportOOMError();
virBufferFreeAndReset(&buf);
return NULL;
}
return virBufferContentAndReset(&buf);
}
char *qemuDomainFormatXML(virQEMUDriverPtr driver,
virDomainObjPtr vm,
unsigned int flags)
{
virDomainDefPtr def;
if ((flags & VIR_DOMAIN_XML_INACTIVE) && vm->newDef)
def = vm->newDef;
else
def = vm->def;
return qemuDomainDefFormatXML(driver, def, flags);
}
char *
qemuDomainDefFormatLive(virQEMUDriverPtr driver,
virDomainDefPtr def,
bool inactive,
bool compatible)
{
unsigned int flags = QEMU_DOMAIN_FORMAT_LIVE_FLAGS;
if (inactive)
flags |= VIR_DOMAIN_XML_INACTIVE;
if (compatible)
flags |= VIR_DOMAIN_XML_MIGRATABLE;
return qemuDomainDefFormatXML(driver, def, flags);
}
void qemuDomainObjTaint(virQEMUDriverPtr driver,
virDomainObjPtr obj,
enum virDomainTaintFlags taint,
int logFD)
{
virErrorPtr orig_err = NULL;
if (virDomainObjTaint(obj, taint)) {
char uuidstr[VIR_UUID_STRING_BUFLEN];
virUUIDFormat(obj->def->uuid, uuidstr);
VIR_WARN("Domain id=%d name='%s' uuid=%s is tainted: %s",
obj->def->id,
obj->def->name,
uuidstr,
virDomainTaintTypeToString(taint));
/* We don't care about errors logging taint info, so
* preserve original error, and clear any error that
* is raised */
orig_err = virSaveLastError();
if (qemuDomainAppendLog(driver, obj, logFD,
"Domain id=%d is tainted: %s\n",
obj->def->id,
virDomainTaintTypeToString(taint)) < 0)
virResetLastError();
if (orig_err) {
virSetError(orig_err);
virFreeError(orig_err);
}
}
}
void qemuDomainObjCheckTaint(virQEMUDriverPtr driver,
virDomainObjPtr obj,
int logFD)
{
size_t i;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (cfg->privileged &&
(!cfg->clearEmulatorCapabilities ||
cfg->user == 0 ||
cfg->group == 0))
qemuDomainObjTaint(driver, obj, VIR_DOMAIN_TAINT_HIGH_PRIVILEGES, logFD);
if (obj->def->namespaceData) {
qemuDomainCmdlineDefPtr qemucmd = obj->def->namespaceData;
if (qemucmd->num_args || qemucmd->num_env)
qemuDomainObjTaint(driver, obj, VIR_DOMAIN_TAINT_CUSTOM_ARGV, logFD);
}
if (obj->def->cpu && obj->def->cpu->mode == VIR_CPU_MODE_HOST_PASSTHROUGH)
qemuDomainObjTaint(driver, obj, VIR_DOMAIN_TAINT_HOST_CPU, logFD);
for (i = 0; i < obj->def->ndisks; i++)
qemuDomainObjCheckDiskTaint(driver, obj, obj->def->disks[i], logFD);
for (i = 0; i < obj->def->nnets; i++)
qemuDomainObjCheckNetTaint(driver, obj, obj->def->nets[i], logFD);
virObjectUnref(cfg);
}
void qemuDomainObjCheckDiskTaint(virQEMUDriverPtr driver,
virDomainObjPtr obj,
virDomainDiskDefPtr disk,
int logFD)
{
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if ((!disk->format || disk->format == VIR_STORAGE_FILE_AUTO) &&
cfg->allowDiskFormatProbing)
qemuDomainObjTaint(driver, obj, VIR_DOMAIN_TAINT_DISK_PROBING, logFD);
if (disk->rawio == 1)
qemuDomainObjTaint(driver, obj, VIR_DOMAIN_TAINT_HIGH_PRIVILEGES, logFD);
virObjectUnref(cfg);
}
void qemuDomainObjCheckNetTaint(virQEMUDriverPtr driver,
virDomainObjPtr obj,
virDomainNetDefPtr net,
int logFD)
{
/* script is only useful for NET_TYPE_ETHERNET (qemu) and
* NET_TYPE_BRIDGE (xen), but could be (incorrectly) specified for
* any interface type. In any case, it's adding user sauce into
* the soup, so it should taint the domain.
*/
if (net->script != NULL)
qemuDomainObjTaint(driver, obj, VIR_DOMAIN_TAINT_SHELL_SCRIPTS, logFD);
}
static int
qemuDomainOpenLogHelper(virQEMUDriverConfigPtr cfg,
virDomainObjPtr vm,
int oflags,
mode_t mode)
{
char *logfile;
int fd = -1;
bool trunc = false;
if (virAsprintf(&logfile, "%s/%s.log", cfg->logDir, vm->def->name) < 0)
return -1;
/* To make SELinux happy we always need to open in append mode.
* So we fake O_TRUNC by calling ftruncate after open instead
*/
if (oflags & O_TRUNC) {
oflags &= ~O_TRUNC;
oflags |= O_APPEND;
trunc = true;
}
if ((fd = open(logfile, oflags, mode)) < 0) {
virReportSystemError(errno, _("failed to create logfile %s"),
logfile);
goto cleanup;
}
if (virSetCloseExec(fd) < 0) {
virReportSystemError(errno, _("failed to set close-on-exec flag on %s"),
logfile);
VIR_FORCE_CLOSE(fd);
goto cleanup;
}
if (trunc &&
ftruncate(fd, 0) < 0) {
virReportSystemError(errno, _("failed to truncate %s"),
logfile);
VIR_FORCE_CLOSE(fd);
goto cleanup;
}
cleanup:
VIR_FREE(logfile);
return fd;
}
int
qemuDomainCreateLog(virQEMUDriverPtr driver, virDomainObjPtr vm,
bool append)
{
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
int oflags;
int ret;
oflags = O_CREAT | O_WRONLY;
/* Only logrotate files in /var/log, so only append if running privileged */
if (cfg->privileged || append)
oflags |= O_APPEND;
else
oflags |= O_TRUNC;
ret = qemuDomainOpenLogHelper(cfg, vm, oflags, S_IRUSR | S_IWUSR);
virObjectUnref(cfg);
return ret;
}
int
qemuDomainOpenLog(virQEMUDriverPtr driver, virDomainObjPtr vm, off_t pos)
{
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
int fd;
off_t off;
int whence;
fd = qemuDomainOpenLogHelper(cfg, vm, O_RDONLY, 0);
virObjectUnref(cfg);
if (fd < 0)
return -1;
if (pos < 0) {
off = 0;
whence = SEEK_END;
} else {
off = pos;
whence = SEEK_SET;
}
if (lseek(fd, off, whence) < 0) {
if (whence == SEEK_END)
virReportSystemError(errno,
_("unable to seek to end of log for %s"),
vm->def->name);
else
virReportSystemError(errno,
_("unable to seek to %lld from start for %s"),
(long long)off, vm->def->name);
VIR_FORCE_CLOSE(fd);
}
return fd;
}
int qemuDomainAppendLog(virQEMUDriverPtr driver,
virDomainObjPtr obj,
int logFD,
const char *fmt, ...)
{
int fd = logFD;
va_list argptr;
char *message = NULL;
int ret = -1;
va_start(argptr, fmt);
if ((fd == -1) &&
(fd = qemuDomainCreateLog(driver, obj, true)) < 0)
goto cleanup;
if (virVasprintf(&message, fmt, argptr) < 0)
goto cleanup;
if (safewrite(fd, message, strlen(message)) < 0) {
virReportSystemError(errno, _("Unable to write to domain logfile %s"),
obj->def->name);
goto cleanup;
}
ret = 0;
cleanup:
va_end(argptr);
if (fd != logFD)
VIR_FORCE_CLOSE(fd);
VIR_FREE(message);
return ret;
}
/* Locate an appropriate 'qemu-img' binary. */
const char *
qemuFindQemuImgBinary(virQEMUDriverPtr driver)
{
if (!driver->qemuImgBinary)
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("unable to find kvm-img or qemu-img"));
return driver->qemuImgBinary;
}
int
qemuDomainSnapshotWriteMetadata(virDomainObjPtr vm,
virDomainSnapshotObjPtr snapshot,
char *snapshotDir)
{
char *newxml = NULL;
int ret = -1;
char *snapDir = NULL;
char *snapFile = NULL;
char uuidstr[VIR_UUID_STRING_BUFLEN];
virUUIDFormat(vm->def->uuid, uuidstr);
newxml = virDomainSnapshotDefFormat(uuidstr, snapshot->def,
QEMU_DOMAIN_FORMAT_LIVE_FLAGS, 1);
if (newxml == NULL)
return -1;
if (virAsprintf(&snapDir, "%s/%s", snapshotDir, vm->def->name) < 0)
goto cleanup;
if (virFileMakePath(snapDir) < 0) {
virReportSystemError(errno, _("cannot create snapshot directory '%s'"),
snapDir);
goto cleanup;
}
if (virAsprintf(&snapFile, "%s/%s.xml", snapDir, snapshot->def->name) < 0)
goto cleanup;
ret = virXMLSaveFile(snapFile, NULL, "snapshot-edit", newxml);
cleanup:
VIR_FREE(snapFile);
VIR_FREE(snapDir);
VIR_FREE(newxml);
return ret;
}
/* The domain is expected to be locked and inactive. Return -1 on normal
* failure, 1 if we skipped a disk due to try_all. */
static int
qemuDomainSnapshotForEachQcow2Raw(virQEMUDriverPtr driver,
virDomainDefPtr def,
const char *name,
const char *op,
bool try_all,
int ndisks)
{
const char *qemuimgarg[] = { NULL, "snapshot", NULL, NULL, NULL, NULL };
size_t i;
bool skipped = false;
qemuimgarg[0] = qemuFindQemuImgBinary(driver);
if (qemuimgarg[0] == NULL) {
/* qemuFindQemuImgBinary set the error */
return -1;
}
qemuimgarg[2] = op;
qemuimgarg[3] = name;
for (i = 0; i < ndisks; i++) {
/* FIXME: we also need to handle LVM here */
if (def->disks[i]->device == VIR_DOMAIN_DISK_DEVICE_DISK) {
if (def->disks[i]->format > 0 &&
def->disks[i]->format != VIR_STORAGE_FILE_QCOW2) {
if (try_all) {
/* Continue on even in the face of error, since other
* disks in this VM may have the same snapshot name.
*/
VIR_WARN("skipping snapshot action on %s",
def->disks[i]->dst);
skipped = true;
continue;
} else if (STREQ(op, "-c") && i) {
/* We must roll back partial creation by deleting
* all earlier snapshots. */
qemuDomainSnapshotForEachQcow2Raw(driver, def, name,
"-d", false, i);
}
virReportError(VIR_ERR_OPERATION_INVALID,
_("Disk device '%s' does not support"
" snapshotting"),
def->disks[i]->dst);
return -1;
}
qemuimgarg[4] = def->disks[i]->src;
if (virRun(qemuimgarg, NULL) < 0) {
if (try_all) {
VIR_WARN("skipping snapshot action on %s",
def->disks[i]->dst);
skipped = true;
continue;
} else if (STREQ(op, "-c") && i) {
/* We must roll back partial creation by deleting
* all earlier snapshots. */
qemuDomainSnapshotForEachQcow2Raw(driver, def, name,
"-d", false, i);
}
return -1;
}
}
}
return skipped ? 1 : 0;
}
/* The domain is expected to be locked and inactive. Return -1 on normal
* failure, 1 if we skipped a disk due to try_all. */
int
qemuDomainSnapshotForEachQcow2(virQEMUDriverPtr driver,
virDomainObjPtr vm,
virDomainSnapshotObjPtr snap,
const char *op,
bool try_all)
{
/* Prefer action on the disks in use at the time the snapshot was
* created; but fall back to current definition if dealing with a
* snapshot created prior to libvirt 0.9.5. */
virDomainDefPtr def = snap->def->dom;
if (!def)
def = vm->def;
return qemuDomainSnapshotForEachQcow2Raw(driver, def, snap->def->name,
op, try_all, def->ndisks);
}
/* Discard one snapshot (or its metadata), without reparenting any children. */
int
qemuDomainSnapshotDiscard(virQEMUDriverPtr driver,
virDomainObjPtr vm,
virDomainSnapshotObjPtr snap,
bool update_current,
bool metadata_only)
{
char *snapFile = NULL;
int ret = -1;
qemuDomainObjPrivatePtr priv;
virDomainSnapshotObjPtr parentsnap = NULL;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (!metadata_only) {
if (!virDomainObjIsActive(vm)) {
/* Ignore any skipped disks */
if (qemuDomainSnapshotForEachQcow2(driver, vm, snap, "-d",
true) < 0)
goto cleanup;
} else {
priv = vm->privateData;
qemuDomainObjEnterMonitor(driver, vm);
/* we continue on even in the face of error */
qemuMonitorDeleteSnapshot(priv->mon, snap->def->name);
qemuDomainObjExitMonitor(driver, vm);
}
}
if (virAsprintf(&snapFile, "%s/%s/%s.xml", cfg->snapshotDir,
vm->def->name, snap->def->name) < 0)
goto cleanup;
if (snap == vm->current_snapshot) {
if (update_current && snap->def->parent) {
parentsnap = virDomainSnapshotFindByName(vm->snapshots,
snap->def->parent);
if (!parentsnap) {
VIR_WARN("missing parent snapshot matching name '%s'",
snap->def->parent);
} else {
parentsnap->def->current = true;
if (qemuDomainSnapshotWriteMetadata(vm, parentsnap,
cfg->snapshotDir) < 0) {
VIR_WARN("failed to set parent snapshot '%s' as current",
snap->def->parent);
parentsnap->def->current = false;
parentsnap = NULL;
}
}
}
vm->current_snapshot = parentsnap;
}
if (unlink(snapFile) < 0)
VIR_WARN("Failed to unlink %s", snapFile);
virDomainSnapshotObjListRemove(vm->snapshots, snap);
ret = 0;
cleanup:
VIR_FREE(snapFile);
virObjectUnref(cfg);
return ret;
}
/* Hash iterator callback to discard multiple snapshots. */
void qemuDomainSnapshotDiscardAll(void *payload,
const void *name ATTRIBUTE_UNUSED,
void *data)
{
virDomainSnapshotObjPtr snap = payload;
virQEMUSnapRemovePtr curr = data;
int err;
if (snap->def->current)
curr->current = true;
err = qemuDomainSnapshotDiscard(curr->driver, curr->vm, snap, false,
curr->metadata_only);
if (err && !curr->err)
curr->err = err;
}
int
qemuDomainSnapshotDiscardAllMetadata(virQEMUDriverPtr driver,
virDomainObjPtr vm)
{
virQEMUSnapRemove rem;
rem.driver = driver;
rem.vm = vm;
rem.metadata_only = true;
rem.err = 0;
virDomainSnapshotForEach(vm->snapshots, qemuDomainSnapshotDiscardAll,
&rem);
return rem.err;
}
/*
* The caller must hold a lock the vm and there must
* be no remaining references to vm.
*/
void
qemuDomainRemoveInactive(virQEMUDriverPtr driver,
virDomainObjPtr vm)
{
char *snapDir;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
/* Remove any snapshot metadata prior to removing the domain */
if (qemuDomainSnapshotDiscardAllMetadata(driver, vm) < 0) {
VIR_WARN("unable to remove all snapshots for domain %s",
vm->def->name);
}
else if (virAsprintf(&snapDir, "%s/%s", cfg->snapshotDir,
vm->def->name) < 0) {
VIR_WARN("unable to remove snapshot directory %s/%s",
cfg->snapshotDir, vm->def->name);
} else {
if (rmdir(snapDir) < 0 && errno != ENOENT)
VIR_WARN("unable to remove snapshot directory %s", snapDir);
VIR_FREE(snapDir);
}
virDomainObjListRemove(driver->domains, vm);
virObjectUnref(cfg);
}
void
qemuDomainSetFakeReboot(virQEMUDriverPtr driver,
virDomainObjPtr vm,
bool value)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (priv->fakeReboot == value)
goto cleanup;
priv->fakeReboot = value;
if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0)
VIR_WARN("Failed to save status on vm %s", vm->def->name);
cleanup:
virObjectUnref(cfg);
}
int
qemuDomainCheckDiskPresence(virQEMUDriverPtr driver,
virDomainObjPtr vm,
bool cold_boot)
{
int ret = -1;
size_t i;
virDomainDiskDefPtr disk;
char uuid[VIR_UUID_STRING_BUFLEN];
virDomainEventPtr event = NULL;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
virUUIDFormat(vm->def->uuid, uuid);
for (i = 0; i < vm->def->ndisks; i++) {
disk = vm->def->disks[i];
if (!disk->startupPolicy || !disk->src)
continue;
if (virFileAccessibleAs(disk->src, F_OK,
cfg->user,
cfg->group) >= 0) {
/* disk accessible */
continue;
}
switch ((enum virDomainStartupPolicy) disk->startupPolicy) {
case VIR_DOMAIN_STARTUP_POLICY_OPTIONAL:
break;
case VIR_DOMAIN_STARTUP_POLICY_MANDATORY:
virReportSystemError(errno,
_("cannot access file '%s'"),
disk->src);
goto cleanup;
break;
case VIR_DOMAIN_STARTUP_POLICY_REQUISITE:
if (cold_boot) {
virReportSystemError(errno,
_("cannot access file '%s'"),
disk->src);
goto cleanup;
}
break;
case VIR_DOMAIN_STARTUP_POLICY_DEFAULT:
case VIR_DOMAIN_STARTUP_POLICY_LAST:
/* this should never happen */
break;
}
VIR_DEBUG("Dropping disk '%s' on domain '%s' (UUID '%s') "
"due to inaccessible source '%s'",
disk->dst, vm->def->name, uuid, disk->src);
event = virDomainEventDiskChangeNewFromObj(vm, disk->src, NULL, disk->info.alias,
VIR_DOMAIN_EVENT_DISK_CHANGE_MISSING_ON_START);
if (event)
qemuDomainEventQueue(driver, event);
VIR_FREE(disk->src);
}
ret = 0;
cleanup:
virObjectUnref(cfg);
return ret;
}
/*
* The vm must be locked when any of the following cleanup functions is
* called.
*/
int
qemuDomainCleanupAdd(virDomainObjPtr vm,
qemuDomainCleanupCallback cb)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
size_t i;
VIR_DEBUG("vm=%s, cb=%p", vm->def->name, cb);
for (i = 0; i < priv->ncleanupCallbacks; i++) {
if (priv->cleanupCallbacks[i] == cb)
return 0;
}
if (VIR_RESIZE_N(priv->cleanupCallbacks,
priv->ncleanupCallbacks_max,
priv->ncleanupCallbacks, 1) < 0)
return -1;
priv->cleanupCallbacks[priv->ncleanupCallbacks++] = cb;
return 0;
}
void
qemuDomainCleanupRemove(virDomainObjPtr vm,
qemuDomainCleanupCallback cb)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
size_t i;
VIR_DEBUG("vm=%s, cb=%p", vm->def->name, cb);
for (i = 0; i < priv->ncleanupCallbacks; i++) {
if (priv->cleanupCallbacks[i] == cb) {
memmove(priv->cleanupCallbacks + i,
priv->cleanupCallbacks + i + 1,
priv->ncleanupCallbacks - i - 1);
priv->ncleanupCallbacks--;
}
}
VIR_SHRINK_N(priv->cleanupCallbacks,
priv->ncleanupCallbacks_max,
priv->ncleanupCallbacks_max - priv->ncleanupCallbacks);
}
void
qemuDomainCleanupRun(virQEMUDriverPtr driver,
virDomainObjPtr vm)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
size_t i;
VIR_DEBUG("driver=%p, vm=%s", driver, vm->def->name);
/* run cleanup callbacks in reverse order */
for (i = 0; i < priv->ncleanupCallbacks; i++) {
if (priv->cleanupCallbacks[priv->ncleanupCallbacks - (i + 1)])
priv->cleanupCallbacks[i](driver, vm);
}
VIR_FREE(priv->cleanupCallbacks);
priv->ncleanupCallbacks = 0;
priv->ncleanupCallbacks_max = 0;
}
int
qemuDomainDetermineDiskChain(virQEMUDriverPtr driver,
virDomainDiskDefPtr disk,
bool force)
{
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
int ret = 0;
if (!disk->src ||
disk->type == VIR_DOMAIN_DISK_TYPE_NETWORK ||
disk->type == VIR_DOMAIN_DISK_TYPE_VOLUME)
goto cleanup;
if (disk->backingChain) {
if (force) {
virStorageFileFreeMetadata(disk->backingChain);
disk->backingChain = NULL;
} else {
goto cleanup;
}
}
disk->backingChain = virStorageFileGetMetadata(disk->src, disk->format,
cfg->user, cfg->group,
cfg->allowDiskFormatProbing);
if (!disk->backingChain)
ret = -1;
cleanup:
virObjectUnref(cfg);
return ret;
}
unsigned long long
qemuDomainMemoryLimit(virDomainDefPtr def)
{
unsigned long long mem;
size_t i;
if (def->mem.hard_limit) {
mem = def->mem.hard_limit;
} else {
/* If there is no hard_limit set, compute a reasonable one to avoid
* system thrashing caused by exploited qemu. A 'reasonable
* limit' has been chosen:
* (1 + k) * (domain memory + total video memory) + (32MB for
* cache per each disk) + F
* where k = 0.5 and F = 400MB. The cache for disks is important as
* kernel cache on the host side counts into the RSS limit.
* Moreover, VFIO requires some amount for IO space. Alex Williamson
* suggested adding 1GiB for IO space just to be safe (some finer
* tuning might be nice, though).
*
* Technically, the disk cache does not have to be included in
* RLIMIT_MEMLOCK but it doesn't hurt as it's just an upper limit and
* it makes this function and its usage simpler.
*/
mem = def->mem.max_balloon;
for (i = 0; i < def->nvideos; i++)
mem += def->videos[i]->vram;
mem *= 1.5;
mem += def->ndisks * 32768;
mem += 409600;
for (i = 0; i < def->nhostdevs; i++) {
virDomainHostdevDefPtr hostdev = def->hostdevs[i];
if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
hostdev->source.subsys.type ==
VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
hostdev->source.subsys.u.pci.backend ==
VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
mem += 1024 * 1024;
break;
}
}
}
return mem;
}