/*
* qemu_namespace.c: QEMU domain namespace helpers
*
* Copyright (C) 2006-2020 Red Hat, Inc.
* Copyright (C) 2006 Daniel P. Berrange
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* .
*/
#include
#ifdef __linux__
# include
#endif
#if defined(WITH_SYS_MOUNT_H)
# include
#endif
#ifdef WITH_SELINUX
# include
#endif
#include "qemu_namespace.h"
#include "qemu_domain.h"
#include "qemu_cgroup.h"
#include "qemu_security.h"
#include "qemu_hostdev.h"
#include "viralloc.h"
#include "virlog.h"
#include "virstring.h"
#include "virdevmapper.h"
#include "virglibutil.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
VIR_LOG_INIT("qemu.qemu_domain");
VIR_ENUM_IMPL(qemuDomainNamespace,
QEMU_DOMAIN_NS_LAST,
"mount",
);
/**
* qemuDomainGetPreservedMountPath:
* @cfg: driver configuration data
* @vm: domain object
* @mountpoint: mount point path to convert
*
* For given @mountpoint return new path where the mount point
* should be moved temporarily whilst building the namespace.
*
* Returns: allocated string on success which the caller must free,
* NULL on failure.
*/
static char *
qemuDomainGetPreservedMountPath(virQEMUDriverConfigPtr cfg,
virDomainObjPtr vm,
const char *mountpoint)
{
char *path = NULL;
char *tmp;
const char *suffix = mountpoint + strlen(QEMU_DEVPREFIX);
g_autofree char *domname = virDomainDefGetShortName(vm->def);
size_t off;
if (!domname)
return NULL;
if (STREQ(mountpoint, "/dev"))
suffix = "dev";
path = g_strdup_printf("%s/%s.%s", cfg->stateDir, domname, suffix);
/* Now consider that @mountpoint is "/dev/blah/blah2".
* @suffix then points to "blah/blah2". However, caller
* expects all the @paths to be the same depth. The
* caller doesn't always do `mkdir -p` but sometimes bare
* `touch`. Therefore fix all the suffixes. */
off = strlen(path) - strlen(suffix);
tmp = path + off;
while (*tmp) {
if (*tmp == '/')
*tmp = '.';
tmp++;
}
return path;
}
/**
* qemuDomainGetPreservedMounts:
*
* Process list of mounted filesystems and:
* a) save all FSs mounted under /dev to @devPath
* b) generate backup path for all the entries in a)
*
* Any of the return pointers can be NULL. Both arrays are NULL-terminated.
*
* Returns 0 on success, -1 otherwise (with error reported)
*/
static int
qemuDomainGetPreservedMounts(virQEMUDriverConfigPtr cfg,
virDomainObjPtr vm,
char ***devPath,
char ***devSavePath,
size_t *ndevPath)
{
g_auto(GStrv) mounts = NULL;
size_t nmounts = 0;
g_auto(GStrv) paths = NULL;
g_auto(GStrv) savePaths = NULL;
size_t i;
if (ndevPath)
*ndevPath = 0;
if (virFileGetMountSubtree(QEMU_PROC_MOUNTS, "/dev", &mounts, &nmounts) < 0)
return -1;
if (nmounts == 0)
return 0;
/* There can be nested mount points. For instance
* /dev/shm/blah can be a mount point and /dev/shm too. It
* doesn't make much sense to return the former path because
* caller preserves the latter (and with that the former
* too). Therefore prune nested mount points.
* NB mounts[0] is "/dev". Should we start the outer loop
* from the beginning of the array all we'd be left with is
* just the first element. Think about it.
*/
for (i = 1; i < nmounts; i++) {
size_t j = i + 1;
while (j < nmounts) {
char *c = STRSKIP(mounts[j], mounts[i]);
if (c && (*c == '/' || *c == '\0')) {
VIR_DEBUG("Dropping path %s because of %s", mounts[j], mounts[i]);
VIR_DELETE_ELEMENT(mounts, j, nmounts);
} else {
j++;
}
}
}
/* mounts may not be NULL-terminated at this point, but we convert it into
* 'paths' which is NULL-terminated */
paths = g_new0(char *, nmounts + 1);
for (i = 0; i < nmounts; i++)
paths[i] = g_steal_pointer(&mounts[i]);
if (devSavePath) {
savePaths = g_new0(char *, nmounts + 1);
for (i = 0; i < nmounts; i++) {
if (!(savePaths[i] = qemuDomainGetPreservedMountPath(cfg, vm, paths[i])))
return -1;
}
}
if (devPath)
*devPath = g_steal_pointer(&paths);
if (devSavePath)
*devSavePath = g_steal_pointer(&savePaths);
if (ndevPath)
*ndevPath = nmounts;
return 0;
}
static int
qemuDomainPopulateDevices(virQEMUDriverConfigPtr cfg,
GSList **paths)
{
const char *const *devices = (const char *const *) cfg->cgroupDeviceACL;
size_t i;
if (!devices)
devices = defaultDeviceACL;
for (i = 0; devices[i]; i++) {
*paths = g_slist_prepend(*paths, g_strdup(devices[i]));
}
return 0;
}
static int
qemuDomainSetupDev(virSecurityManagerPtr mgr,
virDomainObjPtr vm,
const char *path)
{
g_autofree char *mount_options = NULL;
g_autofree char *opts = NULL;
VIR_DEBUG("Setting up /dev/ for domain %s", vm->def->name);
mount_options = qemuSecurityGetMountOptions(mgr, vm->def);
if (!mount_options)
mount_options = g_strdup("");
/*
* tmpfs is limited to 64kb, since we only have device nodes in there
* and don't want to DOS the entire OS RAM usage
*/
opts = g_strdup_printf("mode=755,size=65536%s", mount_options);
if (virFileSetupDev(path, opts) < 0)
return -1;
return 0;
}
static int
qemuDomainSetupDisk(virStorageSourcePtr src,
GSList **paths)
{
virStorageSourcePtr next;
bool hasNVMe = false;
for (next = src; virStorageSourceIsBacking(next); next = next->backingStore) {
g_autofree char *tmpPath = NULL;
if (next->type == VIR_STORAGE_TYPE_NVME) {
hasNVMe = true;
if (!(tmpPath = virPCIDeviceAddressGetIOMMUGroupDev(&next->nvme->pciAddr)))
return -1;
} else {
g_auto(GStrv) targetPaths = NULL;
GStrv tmp;
if (virStorageSourceIsEmpty(next) ||
!virStorageSourceIsLocalStorage(next)) {
/* Not creating device. Just continue. */
continue;
}
tmpPath = g_strdup(next->path);
if (virDevMapperGetTargets(next->path, &targetPaths) < 0 &&
errno != ENOSYS) {
virReportSystemError(errno,
_("Unable to get devmapper targets for %s"),
next->path);
return -1;
}
if (targetPaths) {
for (tmp = targetPaths; *tmp; tmp++)
*paths = g_slist_prepend(*paths, g_steal_pointer(tmp));
}
}
*paths = g_slist_prepend(*paths, g_steal_pointer(&tmpPath));
}
/* qemu-pr-helper might require access to /dev/mapper/control. */
if (src->pr)
*paths = g_slist_prepend(*paths, g_strdup(QEMU_DEVICE_MAPPER_CONTROL_PATH));
if (hasNVMe)
*paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO));
return 0;
}
static int
qemuDomainSetupAllDisks(virDomainObjPtr vm,
GSList **paths)
{
size_t i;
VIR_DEBUG("Setting up disks");
for (i = 0; i < vm->def->ndisks; i++) {
if (qemuDomainSetupDisk(vm->def->disks[i]->src,
paths) < 0)
return -1;
}
VIR_DEBUG("Setup all disks");
return 0;
}
static int
qemuDomainSetupHostdev(virDomainObjPtr vm,
virDomainHostdevDefPtr hostdev,
bool hotplug,
GSList **paths)
{
g_autofree char *path = NULL;
if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0)
return -1;
if (path)
*paths = g_slist_prepend(*paths, g_steal_pointer(&path));
if (qemuHostdevNeedsVFIO(hostdev) &&
(!hotplug || !qemuDomainNeedsVFIO(vm->def)))
*paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO));
return 0;
}
static int
qemuDomainSetupAllHostdevs(virDomainObjPtr vm,
GSList **paths)
{
size_t i;
VIR_DEBUG("Setting up hostdevs");
for (i = 0; i < vm->def->nhostdevs; i++) {
if (qemuDomainSetupHostdev(vm,
vm->def->hostdevs[i],
false,
paths) < 0)
return -1;
}
VIR_DEBUG("Setup all hostdevs");
return 0;
}
static int
qemuDomainSetupMemory(virDomainMemoryDefPtr mem,
GSList **paths)
{
if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM)
return 0;
*paths = g_slist_prepend(*paths, g_strdup(mem->nvdimmPath));
return 0;
}
static int
qemuDomainSetupAllMemories(virDomainObjPtr vm,
GSList **paths)
{
size_t i;
VIR_DEBUG("Setting up memories");
for (i = 0; i < vm->def->nmems; i++) {
if (qemuDomainSetupMemory(vm->def->mems[i],
paths) < 0)
return -1;
}
VIR_DEBUG("Setup all memories");
return 0;
}
static int
qemuDomainSetupChardev(virDomainDefPtr def G_GNUC_UNUSED,
virDomainChrDefPtr dev,
void *opaque)
{
GSList **paths = opaque;
const char *path = NULL;
if (!(path = virDomainChrSourceDefGetPath(dev->source)))
return 0;
/* Socket created by qemu. It doesn't exist upfront. */
if (dev->source->type == VIR_DOMAIN_CHR_TYPE_UNIX &&
dev->source->data.nix.listen)
return 0;
*paths = g_slist_prepend(*paths, g_strdup(path));
return 0;
}
static int
qemuDomainSetupAllChardevs(virDomainObjPtr vm,
GSList **paths)
{
VIR_DEBUG("Setting up chardevs");
if (virDomainChrDefForeach(vm->def,
true,
qemuDomainSetupChardev,
paths) < 0)
return -1;
VIR_DEBUG("Setup all chardevs");
return 0;
}
static int
qemuDomainSetupTPM(virDomainTPMDefPtr dev,
GSList **paths)
{
switch (dev->type) {
case VIR_DOMAIN_TPM_TYPE_PASSTHROUGH:
*paths = g_slist_prepend(*paths, g_strdup(dev->data.passthrough.source.data.file.path));
break;
case VIR_DOMAIN_TPM_TYPE_EMULATOR:
case VIR_DOMAIN_TPM_TYPE_LAST:
/* nada */
break;
}
return 0;
}
static int
qemuDomainSetupAllTPMs(virDomainObjPtr vm,
GSList **paths)
{
size_t i;
VIR_DEBUG("Setting up TPMs");
for (i = 0; i < vm->def->ntpms; i++) {
if (qemuDomainSetupTPM(vm->def->tpms[i], paths) < 0)
return -1;
}
VIR_DEBUG("Setup all TPMs");
return 0;
}
static int
qemuDomainSetupGraphics(virDomainGraphicsDefPtr gfx,
GSList **paths)
{
const char *rendernode = virDomainGraphicsGetRenderNode(gfx);
if (!rendernode)
return 0;
*paths = g_slist_prepend(*paths, g_strdup(rendernode));
return 0;
}
static int
qemuDomainSetupAllGraphics(virDomainObjPtr vm,
GSList **paths)
{
size_t i;
VIR_DEBUG("Setting up graphics");
for (i = 0; i < vm->def->ngraphics; i++) {
if (qemuDomainSetupGraphics(vm->def->graphics[i],
paths) < 0)
return -1;
}
VIR_DEBUG("Setup all graphics");
return 0;
}
static int
qemuDomainSetupInput(virDomainInputDefPtr input,
GSList **paths)
{
const char *path = virDomainInputDefGetPath(input);
if (!path)
return 0;
*paths = g_slist_prepend(*paths, g_strdup(path));
return 0;
}
static int
qemuDomainSetupAllInputs(virDomainObjPtr vm,
GSList **paths)
{
size_t i;
VIR_DEBUG("Setting up inputs");
for (i = 0; i < vm->def->ninputs; i++) {
if (qemuDomainSetupInput(vm->def->inputs[i],
paths) < 0)
return -1;
}
VIR_DEBUG("Setup all inputs");
return 0;
}
static int
qemuDomainSetupRNG(virDomainRNGDefPtr rng,
GSList **paths)
{
switch ((virDomainRNGBackend) rng->backend) {
case VIR_DOMAIN_RNG_BACKEND_RANDOM:
*paths = g_slist_prepend(*paths, g_strdup(rng->source.file));
break;
case VIR_DOMAIN_RNG_BACKEND_EGD:
case VIR_DOMAIN_RNG_BACKEND_BUILTIN:
case VIR_DOMAIN_RNG_BACKEND_LAST:
/* nada */
break;
}
return 0;
}
static int
qemuDomainSetupAllRNGs(virDomainObjPtr vm,
GSList **paths)
{
size_t i;
VIR_DEBUG("Setting up RNGs");
for (i = 0; i < vm->def->nrngs; i++) {
if (qemuDomainSetupRNG(vm->def->rngs[i],
paths) < 0)
return -1;
}
VIR_DEBUG("Setup all RNGs");
return 0;
}
static int
qemuDomainSetupLoader(virDomainObjPtr vm,
GSList **paths)
{
virDomainLoaderDefPtr loader = vm->def->os.loader;
VIR_DEBUG("Setting up loader");
if (loader) {
switch ((virDomainLoader) loader->type) {
case VIR_DOMAIN_LOADER_TYPE_ROM:
*paths = g_slist_prepend(*paths, g_strdup(loader->path));
break;
case VIR_DOMAIN_LOADER_TYPE_PFLASH:
*paths = g_slist_prepend(*paths, g_strdup(loader->path));
if (loader->nvram)
*paths = g_slist_prepend(*paths, g_strdup(loader->nvram));
break;
case VIR_DOMAIN_LOADER_TYPE_NONE:
case VIR_DOMAIN_LOADER_TYPE_LAST:
break;
}
}
VIR_DEBUG("Setup loader");
return 0;
}
static int
qemuDomainSetupLaunchSecurity(virDomainObjPtr vm,
GSList **paths)
{
virDomainSEVDefPtr sev = vm->def->sev;
if (!sev || sev->sectype != VIR_DOMAIN_LAUNCH_SECURITY_SEV)
return 0;
VIR_DEBUG("Setting up launch security");
*paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_SEV));
VIR_DEBUG("Set up launch security");
return 0;
}
static int
qemuNamespaceMknodPaths(virDomainObjPtr vm,
GSList *paths);
int
qemuDomainBuildNamespace(virQEMUDriverConfigPtr cfg,
virDomainObjPtr vm)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
VIR_DEBUG("namespaces disabled for domain %s", vm->def->name);
return 0;
}
if (qemuDomainPopulateDevices(cfg, &paths) < 0)
return -1;
if (qemuDomainSetupAllDisks(vm, &paths) < 0)
return -1;
if (qemuDomainSetupAllHostdevs(vm, &paths) < 0)
return -1;
if (qemuDomainSetupAllMemories(vm, &paths) < 0)
return -1;
if (qemuDomainSetupAllChardevs(vm, &paths) < 0)
return -1;
if (qemuDomainSetupAllTPMs(vm, &paths) < 0)
return -1;
if (qemuDomainSetupAllGraphics(vm, &paths) < 0)
return -1;
if (qemuDomainSetupAllInputs(vm, &paths) < 0)
return -1;
if (qemuDomainSetupAllRNGs(vm, &paths) < 0)
return -1;
if (qemuDomainSetupLoader(vm, &paths) < 0)
return -1;
if (qemuDomainSetupLaunchSecurity(vm, &paths) < 0)
return -1;
if (qemuNamespaceMknodPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainUnshareNamespace(virQEMUDriverConfigPtr cfg,
virSecurityManagerPtr mgr,
virDomainObjPtr vm)
{
const char *devPath = NULL;
char **devMountsPath = NULL, **devMountsSavePath = NULL;
size_t ndevMountsPath = 0, i;
int ret = -1;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
ret = 0;
goto cleanup;
}
if (qemuDomainGetPreservedMounts(cfg, vm,
&devMountsPath, &devMountsSavePath,
&ndevMountsPath) < 0)
goto cleanup;
for (i = 0; i < ndevMountsPath; i++) {
if (STREQ(devMountsPath[i], "/dev")) {
devPath = devMountsSavePath[i];
break;
}
}
if (!devPath) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find any /dev mount"));
goto cleanup;
}
if (virProcessSetupPrivateMountNS() < 0)
goto cleanup;
if (qemuDomainSetupDev(mgr, vm, devPath) < 0)
goto cleanup;
/* Save some mount points because we want to share them with the host */
for (i = 0; i < ndevMountsPath; i++) {
struct stat sb;
if (devMountsSavePath[i] == devPath)
continue;
if (stat(devMountsPath[i], &sb) < 0) {
virReportSystemError(errno,
_("Unable to stat: %s"),
devMountsPath[i]);
goto cleanup;
}
/* At this point, devMountsPath is either:
* a file (regular or special), or
* a directory. */
if ((S_ISDIR(sb.st_mode) && g_mkdir_with_parents(devMountsSavePath[i], 0777) < 0) ||
(!S_ISDIR(sb.st_mode) && virFileTouch(devMountsSavePath[i], sb.st_mode) < 0)) {
virReportSystemError(errno,
_("Failed to create %s"),
devMountsSavePath[i]);
goto cleanup;
}
if (virFileMoveMount(devMountsPath[i], devMountsSavePath[i]) < 0)
goto cleanup;
}
if (virFileMoveMount(devPath, "/dev") < 0)
goto cleanup;
for (i = 0; i < ndevMountsPath; i++) {
struct stat sb;
if (devMountsSavePath[i] == devPath)
continue;
if (stat(devMountsSavePath[i], &sb) < 0) {
virReportSystemError(errno,
_("Unable to stat: %s"),
devMountsSavePath[i]);
goto cleanup;
}
if (S_ISDIR(sb.st_mode)) {
if (g_mkdir_with_parents(devMountsPath[i], 0777) < 0) {
virReportSystemError(errno, _("Cannot create %s"),
devMountsPath[i]);
goto cleanup;
}
} else {
if (virFileMakeParentPath(devMountsPath[i]) < 0 ||
virFileTouch(devMountsPath[i], sb.st_mode) < 0) {
virReportSystemError(errno, _("Cannot create %s"),
devMountsPath[i]);
goto cleanup;
}
}
if (virFileMoveMount(devMountsSavePath[i], devMountsPath[i]) < 0)
goto cleanup;
}
ret = 0;
cleanup:
for (i = 0; i < ndevMountsPath; i++) {
#if defined(__linux__)
umount(devMountsSavePath[i]);
#endif /* defined(__linux__) */
/* The path can be either a regular file or a dir. */
if (virFileIsDir(devMountsSavePath[i]))
virFileDeleteTree(devMountsSavePath[i]);
else
unlink(devMountsSavePath[i]);
}
virStringListFreeCount(devMountsPath, ndevMountsPath);
virStringListFreeCount(devMountsSavePath, ndevMountsPath);
return ret;
}
bool
qemuDomainNamespaceEnabled(virDomainObjPtr vm,
qemuDomainNamespace ns)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
return priv->namespaces &&
virBitmapIsBitSet(priv->namespaces, ns);
}
int
qemuDomainEnableNamespace(virDomainObjPtr vm,
qemuDomainNamespace ns)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
if (!priv->namespaces)
priv->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST);
if (virBitmapSetBit(priv->namespaces, ns) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unable to enable namespace: %s"),
qemuDomainNamespaceTypeToString(ns));
return -1;
}
return 0;
}
static void
qemuDomainDisableNamespace(virDomainObjPtr vm,
qemuDomainNamespace ns)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
if (priv->namespaces) {
ignore_value(virBitmapClearBit(priv->namespaces, ns));
if (virBitmapIsAllClear(priv->namespaces)) {
virBitmapFree(priv->namespaces);
priv->namespaces = NULL;
}
}
}
void
qemuDomainDestroyNamespace(virQEMUDriverPtr driver G_GNUC_UNUSED,
virDomainObjPtr vm)
{
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
qemuDomainDisableNamespace(vm, QEMU_DOMAIN_NS_MOUNT);
}
bool
qemuDomainNamespaceAvailable(qemuDomainNamespace ns G_GNUC_UNUSED)
{
#if !defined(__linux__)
/* Namespaces are Linux specific. */
return false;
#else /* defined(__linux__) */
switch (ns) {
case QEMU_DOMAIN_NS_MOUNT:
# if !defined(WITH_SYS_ACL_H) || !defined(WITH_SELINUX)
/* We can't create the exact copy of paths if either of
* these is not available. */
return false;
# else
if (virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_MNT) < 0)
return false;
# endif
break;
case QEMU_DOMAIN_NS_LAST:
break;
}
return true;
#endif /* defined(__linux__) */
}
typedef struct _qemuNamespaceMknodItem qemuNamespaceMknodItem;
typedef qemuNamespaceMknodItem *qemuNamespaceMknodItemPtr;
struct _qemuNamespaceMknodItem {
char *file;
char *target;
bool bindmounted;
GStatBuf sb;
void *acl;
char *tcon;
};
typedef struct _qemuNamespaceMknodData qemuNamespaceMknodData;
typedef qemuNamespaceMknodData *qemuNamespaceMknodDataPtr;
struct _qemuNamespaceMknodData {
virQEMUDriverPtr driver;
virDomainObjPtr vm;
qemuNamespaceMknodItemPtr items;
size_t nitems;
};
static void
qemuNamespaceMknodItemClear(qemuNamespaceMknodItemPtr item)
{
VIR_FREE(item->file);
VIR_FREE(item->target);
virFileFreeACLs(&item->acl);
#ifdef WITH_SELINUX
freecon(item->tcon);
#endif
}
G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(qemuNamespaceMknodItem, qemuNamespaceMknodItemClear);
static void
qemuNamespaceMknodDataClear(qemuNamespaceMknodDataPtr data)
{
size_t i;
for (i = 0; i < data->nitems; i++) {
qemuNamespaceMknodItemPtr item = &data->items[i];
qemuNamespaceMknodItemClear(item);
}
VIR_FREE(data->items);
}
/* Our way of creating devices is highly linux specific */
#if defined(__linux__)
static int
qemuNamespaceMknodOne(qemuNamespaceMknodItemPtr data)
{
int ret = -1;
bool delDevice = false;
bool isLink = S_ISLNK(data->sb.st_mode);
bool isDev = S_ISCHR(data->sb.st_mode) || S_ISBLK(data->sb.st_mode);
bool isReg = S_ISREG(data->sb.st_mode) || S_ISFIFO(data->sb.st_mode) || S_ISSOCK(data->sb.st_mode);
bool isDir = S_ISDIR(data->sb.st_mode);
if (virFileMakeParentPath(data->file) < 0) {
virReportSystemError(errno,
_("Unable to create %s"), data->file);
goto cleanup;
}
if (isLink) {
VIR_DEBUG("Creating symlink %s -> %s", data->file, data->target);
/* First, unlink the symlink target. Symlinks change and
* therefore we have no guarantees that pre-existing
* symlink is still valid. */
if (unlink(data->file) < 0 &&
errno != ENOENT) {
virReportSystemError(errno,
_("Unable to remove symlink %s"),
data->file);
goto cleanup;
}
if (symlink(data->target, data->file) < 0) {
virReportSystemError(errno,
_("Unable to create symlink %s (pointing to %s)"),
data->file, data->target);
goto cleanup;
} else {
delDevice = true;
}
} else if (isDev) {
VIR_DEBUG("Creating dev %s (%d,%d)",
data->file, major(data->sb.st_rdev), minor(data->sb.st_rdev));
unlink(data->file);
if (mknod(data->file, data->sb.st_mode, data->sb.st_rdev) < 0) {
virReportSystemError(errno,
_("Unable to create device %s"),
data->file);
goto cleanup;
} else {
delDevice = true;
}
} else if (isReg || isDir) {
/* We are not cleaning up disks on virDomainDetachDevice
* because disk might be still in use by different disk
* as its backing chain. This might however clash here.
* Therefore do the cleanup here. */
if (umount(data->file) < 0 &&
errno != ENOENT && errno != EINVAL) {
virReportSystemError(errno,
_("Unable to umount %s"),
data->file);
goto cleanup;
}
if ((isReg && virFileTouch(data->file, data->sb.st_mode) < 0) ||
(isDir && g_mkdir_with_parents(data->file, data->sb.st_mode) < 0))
goto cleanup;
delDevice = true;
/* Just create the file here so that code below sets
* proper owner and mode. Move the mount only after that. */
} else {
virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
_("unsupported device type %s 0%o"),
data->file, data->sb.st_mode);
goto cleanup;
}
if (lchown(data->file, data->sb.st_uid, data->sb.st_gid) < 0) {
virReportSystemError(errno,
_("Failed to chown device %s"),
data->file);
goto cleanup;
}
/* Symlinks don't have mode */
if (!isLink &&
chmod(data->file, data->sb.st_mode) < 0) {
virReportSystemError(errno,
_("Failed to set permissions for device %s"),
data->file);
goto cleanup;
}
/* Symlinks don't have ACLs. */
if (!isLink &&
virFileSetACLs(data->file, data->acl) < 0 &&
errno != ENOTSUP) {
virReportSystemError(errno,
_("Unable to set ACLs on %s"), data->file);
goto cleanup;
}
# ifdef WITH_SELINUX
if (data->tcon &&
lsetfilecon_raw(data->file, (const char *)data->tcon) < 0) {
VIR_WARNINGS_NO_WLOGICALOP_EQUAL_EXPR
if (errno != EOPNOTSUPP && errno != ENOTSUP) {
VIR_WARNINGS_RESET
virReportSystemError(errno,
_("Unable to set SELinux label on %s"),
data->file);
goto cleanup;
}
}
# endif
/* Finish mount process started earlier. */
if ((isReg || isDir) &&
virFileMoveMount(data->target, data->file) < 0)
goto cleanup;
ret = 0;
cleanup:
if (ret < 0 && delDevice) {
if (isDir)
virFileDeleteTree(data->file);
else
unlink(data->file);
}
return ret;
}
static bool
qemuNamespaceMknodItemNeedsBindMount(mode_t st_mode)
{
/* A block device S_ISBLK() or a chardev S_ISCHR() is intentionally not
* handled. We want to mknod() it instead of passing in through bind
* mounting. */
return S_ISREG(st_mode) || S_ISFIFO(st_mode) ||
S_ISSOCK(st_mode) || S_ISDIR(st_mode);
}
static int
qemuNamespaceMknodHelper(pid_t pid G_GNUC_UNUSED,
void *opaque)
{
qemuNamespaceMknodDataPtr data = opaque;
size_t i;
int ret = -1;
qemuSecurityPostFork(data->driver->securityManager);
for (i = 0; i < data->nitems; i++) {
if (qemuNamespaceMknodOne(&data->items[i]) < 0)
goto cleanup;
}
ret = 0;
cleanup:
qemuNamespaceMknodDataClear(data);
return ret;
}
static int
qemuNamespaceMknodItemInit(qemuNamespaceMknodItemPtr item,
virQEMUDriverConfigPtr cfg,
virDomainObjPtr vm,
const char *file)
{
g_autofree char *target = NULL;
bool isLink;
bool needsBindMount;
item->file = g_strdup(file);
if (g_lstat(file, &item->sb) < 0) {
if (errno == ENOENT)
return -2;
virReportSystemError(errno,
_("Unable to access %s"), file);
return -1;
}
isLink = S_ISLNK(item->sb.st_mode);
needsBindMount = qemuNamespaceMknodItemNeedsBindMount(item->sb.st_mode);
if (needsBindMount && STRPREFIX(file, QEMU_DEVPREFIX)) {
if (!(target = qemuDomainGetPreservedMountPath(cfg, vm, file)))
return -1;
item->target = g_steal_pointer(&target);
} else if (isLink) {
g_autoptr(GError) gerr = NULL;
if (!(target = g_file_read_link(file, &gerr))) {
virReportError(VIR_ERR_SYSTEM_ERROR,
_("failed to resolve symlink %s: %s"), file, gerr->message);
return -1;
}
if (!g_path_is_absolute(target)) {
g_autofree char *fileTmp = g_strdup(file);
char *c = NULL;
char *tmp = NULL;
if ((c = strrchr(fileTmp, '/')))
*(c + 1) = '\0';
tmp = g_strdup_printf("%s%s", fileTmp, target);
VIR_FREE(target);
target = g_steal_pointer(&tmp);
}
item->target = g_steal_pointer(&target);
}
/* Symlinks don't have ACLs. */
if (!isLink &&
virFileGetACLs(file, &item->acl) < 0 &&
errno != ENOTSUP) {
virReportSystemError(errno,
_("Unable to get ACLs on %s"), file);
return -1;
}
# ifdef WITH_SELINUX
if (lgetfilecon_raw(file, &item->tcon) < 0 &&
(errno != ENOTSUP && errno != ENODATA)) {
virReportSystemError(errno,
_("Unable to get SELinux label from %s"), file);
return -1;
}
# endif
return 0;
}
static int
qemuNamespacePrepareOneItem(qemuNamespaceMknodDataPtr data,
virQEMUDriverConfigPtr cfg,
virDomainObjPtr vm,
const char *file,
char * const *devMountsPath,
size_t ndevMountsPath)
{
long ttl = sysconf(_SC_SYMLOOP_MAX);
g_autofree char *next = g_strdup(file);
size_t i;
while (1) {
g_auto(qemuNamespaceMknodItem) item = { 0 };
bool isLink;
bool addToData = false;
int rc;
rc = qemuNamespaceMknodItemInit(&item, cfg, vm, next);
if (rc == -2) {
/* @file doesn't exist. We can break here. */
break;
} else if (rc < 0) {
/* Some other (critical) error. */
return -1;
}
isLink = S_ISLNK(item.sb.st_mode);
if (STRPREFIX(next, QEMU_DEVPREFIX)) {
for (i = 0; i < ndevMountsPath; i++) {
if (STREQ(devMountsPath[i], "/dev"))
continue;
if (STRPREFIX(next, devMountsPath[i]))
break;
}
if (i == ndevMountsPath)
addToData = true;
}
g_free(next);
next = g_strdup(item.target);
if (addToData &&
VIR_APPEND_ELEMENT(data->items, data->nitems, item) < 0)
return -1;
if (!isLink)
break;
if (ttl-- == 0) {
virReportSystemError(ELOOP,
_("Too many levels of symbolic links: %s"),
next);
return -1;
}
}
return 0;
}
static int
qemuNamespaceMknodPaths(virDomainObjPtr vm,
GSList *paths)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
virQEMUDriverPtr driver = priv->driver;
g_autoptr(virQEMUDriverConfig) cfg = NULL;
char **devMountsPath = NULL;
size_t ndevMountsPath = 0;
qemuNamespaceMknodData data = { 0 };
size_t i;
int ret = -1;
GSList *next;
if (!paths)
return 0;
cfg = virQEMUDriverGetConfig(driver);
if (qemuDomainGetPreservedMounts(cfg, vm,
&devMountsPath, NULL,
&ndevMountsPath) < 0)
return -1;
data.driver = driver;
data.vm = vm;
for (next = paths; next; next = next->next) {
const char *path = next->data;
if (qemuNamespacePrepareOneItem(&data, cfg, vm, path,
devMountsPath, ndevMountsPath) < 0)
goto cleanup;
}
for (i = 0; i < data.nitems; i++) {
qemuNamespaceMknodItemPtr item = &data.items[i];
if (item->target &&
qemuNamespaceMknodItemNeedsBindMount(item->sb.st_mode)) {
if (virFileBindMountDevice(item->file, item->target) < 0)
goto cleanup;
item->bindmounted = true;
}
}
if (qemuSecurityPreFork(driver->securityManager) < 0)
goto cleanup;
if (virProcessRunInMountNamespace(vm->pid,
qemuNamespaceMknodHelper,
&data) < 0) {
qemuSecurityPostFork(driver->securityManager);
goto cleanup;
}
qemuSecurityPostFork(driver->securityManager);
ret = 0;
cleanup:
for (i = 0; i < data.nitems; i++) {
if (data.items[i].bindmounted &&
umount(data.items[i].target) < 0) {
VIR_WARN("Unable to unmount %s", data.items[i].target);
}
}
qemuNamespaceMknodDataClear(&data);
virStringListFreeCount(devMountsPath, ndevMountsPath);
return ret;
}
#else /* !defined(__linux__) */
static int
qemuNamespaceMknodPaths(virDomainObjPtr vm G_GNUC_UNUSED,
GSList *paths G_GNUC_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("Namespaces are not supported on this platform."));
return -1;
}
#endif /* !defined(__linux__) */
static int
qemuNamespaceUnlinkHelper(pid_t pid G_GNUC_UNUSED,
void *opaque)
{
g_autoptr(virGSListString) paths = opaque;
GSList *next;
for (next = paths; next; next = next->next) {
const char *path = next->data;
VIR_DEBUG("Unlinking %s", path);
if (unlink(path) < 0 && errno != ENOENT) {
virReportSystemError(errno,
_("Unable to remove device %s"), path);
return -1;
}
}
return 0;
}
static int
qemuNamespaceUnlinkPaths(virDomainObjPtr vm,
GSList *paths)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
virQEMUDriverPtr driver = priv->driver;
g_autoptr(virQEMUDriverConfig) cfg = NULL;
g_auto(GStrv) devMountsPath = NULL;
g_autoptr(virGSListString) unlinkPaths = NULL;
GSList *next;
if (!paths)
return 0;
cfg = virQEMUDriverGetConfig(driver);
if (qemuDomainGetPreservedMounts(cfg, vm, &devMountsPath, NULL, NULL) < 0)
return -1;
for (next = paths; next; next = next->next) {
const char *path = next->data;
if (STRPREFIX(path, QEMU_DEVPREFIX)) {
GStrv mount;
bool inSubmount = false;
for (mount = devMountsPath; *mount; mount++) {
if (STREQ(*mount, "/dev"))
continue;
if (STRPREFIX(path, *mount)) {
inSubmount = true;
break;
}
}
if (!inSubmount)
unlinkPaths = g_slist_prepend(unlinkPaths, g_strdup(path));
}
}
if (unlinkPaths &&
virProcessRunInMountNamespace(vm->pid,
qemuNamespaceUnlinkHelper,
unlinkPaths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceSetupDisk(virDomainObjPtr vm,
virStorageSourcePtr src)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupDisk(src, &paths) < 0)
return -1;
if (qemuNamespaceMknodPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceTeardownDisk(virDomainObjPtr vm G_GNUC_UNUSED,
virStorageSourcePtr src G_GNUC_UNUSED)
{
/* While in hotplug case we create the whole backing chain,
* here we must limit ourselves. The disk we want to remove
* might be a part of backing chain of another disk.
* If you are reading these lines and have some spare time
* you can come up with and algorithm that checks for that.
* I don't, therefore: */
return 0;
}
/**
* qemuDomainNamespaceSetupHostdev:
* @vm: domain object
* @hostdev: hostdev to create in @vm's namespace
*
* For given @hostdev, create its devfs representation (if it has one) in
* domain namespace. Note, @hostdev must not be in @vm's definition.
*
* Returns: 0 on success,
* -1 otherwise.
*/
int
qemuDomainNamespaceSetupHostdev(virDomainObjPtr vm,
virDomainHostdevDefPtr hostdev)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupHostdev(vm,
hostdev,
true,
&paths) < 0)
return -1;
if (qemuNamespaceMknodPaths(vm, paths) < 0)
return -1;
return 0;
}
/**
* qemuDomainNamespaceTeardownHostdev:
* @vm: domain object
* @hostdev: hostdev to remove in @vm's namespace
*
* For given @hostdev, remove its devfs representation (if it has one) in
* domain namespace. Note, @hostdev must not be in @vm's definition.
*
* Returns: 0 on success,
* -1 otherwise.
*/
int
qemuDomainNamespaceTeardownHostdev(virDomainObjPtr vm,
virDomainHostdevDefPtr hostdev)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupHostdev(vm,
hostdev,
true,
&paths) < 0)
return -1;
if (qemuNamespaceUnlinkPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceSetupMemory(virDomainObjPtr vm,
virDomainMemoryDefPtr mem)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupMemory(mem, &paths) < 0)
return -1;
if (qemuNamespaceMknodPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceTeardownMemory(virDomainObjPtr vm,
virDomainMemoryDefPtr mem)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupMemory(mem, &paths) < 0)
return -1;
if (qemuNamespaceUnlinkPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceSetupChardev(virDomainObjPtr vm,
virDomainChrDefPtr chr)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupChardev(vm->def, chr, &paths) < 0)
return -1;
if (qemuNamespaceMknodPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceTeardownChardev(virDomainObjPtr vm,
virDomainChrDefPtr chr)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupChardev(vm->def, chr, &paths) < 0)
return -1;
if (qemuNamespaceUnlinkPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceSetupRNG(virDomainObjPtr vm,
virDomainRNGDefPtr rng)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupRNG(rng, &paths) < 0)
return -1;
if (qemuNamespaceMknodPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceTeardownRNG(virDomainObjPtr vm,
virDomainRNGDefPtr rng)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupRNG(rng, &paths) < 0)
return -1;
if (qemuNamespaceUnlinkPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceSetupInput(virDomainObjPtr vm,
virDomainInputDefPtr input)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupInput(input, &paths) < 0)
return -1;
if (qemuNamespaceMknodPaths(vm, paths) < 0)
return -1;
return 0;
}
int
qemuDomainNamespaceTeardownInput(virDomainObjPtr vm,
virDomainInputDefPtr input)
{
g_autoptr(virGSListString) paths = NULL;
if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
return 0;
if (qemuDomainSetupInput(input, &paths) < 0)
return -1;
if (qemuNamespaceUnlinkPaths(vm, paths) < 0)
return -1;
return 0;
}