/* * qemu_namespace.c: QEMU domain namespace helpers * * Copyright (C) 2006-2020 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see * . */ #include #ifdef __linux__ # include #endif #if defined(WITH_SYS_MOUNT_H) # include #endif #ifdef WITH_SELINUX # include #endif #include "qemu_namespace.h" #include "qemu_domain.h" #include "qemu_cgroup.h" #include "qemu_security.h" #include "qemu_hostdev.h" #include "viralloc.h" #include "virlog.h" #include "virdevmapper.h" #include "virglibutil.h" #define VIR_FROM_THIS VIR_FROM_QEMU VIR_LOG_INIT("qemu.qemu_domain"); VIR_ENUM_IMPL(qemuDomainNamespace, QEMU_DOMAIN_NS_LAST, "mount", ); /** * qemuDomainGetPreservedMountPath: * @cfg: driver configuration data * @vm: domain object * @mountpoint: mount point path to convert * * For given @mountpoint return new path where the mount point * should be moved temporarily whilst building the namespace. * * Returns: allocated string on success which the caller must free, * NULL on failure. */ static char * qemuDomainGetPreservedMountPath(virQEMUDriverConfig *cfg, virDomainObj *vm, const char *mountpoint) { char *path = NULL; char *tmp; const char *suffix = mountpoint + strlen(QEMU_DEVPREFIX); g_autofree char *domname = virDomainDefGetShortName(vm->def); size_t off; if (!domname) return NULL; if (STREQ(mountpoint, "/dev")) suffix = "dev"; path = g_strdup_printf("%s/%s.%s", cfg->stateDir, domname, suffix); /* Now consider that @mountpoint is "/dev/blah/blah2". * @suffix then points to "blah/blah2". However, caller * expects all the @paths to be the same depth. The * caller doesn't always do `mkdir -p` but sometimes bare * `touch`. Therefore fix all the suffixes. */ off = strlen(path) - strlen(suffix); tmp = path + off; while (*tmp) { if (*tmp == '/') *tmp = '.'; tmp++; } return path; } /** * qemuDomainGetPreservedMounts: * * Process list of mounted filesystems and: * a) save all FSs mounted under /dev to @devPath * b) generate backup path for all the entries in a) * * Any of the return pointers can be NULL. Both arrays are NULL-terminated. * Get the mount table either from @vm's PID (if running), or from the * namespace we're in (if @vm's not running). * * Returns 0 on success, -1 otherwise (with error reported) */ static int qemuDomainGetPreservedMounts(virQEMUDriverConfig *cfg, virDomainObj *vm, char ***devPath, char ***devSavePath, size_t *ndevPath) { g_auto(GStrv) mounts = NULL; size_t nmounts = 0; g_auto(GStrv) paths = NULL; g_auto(GStrv) savePaths = NULL; g_autofree char *mountsPath = NULL; size_t i; if (ndevPath) *ndevPath = 0; if (vm->pid > 0) mountsPath = g_strdup_printf("/proc/%lld/mounts", (long long) vm->pid); else mountsPath = g_strdup(QEMU_PROC_MOUNTS); if (virFileGetMountSubtree(mountsPath, "/dev", &mounts, &nmounts) < 0) return -1; if (nmounts == 0) return 0; /* There can be nested mount points. For instance * /dev/shm/blah can be a mount point and /dev/shm too. It * doesn't make much sense to return the former path because * caller preserves the latter (and with that the former * too). Therefore prune nested mount points. * NB mounts[0] is "/dev". Should we start the outer loop * from the beginning of the array all we'd be left with is * just the first element. Think about it. */ for (i = 1; i < nmounts; i++) { size_t j = i + 1; /* If we looked into mount table of already running VM, * we might have found /dev twice. Remove the other * occurrence as it would jeopardize the rest of the prune * algorithm. */ if (STREQ(mounts[i], "/dev")) { VIR_FREE(mounts[i]); VIR_DELETE_ELEMENT_INPLACE(mounts, i, nmounts); continue; } while (j < nmounts) { char *c = STRSKIP(mounts[j], mounts[i]); if (c && (*c == '/' || *c == '\0')) { VIR_DEBUG("Dropping path %s because of %s", mounts[j], mounts[i]); VIR_FREE(mounts[j]); VIR_DELETE_ELEMENT_INPLACE(mounts, j, nmounts); } else { j++; } } } /* mounts may not be NULL-terminated at this point, but we convert it into * 'paths' which is NULL-terminated */ paths = g_new0(char *, nmounts + 1); for (i = 0; i < nmounts; i++) paths[i] = g_steal_pointer(&mounts[i]); if (devSavePath) { savePaths = g_new0(char *, nmounts + 1); for (i = 0; i < nmounts; i++) { if (!(savePaths[i] = qemuDomainGetPreservedMountPath(cfg, vm, paths[i]))) return -1; } } if (devPath) *devPath = g_steal_pointer(&paths); if (devSavePath) *devSavePath = g_steal_pointer(&savePaths); if (ndevPath) *ndevPath = nmounts; return 0; } static int qemuDomainPopulateDevices(virQEMUDriverConfig *cfg, GSList **paths) { const char *const *devices = (const char *const *) cfg->cgroupDeviceACL; size_t i; if (!devices) devices = defaultDeviceACL; for (i = 0; devices[i]; i++) { *paths = g_slist_prepend(*paths, g_strdup(devices[i])); } return 0; } static int qemuDomainSetupDev(virSecurityManager *mgr, virDomainObj *vm, const char *path) { g_autofree char *mount_options = NULL; g_autofree char *opts = NULL; VIR_DEBUG("Setting up /dev/ for domain %s", vm->def->name); mount_options = qemuSecurityGetMountOptions(mgr, vm->def); if (!mount_options) mount_options = g_strdup(""); /* * tmpfs is limited to 64kb, since we only have device nodes in there * and don't want to DOS the entire OS RAM usage */ opts = g_strdup_printf("mode=755,size=65536%s", mount_options); if (virFileSetupDev(path, opts) < 0) return -1; return 0; } static int qemuDomainSetupDisk(virStorageSource *src, GSList **paths) { virStorageSource *next; bool hasNVMe = false; for (next = src; virStorageSourceIsBacking(next); next = next->backingStore) { g_autofree char *tmpPath = NULL; if (next->type == VIR_STORAGE_TYPE_NVME) { hasNVMe = true; if (!(tmpPath = virPCIDeviceAddressGetIOMMUGroupDev(&next->nvme->pciAddr))) return -1; } else { GSList *targetPaths = NULL; if (virStorageSourceIsEmpty(next) || !virStorageSourceIsLocalStorage(next)) { /* Not creating device. Just continue. */ continue; } tmpPath = g_strdup(next->path); if (virDevMapperGetTargets(next->path, &targetPaths) < 0 && errno != ENOSYS) { virReportSystemError(errno, _("Unable to get devmapper targets for %1$s"), next->path); return -1; } if (targetPaths) *paths = g_slist_concat(g_slist_reverse(targetPaths), *paths); } *paths = g_slist_prepend(*paths, g_steal_pointer(&tmpPath)); } /* qemu-pr-helper might require access to /dev/mapper/control. */ if (src->pr) *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEVICE_MAPPER_CONTROL_PATH)); if (hasNVMe) *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); return 0; } static int qemuDomainSetupAllDisks(virDomainObj *vm, GSList **paths) { size_t i; VIR_DEBUG("Setting up disks"); for (i = 0; i < vm->def->ndisks; i++) { if (qemuDomainSetupDisk(vm->def->disks[i]->src, paths) < 0) return -1; } VIR_DEBUG("Setup all disks"); return 0; } static int qemuDomainSetupHostdev(virDomainObj *vm, virDomainHostdevDef *hostdev, bool hotplug, GSList **paths) { g_autofree char *path = NULL; if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) return -1; if (path) *paths = g_slist_prepend(*paths, g_steal_pointer(&path)); if (qemuHostdevNeedsVFIO(hostdev) && (!hotplug || !qemuDomainNeedsVFIO(vm->def))) *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); return 0; } static int qemuDomainSetupAllHostdevs(virDomainObj *vm, GSList **paths) { size_t i; VIR_DEBUG("Setting up hostdevs"); for (i = 0; i < vm->def->nhostdevs; i++) { if (qemuDomainSetupHostdev(vm, vm->def->hostdevs[i], false, paths) < 0) return -1; } VIR_DEBUG("Setup all hostdevs"); return 0; } static int qemuDomainSetupMemory(virDomainMemoryDef *mem, GSList **paths) { switch (mem->model) { case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: *paths = g_slist_prepend(*paths, g_strdup(mem->source.nvdimm.path)); break; case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM: *paths = g_slist_prepend(*paths, g_strdup(mem->source.virtio_pmem.path)); break; case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC: *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_SGX_VEPVC)); *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_SGX_PROVISION)); break; case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_DIMM: case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM: case VIR_DOMAIN_MEMORY_MODEL_LAST: break; } return 0; } static int qemuDomainSetupAllMemories(virDomainObj *vm, GSList **paths) { size_t i; VIR_DEBUG("Setting up memories"); for (i = 0; i < vm->def->nmems; i++) { if (qemuDomainSetupMemory(vm->def->mems[i], paths) < 0) return -1; } VIR_DEBUG("Setup all memories"); return 0; } static int qemuDomainSetupChardev(virDomainDef *def G_GNUC_UNUSED, virDomainChrDef *dev, void *opaque) { GSList **paths = opaque; const char *path = NULL; if (!(path = virDomainChrSourceDefGetPath(dev->source))) return 0; /* Socket created by qemu. It doesn't exist upfront. */ if (dev->source->type == VIR_DOMAIN_CHR_TYPE_UNIX && dev->source->data.nix.listen) return 0; *paths = g_slist_prepend(*paths, g_strdup(path)); return 0; } static int qemuDomainSetupAllChardevs(virDomainObj *vm, GSList **paths) { VIR_DEBUG("Setting up chardevs"); if (virDomainChrDefForeach(vm->def, true, qemuDomainSetupChardev, paths) < 0) return -1; VIR_DEBUG("Setup all chardevs"); return 0; } static int qemuDomainSetupTPM(virDomainTPMDef *dev, GSList **paths) { switch (dev->type) { case VIR_DOMAIN_TPM_TYPE_PASSTHROUGH: *paths = g_slist_prepend(*paths, g_strdup(dev->data.passthrough.source->data.file.path)); break; case VIR_DOMAIN_TPM_TYPE_EMULATOR: case VIR_DOMAIN_TPM_TYPE_EXTERNAL: case VIR_DOMAIN_TPM_TYPE_LAST: /* nada */ break; } return 0; } static int qemuDomainSetupAllTPMs(virDomainObj *vm, GSList **paths) { size_t i; VIR_DEBUG("Setting up TPMs"); for (i = 0; i < vm->def->ntpms; i++) { if (qemuDomainSetupTPM(vm->def->tpms[i], paths) < 0) return -1; } VIR_DEBUG("Setup all TPMs"); return 0; } static int qemuDomainSetupGraphics(virDomainGraphicsDef *gfx, GSList **paths) { const char *rendernode = virDomainGraphicsGetRenderNode(gfx); if (!rendernode) return 0; *paths = g_slist_prepend(*paths, g_strdup(rendernode)); return 0; } static int qemuDomainSetupAllGraphics(virDomainObj *vm, GSList **paths) { size_t i; VIR_DEBUG("Setting up graphics"); for (i = 0; i < vm->def->ngraphics; i++) { if (qemuDomainSetupGraphics(vm->def->graphics[i], paths) < 0) return -1; } VIR_DEBUG("Setup all graphics"); return 0; } static int qemuDomainSetupAllVideos(virDomainObj *vm, GSList **paths) { size_t i; VIR_DEBUG("Setting up video devices"); for (i = 0; i < vm->def->nvideos; i++) { virDomainVideoDef *video = vm->def->videos[i]; if (video->blob == VIR_TRISTATE_SWITCH_ON) { *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_UDMABUF)); break; } } return 0; } static int qemuDomainSetupInput(virDomainInputDef *input, GSList **paths) { const char *path = virDomainInputDefGetPath(input); if (!path) return 0; *paths = g_slist_prepend(*paths, g_strdup(path)); return 0; } static int qemuDomainSetupAllInputs(virDomainObj *vm, GSList **paths) { size_t i; VIR_DEBUG("Setting up inputs"); for (i = 0; i < vm->def->ninputs; i++) { if (qemuDomainSetupInput(vm->def->inputs[i], paths) < 0) return -1; } VIR_DEBUG("Setup all inputs"); return 0; } static int qemuDomainSetupRNG(virDomainRNGDef *rng, GSList **paths) { switch (rng->backend) { case VIR_DOMAIN_RNG_BACKEND_RANDOM: *paths = g_slist_prepend(*paths, g_strdup(rng->source.file)); break; case VIR_DOMAIN_RNG_BACKEND_EGD: case VIR_DOMAIN_RNG_BACKEND_BUILTIN: case VIR_DOMAIN_RNG_BACKEND_LAST: /* nada */ break; } return 0; } static int qemuDomainSetupAllRNGs(virDomainObj *vm, GSList **paths) { size_t i; VIR_DEBUG("Setting up RNGs"); for (i = 0; i < vm->def->nrngs; i++) { if (qemuDomainSetupRNG(vm->def->rngs[i], paths) < 0) return -1; } VIR_DEBUG("Setup all RNGs"); return 0; } static int qemuDomainSetupLoader(virDomainObj *vm, GSList **paths) { virDomainLoaderDef *loader = vm->def->os.loader; VIR_DEBUG("Setting up loader"); if (loader) { switch ((virDomainLoader) loader->type) { case VIR_DOMAIN_LOADER_TYPE_ROM: *paths = g_slist_prepend(*paths, g_strdup(loader->path)); break; case VIR_DOMAIN_LOADER_TYPE_PFLASH: *paths = g_slist_prepend(*paths, g_strdup(loader->path)); if (loader->nvram && qemuDomainSetupDisk(loader->nvram, paths) < 0) return -1; break; case VIR_DOMAIN_LOADER_TYPE_NONE: case VIR_DOMAIN_LOADER_TYPE_LAST: break; } } VIR_DEBUG("Setup loader"); return 0; } static int qemuDomainSetupLaunchSecurity(virDomainObj *vm, GSList **paths) { virDomainSecDef *sec = vm->def->sec; if (!sec) return 0; switch ((virDomainLaunchSecurity) sec->sectype) { case VIR_DOMAIN_LAUNCH_SECURITY_SEV: VIR_DEBUG("Setting up launch security for SEV"); *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_SEV)); VIR_DEBUG("Set up launch security for SEV"); break; case VIR_DOMAIN_LAUNCH_SECURITY_PV: break; case VIR_DOMAIN_LAUNCH_SECURITY_NONE: case VIR_DOMAIN_LAUNCH_SECURITY_LAST: virReportEnumRangeError(virDomainLaunchSecurity, sec->sectype); return -1; } return 0; } static int qemuNamespaceMknodPaths(virDomainObj *vm, GSList *paths, bool *created); int qemuDomainBuildNamespace(virQEMUDriverConfig *cfg, virDomainObj *vm) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) { VIR_DEBUG("namespaces disabled for domain %s", vm->def->name); return 0; } if (qemuDomainPopulateDevices(cfg, &paths) < 0) return -1; if (qemuDomainSetupAllDisks(vm, &paths) < 0) return -1; if (qemuDomainSetupAllHostdevs(vm, &paths) < 0) return -1; if (qemuDomainSetupAllMemories(vm, &paths) < 0) return -1; if (qemuDomainSetupAllChardevs(vm, &paths) < 0) return -1; if (qemuDomainSetupAllTPMs(vm, &paths) < 0) return -1; if (qemuDomainSetupAllGraphics(vm, &paths) < 0) return -1; if (qemuDomainSetupAllVideos(vm, &paths) < 0) return -1; if (qemuDomainSetupAllInputs(vm, &paths) < 0) return -1; if (qemuDomainSetupAllRNGs(vm, &paths) < 0) return -1; if (qemuDomainSetupLoader(vm, &paths) < 0) return -1; if (qemuDomainSetupLaunchSecurity(vm, &paths) < 0) return -1; if (qemuNamespaceMknodPaths(vm, paths, NULL) < 0) return -1; return 0; } int qemuDomainUnshareNamespace(virQEMUDriverConfig *cfg, virSecurityManager *mgr, virDomainObj *vm) { const char *devPath = NULL; g_auto(GStrv) devMountsPath = NULL; g_auto(GStrv) devMountsSavePath = NULL; size_t ndevMountsPath = 0, i; int ret = -1; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) { ret = 0; goto cleanup; } if (qemuDomainGetPreservedMounts(cfg, vm, &devMountsPath, &devMountsSavePath, &ndevMountsPath) < 0) goto cleanup; for (i = 0; i < ndevMountsPath; i++) { if (STREQ(devMountsPath[i], "/dev")) { devPath = devMountsSavePath[i]; break; } } if (!devPath) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Unable to find any /dev mount")); goto cleanup; } if (virProcessSetupPrivateMountNS() < 0) goto cleanup; if (qemuDomainSetupDev(mgr, vm, devPath) < 0) goto cleanup; /* Save some mount points because we want to share them with the host */ for (i = 0; i < ndevMountsPath; i++) { struct stat sb; if (devMountsSavePath[i] == devPath) continue; if (stat(devMountsPath[i], &sb) < 0) { virReportSystemError(errno, _("Unable to stat: %1$s"), devMountsPath[i]); goto cleanup; } /* At this point, devMountsPath is either: * a file (regular or special), or * a directory. */ if ((S_ISDIR(sb.st_mode) && g_mkdir_with_parents(devMountsSavePath[i], 0777) < 0) || (!S_ISDIR(sb.st_mode) && virFileTouch(devMountsSavePath[i], sb.st_mode) < 0)) { virReportSystemError(errno, _("Failed to create %1$s"), devMountsSavePath[i]); goto cleanup; } if (virFileMoveMount(devMountsPath[i], devMountsSavePath[i]) < 0) goto cleanup; } #if defined(__linux__) if (umount2("/dev", MNT_DETACH) < 0) { virReportSystemError(errno, "%s", _("failed to umount devfs on /dev")); goto cleanup; } #endif /* !defined(__linux__) */ if (virFileMoveMount(devPath, "/dev") < 0) goto cleanup; for (i = 0; i < ndevMountsPath; i++) { struct stat sb; if (devMountsSavePath[i] == devPath) continue; if (stat(devMountsSavePath[i], &sb) < 0) { virReportSystemError(errno, _("Unable to stat: %1$s"), devMountsSavePath[i]); goto cleanup; } if (S_ISDIR(sb.st_mode)) { if (g_mkdir_with_parents(devMountsPath[i], 0777) < 0) { virReportSystemError(errno, _("Cannot create %1$s"), devMountsPath[i]); goto cleanup; } } else { if (virFileMakeParentPath(devMountsPath[i]) < 0 || virFileTouch(devMountsPath[i], sb.st_mode) < 0) { virReportSystemError(errno, _("Cannot create %1$s"), devMountsPath[i]); goto cleanup; } } if (virFileMoveMount(devMountsSavePath[i], devMountsPath[i]) < 0) goto cleanup; } ret = 0; cleanup: for (i = 0; i < ndevMountsPath; i++) { #if defined(__linux__) umount(devMountsSavePath[i]); #endif /* defined(__linux__) */ /* The path can be either a regular file or a dir. */ if (virFileIsDir(devMountsSavePath[i])) virFileDeleteTree(devMountsSavePath[i]); else unlink(devMountsSavePath[i]); } return ret; } bool qemuDomainNamespaceEnabled(virDomainObj *vm, qemuDomainNamespace ns) { qemuDomainObjPrivate *priv = vm->privateData; return priv->namespaces && virBitmapIsBitSet(priv->namespaces, ns); } int qemuDomainEnableNamespace(virDomainObj *vm, qemuDomainNamespace ns) { qemuDomainObjPrivate *priv = vm->privateData; if (!priv->namespaces) priv->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST); if (virBitmapSetBit(priv->namespaces, ns) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("Unable to enable namespace: %1$s"), qemuDomainNamespaceTypeToString(ns)); return -1; } return 0; } static void qemuDomainDisableNamespace(virDomainObj *vm, qemuDomainNamespace ns) { qemuDomainObjPrivate *priv = vm->privateData; if (priv->namespaces) { ignore_value(virBitmapClearBit(priv->namespaces, ns)); if (virBitmapIsAllClear(priv->namespaces)) { g_clear_pointer(&priv->namespaces, virBitmapFree); } } } void qemuDomainDestroyNamespace(virQEMUDriver *driver G_GNUC_UNUSED, virDomainObj *vm) { if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) qemuDomainDisableNamespace(vm, QEMU_DOMAIN_NS_MOUNT); } bool qemuDomainNamespaceAvailable(qemuDomainNamespace ns G_GNUC_UNUSED) { #if !defined(__linux__) /* Namespaces are Linux specific. */ return false; #else /* defined(__linux__) */ switch (ns) { case QEMU_DOMAIN_NS_MOUNT: # if !defined(WITH_LIBACL) || !defined(WITH_SELINUX) /* We can't create the exact copy of paths if either of * these is not available. */ return false; # else if (virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_MNT) < 0) return false; # endif break; case QEMU_DOMAIN_NS_LAST: break; } return true; #endif /* defined(__linux__) */ } typedef struct _qemuNamespaceMknodItem qemuNamespaceMknodItem; struct _qemuNamespaceMknodItem { char *file; char *target; bool bindmounted; GStatBuf sb; void *acl; char *tcon; }; typedef struct _qemuNamespaceMknodData qemuNamespaceMknodData; struct _qemuNamespaceMknodData { virQEMUDriver *driver; virDomainObj *vm; qemuNamespaceMknodItem *items; size_t nitems; }; static void qemuNamespaceMknodItemClear(qemuNamespaceMknodItem *item) { VIR_FREE(item->file); VIR_FREE(item->target); virFileFreeACLs(&item->acl); #ifdef WITH_SELINUX freecon(item->tcon); #endif } G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(qemuNamespaceMknodItem, qemuNamespaceMknodItemClear); static void qemuNamespaceMknodDataClear(qemuNamespaceMknodData *data) { size_t i; for (i = 0; i < data->nitems; i++) { qemuNamespaceMknodItem *item = &data->items[i]; qemuNamespaceMknodItemClear(item); } VIR_FREE(data->items); } /* Our way of creating devices is highly linux specific */ #if defined(__linux__) static int qemuNamespaceMknodOne(qemuNamespaceMknodItem *data) { int ret = -1; bool delDevice = false; bool isLink = S_ISLNK(data->sb.st_mode); bool isDev = S_ISCHR(data->sb.st_mode) || S_ISBLK(data->sb.st_mode); bool isReg = S_ISREG(data->sb.st_mode) || S_ISFIFO(data->sb.st_mode) || S_ISSOCK(data->sb.st_mode); bool isDir = S_ISDIR(data->sb.st_mode); bool exists = false; if (virFileExists(data->file)) exists = true; if (virFileMakeParentPath(data->file) < 0) { virReportSystemError(errno, _("Unable to create %1$s"), data->file); goto cleanup; } if (isLink) { g_autofree char *target = NULL; if ((target = g_file_read_link(data->file, NULL)) && STREQ(target, data->target)) { VIR_DEBUG("Skipping symlink %s -> %s which exists and points to correct target", data->file, data->target); } else { VIR_DEBUG("Creating symlink %s -> %s", data->file, data->target); /* First, unlink the symlink target. Symlinks change and * therefore we have no guarantees that pre-existing * symlink is still valid. */ if (unlink(data->file) < 0 && errno != ENOENT) { virReportSystemError(errno, _("Unable to remove symlink %1$s"), data->file); goto cleanup; } if (symlink(data->target, data->file) < 0) { virReportSystemError(errno, _("Unable to create symlink %1$s (pointing to %2$s)"), data->file, data->target); goto cleanup; } else { delDevice = true; } } } else if (isDev) { GStatBuf sb; if (g_lstat(data->file, &sb) >= 0 && sb.st_rdev == data->sb.st_rdev) { VIR_DEBUG("Skipping dev %s (%d,%d) which exists and has correct MAJ:MIN", data->file, major(data->sb.st_rdev), minor(data->sb.st_rdev)); } else { VIR_DEBUG("Creating dev %s (%d,%d)", data->file, major(data->sb.st_rdev), minor(data->sb.st_rdev)); unlink(data->file); if (mknod(data->file, data->sb.st_mode, data->sb.st_rdev) < 0) { virReportSystemError(errno, _("Unable to create device %1$s"), data->file); goto cleanup; } else { delDevice = true; } } } else if (isReg || isDir) { /* We are not cleaning up disks on virDomainDetachDevice * because disk might be still in use by different disk * as its backing chain. This might however clash here. * Therefore do the cleanup here. */ if (umount(data->file) < 0 && errno != ENOENT && errno != EINVAL) { virReportSystemError(errno, _("Unable to umount %1$s"), data->file); goto cleanup; } if ((isReg && virFileTouch(data->file, data->sb.st_mode) < 0) || (isDir && g_mkdir_with_parents(data->file, data->sb.st_mode) < 0)) goto cleanup; delDevice = true; /* Just create the file here so that code below sets * proper owner and mode. Move the mount only after that. */ } else { virReportError(VIR_ERR_OPERATION_UNSUPPORTED, _("unsupported device type %1$s 0%2$o"), data->file, data->sb.st_mode); goto cleanup; } if (lchown(data->file, data->sb.st_uid, data->sb.st_gid) < 0) { virReportSystemError(errno, _("Failed to chown device %1$s"), data->file); goto cleanup; } /* Symlinks don't have mode */ if (!isLink && chmod(data->file, data->sb.st_mode) < 0) { virReportSystemError(errno, _("Failed to set permissions for device %1$s"), data->file); goto cleanup; } if (data->acl && virFileSetACLs(data->file, data->acl) < 0 && errno != ENOTSUP) { virReportSystemError(errno, _("Unable to set ACLs on %1$s"), data->file); goto cleanup; } # ifdef WITH_SELINUX if (data->tcon && lsetfilecon_raw(data->file, (const char *)data->tcon) < 0) { VIR_WARNINGS_NO_WLOGICALOP_EQUAL_EXPR if (errno != EOPNOTSUPP && errno != ENOTSUP) { VIR_WARNINGS_RESET virReportSystemError(errno, _("Unable to set SELinux label on %1$s"), data->file); goto cleanup; } } # endif /* Finish mount process started earlier. */ if ((isReg || isDir) && virFileMoveMount(data->target, data->file) < 0) goto cleanup; ret = exists; cleanup: if (ret < 0 && delDevice) { if (isDir) virFileDeleteTree(data->file); else unlink(data->file); } return ret; } static bool qemuNamespaceMknodItemNeedsBindMount(mode_t st_mode) { /* A block device S_ISBLK() or a chardev S_ISCHR() is intentionally not * handled. We want to mknod() it instead of passing in through bind * mounting. */ return S_ISREG(st_mode) || S_ISFIFO(st_mode) || S_ISSOCK(st_mode) || S_ISDIR(st_mode); } static int qemuNamespaceMknodHelper(pid_t pid G_GNUC_UNUSED, void *opaque) { qemuNamespaceMknodData *data = opaque; size_t i; int ret = -1; bool exists = false; qemuSecurityPostFork(data->driver->securityManager); for (i = 0; i < data->nitems; i++) { int rc = 0; if ((rc = qemuNamespaceMknodOne(&data->items[i])) < 0) goto cleanup; if (rc > 0) exists = true; } ret = exists; cleanup: qemuNamespaceMknodDataClear(data); return ret; } static int qemuNamespaceMknodItemInit(qemuNamespaceMknodItem *item, virQEMUDriverConfig *cfg, virDomainObj *vm, const char *file) { g_autofree char *target = NULL; bool isLink; bool needsBindMount; item->file = g_strdup(file); if (g_lstat(file, &item->sb) < 0) { if (errno == ENOENT) return -2; virReportSystemError(errno, _("Unable to access %1$s"), file); return -1; } isLink = S_ISLNK(item->sb.st_mode); needsBindMount = qemuNamespaceMknodItemNeedsBindMount(item->sb.st_mode); if (needsBindMount && STRPREFIX(file, QEMU_DEVPREFIX)) { if (!(target = qemuDomainGetPreservedMountPath(cfg, vm, file))) return -1; item->target = g_steal_pointer(&target); } else if (isLink) { g_autoptr(GError) gerr = NULL; if (!(target = g_file_read_link(file, &gerr))) { virReportError(VIR_ERR_SYSTEM_ERROR, _("failed to resolve symlink %1$s: %2$s"), file, gerr->message); return -1; } if (!g_path_is_absolute(target)) { g_autofree char *fileTmp = g_strdup(file); char *c = NULL; char *tmp = NULL; if ((c = strrchr(fileTmp, '/'))) *(c + 1) = '\0'; tmp = g_strdup_printf("%s%s", fileTmp, target); VIR_FREE(target); target = g_steal_pointer(&tmp); } item->target = g_steal_pointer(&target); } /* Symlinks don't have ACLs. */ if (!isLink && virFileGetACLs(file, &item->acl) < 0 && errno != ENOTSUP) { virReportSystemError(errno, _("Unable to get ACLs on %1$s"), file); return -1; } # ifdef WITH_SELINUX if (lgetfilecon_raw(file, &item->tcon) < 0 && (errno != ENOTSUP && errno != ENODATA)) { virReportSystemError(errno, _("Unable to get SELinux label from %1$s"), file); return -1; } # endif return 0; } static int qemuNamespacePrepareOneItem(qemuNamespaceMknodData *data, virQEMUDriverConfig *cfg, virDomainObj *vm, const char *file, GStrv devMountsPath) { long ttl = sysconf(_SC_SYMLOOP_MAX); g_autofree char *next = g_strdup(file); while (1) { g_auto(qemuNamespaceMknodItem) item = { 0 }; bool isLink; int rc; rc = qemuNamespaceMknodItemInit(&item, cfg, vm, next); if (rc == -2) { /* @file doesn't exist. We can break here. */ break; } else if (rc < 0) { /* Some other (critical) error. */ return -1; } isLink = S_ISLNK(item.sb.st_mode); g_free(next); next = g_strdup(item.target); if (STRPREFIX(item.file, QEMU_DEVPREFIX)) { GStrv n; bool found = false; for (n = devMountsPath; n && *n; n++) { const char *p; if (STREQ(*n, "/dev")) continue; if ((p = STRSKIP(item.file, *n)) && *p == '/') { found = true; break; } } if (!found) VIR_APPEND_ELEMENT(data->items, data->nitems, item); } if (!isLink) break; if (ttl-- == 0) { virReportSystemError(ELOOP, _("Too many levels of symbolic links: %1$s"), next); return -1; } } return 0; } static int qemuNamespaceMknodPaths(virDomainObj *vm, GSList *paths, bool *created) { qemuDomainObjPrivate *priv = vm->privateData; virQEMUDriver *driver = priv->driver; g_autoptr(virQEMUDriverConfig) cfg = NULL; g_auto(GStrv) devMountsPath = NULL; qemuNamespaceMknodData data = { 0 }; size_t i; int ret = -1; GSList *next; if (!paths) return 0; cfg = virQEMUDriverGetConfig(driver); if (qemuDomainGetPreservedMounts(cfg, vm, &devMountsPath, NULL, NULL) < 0) return -1; data.driver = driver; data.vm = vm; for (next = paths; next; next = next->next) { const char *path = next->data; if (qemuNamespacePrepareOneItem(&data, cfg, vm, path, devMountsPath) < 0) goto cleanup; } if (data.nitems == 0) return 0; for (i = 0; i < data.nitems; i++) { qemuNamespaceMknodItem *item = &data.items[i]; if (item->target && qemuNamespaceMknodItemNeedsBindMount(item->sb.st_mode)) { if (virFileBindMountDevice(item->file, item->target) < 0) goto cleanup; item->bindmounted = true; } } if (qemuSecurityPreFork(driver->securityManager) < 0) goto cleanup; ret = virProcessRunInMountNamespace(vm->pid, qemuNamespaceMknodHelper, &data); qemuSecurityPostFork(driver->securityManager); if (ret == 0 && created != NULL) *created = true; cleanup: for (i = 0; i < data.nitems; i++) { if (data.items[i].bindmounted && umount(data.items[i].target) < 0) { VIR_WARN("Unable to unmount %s", data.items[i].target); } } qemuNamespaceMknodDataClear(&data); return ret; } #else /* !defined(__linux__) */ static int qemuNamespaceMknodPaths(virDomainObj *vm G_GNUC_UNUSED, GSList *paths G_GNUC_UNUSED, bool *created G_GNUC_UNUSED) { virReportSystemError(ENOSYS, "%s", _("Namespaces are not supported on this platform")); return -1; } #endif /* !defined(__linux__) */ static int qemuNamespaceUnlinkHelper(pid_t pid G_GNUC_UNUSED, void *opaque) { g_autoptr(virGSListString) paths = opaque; GSList *next; for (next = paths; next; next = next->next) { const char *path = next->data; VIR_DEBUG("Unlinking %s", path); if (unlink(path) < 0 && errno != ENOENT) { virReportSystemError(errno, _("Unable to remove device %1$s"), path); return -1; } } return 0; } static int qemuNamespaceUnlinkPaths(virDomainObj *vm, GSList *paths) { qemuDomainObjPrivate *priv = vm->privateData; virQEMUDriver *driver = priv->driver; g_autoptr(virQEMUDriverConfig) cfg = NULL; g_auto(GStrv) devMountsPath = NULL; g_autoptr(virGSListString) unlinkPaths = NULL; GSList *next; if (!paths) return 0; cfg = virQEMUDriverGetConfig(driver); if (qemuDomainGetPreservedMounts(cfg, vm, &devMountsPath, NULL, NULL) < 0) return -1; for (next = paths; next; next = next->next) { const char *path = next->data; if (STRPREFIX(path, QEMU_DEVPREFIX)) { GStrv mount; bool inSubmount = false; const char *const *devices = (const char *const *)cfg->cgroupDeviceACL; for (mount = devMountsPath; *mount; mount++) { if (STREQ(*mount, "/dev")) continue; if (STRPREFIX(path, *mount)) { inSubmount = true; break; } } if (inSubmount) continue; if (!devices) devices = defaultDeviceACL; if (g_strv_contains(devices, path)) continue; unlinkPaths = g_slist_prepend(unlinkPaths, g_strdup(path)); } } if (unlinkPaths && virProcessRunInMountNamespace(vm->pid, qemuNamespaceUnlinkHelper, unlinkPaths) < 0) return -1; return 0; } int qemuDomainNamespaceSetupPath(virDomainObj *vm, const char *path, bool *created) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; paths = g_slist_prepend(paths, g_strdup(path)); if (qemuNamespaceMknodPaths(vm, paths, created) < 0) return -1; return 0; } int qemuDomainNamespaceSetupDisk(virDomainObj *vm, virStorageSource *src, bool *created) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupDisk(src, &paths) < 0) return -1; if (qemuNamespaceMknodPaths(vm, paths, created) < 0) return -1; return 0; } int qemuDomainNamespaceTeardownDisk(virDomainObj *vm G_GNUC_UNUSED, virStorageSource *src G_GNUC_UNUSED) { /* While in hotplug case we create the whole backing chain, * here we must limit ourselves. The disk we want to remove * might be a part of backing chain of another disk. * If you are reading these lines and have some spare time * you can come up with and algorithm that checks for that. * I don't, therefore: */ return 0; } /** * qemuDomainNamespaceSetupHostdev: * @vm: domain object * @hostdev: hostdev to create in @vm's namespace * * For given @hostdev, create its devfs representation (if it has one) in * domain namespace. Note, @hostdev must not be in @vm's definition. * * Returns: 0 on success, * -1 otherwise. */ int qemuDomainNamespaceSetupHostdev(virDomainObj *vm, virDomainHostdevDef *hostdev, bool *created) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupHostdev(vm, hostdev, true, &paths) < 0) return -1; if (qemuNamespaceMknodPaths(vm, paths, created) < 0) return -1; return 0; } /** * qemuDomainNamespaceTeardownHostdev: * @vm: domain object * @hostdev: hostdev to remove in @vm's namespace * * For given @hostdev, remove its devfs representation (if it has one) in * domain namespace. Note, @hostdev must not be in @vm's definition. * * Returns: 0 on success, * -1 otherwise. */ int qemuDomainNamespaceTeardownHostdev(virDomainObj *vm, virDomainHostdevDef *hostdev) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupHostdev(vm, hostdev, true, &paths) < 0) return -1; if (qemuNamespaceUnlinkPaths(vm, paths) < 0) return -1; return 0; } int qemuDomainNamespaceSetupMemory(virDomainObj *vm, virDomainMemoryDef *mem, bool *created) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupMemory(mem, &paths) < 0) return -1; if (qemuNamespaceMknodPaths(vm, paths, created) < 0) return -1; return 0; } int qemuDomainNamespaceTeardownMemory(virDomainObj *vm, virDomainMemoryDef *mem) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupMemory(mem, &paths) < 0) return -1; if (qemuNamespaceUnlinkPaths(vm, paths) < 0) return -1; return 0; } int qemuDomainNamespaceSetupChardev(virDomainObj *vm, virDomainChrDef *chr, bool *created) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupChardev(vm->def, chr, &paths) < 0) return -1; if (qemuNamespaceMknodPaths(vm, paths, created) < 0) return -1; return 0; } int qemuDomainNamespaceTeardownChardev(virDomainObj *vm, virDomainChrDef *chr) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupChardev(vm->def, chr, &paths) < 0) return -1; if (qemuNamespaceUnlinkPaths(vm, paths) < 0) return -1; return 0; } int qemuDomainNamespaceSetupRNG(virDomainObj *vm, virDomainRNGDef *rng, bool *created) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupRNG(rng, &paths) < 0) return -1; if (qemuNamespaceMknodPaths(vm, paths, created) < 0) return -1; return 0; } int qemuDomainNamespaceTeardownRNG(virDomainObj *vm, virDomainRNGDef *rng) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupRNG(rng, &paths) < 0) return -1; if (qemuNamespaceUnlinkPaths(vm, paths) < 0) return -1; return 0; } int qemuDomainNamespaceSetupInput(virDomainObj *vm, virDomainInputDef *input, bool *created) { g_autoptr(virGSListString) paths = NULL; int ret = 0; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupInput(input, &paths) < 0) return -1; if ((ret = qemuNamespaceMknodPaths(vm, paths, created)) < 0) return -1; return 0; } int qemuDomainNamespaceTeardownInput(virDomainObj *vm, virDomainInputDef *input) { g_autoptr(virGSListString) paths = NULL; if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) return 0; if (qemuDomainSetupInput(input, &paths) < 0) return -1; if (qemuNamespaceUnlinkPaths(vm, paths) < 0) return -1; return 0; }