libvirt/src/util/virfile.c

3339 lines
86 KiB
C
Raw Normal View History

/*
* virfile.c: safer file handling
*
* Copyright (C) 2010-2014 Red Hat, Inc.
* Copyright (C) 2010 IBM Corporation
* Copyright (C) 2010 Stefan Berger
* Copyright (C) 2010 Eric Blake
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*
*/
#include <config.h>
#include "internal.h"
#include <passfd.h>
#include <fcntl.h>
#include <pty.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>
#include <dirent.h>
#include <dirname.h>
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
# include <mntent.h>
#endif
#include <stdlib.h>
#if HAVE_MMAP
# include <sys/mman.h>
#endif
#if HAVE_SYS_SYSCALL_H
# include <sys/syscall.h>
#endif
#ifdef __linux__
# if HAVE_LINUX_MAGIC_H
# include <linux/magic.h>
# endif
# include <sys/statfs.h>
#endif
#if defined(__linux__) && HAVE_DECL_LO_FLAGS_AUTOCLEAR
# include <linux/loop.h>
# include <sys/ioctl.h>
#endif
#include "configmake.h"
2012-12-12 18:06:53 +00:00
#include "viralloc.h"
#include "vircommand.h"
#include "virerror.h"
#include "virfile.h"
#include "virkmod.h"
2012-12-12 17:59:27 +00:00
#include "virlog.h"
#include "virprocess.h"
#include "virstring.h"
#include "virstoragefile.h"
#include "virutil.h"
#include "c-ctype.h"
#define VIR_FROM_THIS VIR_FROM_NONE
VIR_LOG_INIT("util.file");
int virFileClose(int *fdptr, virFileCloseFlags flags)
{
int saved_errno = 0;
int rc = 0;
if (*fdptr < 0)
return 0;
if (flags & VIR_FILE_CLOSE_PRESERVE_ERRNO)
saved_errno = errno;
rc = close(*fdptr);
if (!(flags & VIR_FILE_CLOSE_DONT_LOG)) {
if (rc < 0) {
if (errno == EBADF) {
if (!(flags & VIR_FILE_CLOSE_IGNORE_EBADF))
VIR_WARN("Tried to close invalid fd %d", *fdptr);
} else {
char ebuf[1024] ATTRIBUTE_UNUSED;
VIR_DEBUG("Failed to close fd %d: %s",
*fdptr, virStrerror(errno, ebuf, sizeof(ebuf)));
}
} else {
VIR_DEBUG("Closed fd %d", *fdptr);
}
}
*fdptr = -1;
if (flags & VIR_FILE_CLOSE_PRESERVE_ERRNO)
errno = saved_errno;
return rc;
}
int virFileFclose(FILE **file, bool preserve_errno)
{
int saved_errno = 0;
int rc = 0;
if (*file) {
if (preserve_errno)
saved_errno = errno;
rc = fclose(*file);
*file = NULL;
if (preserve_errno)
errno = saved_errno;
}
return rc;
}
FILE *virFileFdopen(int *fdptr, const char *mode)
{
FILE *file = NULL;
if (*fdptr >= 0) {
file = fdopen(*fdptr, mode);
if (file)
*fdptr = -1;
} else {
errno = EBADF;
}
return file;
}
/**
* virFileDirectFdFlag:
*
* Returns 0 if the kernel can avoid file system cache pollution
* without any additional flags, O_DIRECT if the original fd must be
* opened in direct mode, or -1 if there is no support for bypassing
* the file system cache.
*/
int
virFileDirectFdFlag(void)
{
/* XXX For now, Linux posix_fadvise is not powerful enough to
* avoid O_DIRECT. */
return O_DIRECT ? O_DIRECT : -1;
}
/* Opaque type for managing a wrapper around a fd. For now,
* read-write is not supported, just a single direction. */
struct _virFileWrapperFd {
virCommandPtr cmd; /* Child iohelper process to do the I/O. */
char *err_msg; /* stderr of @cmd */
};
#ifndef WIN32
/**
* virFileWrapperFdNew:
* @fd: pointer to fd to wrap
* @name: name of fd, for diagnostics
* @flags: bitwise-OR of virFileWrapperFdFlags
*
* Update @fd so that it meets parameters requested by @flags.
*
* If VIR_FILE_WRAPPER_BYPASS_CACHE bit is set in @flags, @fd will be updated
* in a way that all I/O to that file will bypass the system cache. The
* original fd must have been created with virFileDirectFdFlag() among the
* flags to open().
*
* If VIR_FILE_WRAPPER_NON_BLOCKING bit is set in @flags, @fd will be updated
* to ensure it properly supports non-blocking I/O, i.e., it will report
* EAGAIN.
*
* This must be called after open() and optional fchown() or fchmod(), but
* before any seek or I/O, and only on seekable fd. The file must be O_RDONLY
* (to read the entire existing file) or O_WRONLY (to write to an empty file).
* In some cases, @fd is changed to a non-seekable pipe; in this case, the
* caller must not do anything further with the original fd.
*
* On success, the new wrapper object is returned, which must be later
* freed with virFileWrapperFdFree(). On failure, @fd is unchanged, an
* error message is output, and NULL is returned.
*/
virFileWrapperFdPtr
virFileWrapperFdNew(int *fd, const char *name, unsigned int flags)
{
virFileWrapperFdPtr ret = NULL;
bool output = false;
int pipefd[2] = { -1, -1 };
int mode = -1;
char *iohelper_path = NULL;
if (!flags) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("invalid use with no flags"));
return NULL;
}
/* XXX support posix_fadvise rather than O_DIRECT, if the kernel support
* for that is decent enough. In that case, we will also need to
* explicitly support VIR_FILE_WRAPPER_NON_BLOCKING since
* VIR_FILE_WRAPPER_BYPASS_CACHE alone will no longer require spawning
* iohelper.
*/
if ((flags & VIR_FILE_WRAPPER_BYPASS_CACHE) && !O_DIRECT) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("O_DIRECT unsupported on this platform"));
return NULL;
}
if (VIR_ALLOC(ret) < 0)
return NULL;
mode = fcntl(*fd, F_GETFL);
if (mode < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, _("invalid fd %d for %s"),
*fd, name);
goto error;
} else if ((mode & O_ACCMODE) == O_WRONLY) {
output = true;
} else if ((mode & O_ACCMODE) != O_RDONLY) {
virReportError(VIR_ERR_INTERNAL_ERROR, _("unexpected mode %x for %s"),
mode & O_ACCMODE, name);
goto error;
}
if (pipe2(pipefd, O_CLOEXEC) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("unable to create pipe for %s"), name);
goto error;
}
if (!(iohelper_path = virFileFindResource("libvirt_iohelper",
abs_topbuilddir "/src",
LIBEXECDIR)))
goto error;
ret->cmd = virCommandNewArgList(iohelper_path, name, "0", NULL);
VIR_FREE(iohelper_path);
if (output) {
virCommandSetInputFD(ret->cmd, pipefd[0]);
virCommandSetOutputFD(ret->cmd, fd);
virCommandAddArg(ret->cmd, "1");
} else {
virCommandSetInputFD(ret->cmd, *fd);
virCommandSetOutputFD(ret->cmd, &pipefd[1]);
virCommandAddArg(ret->cmd, "0");
}
/* In order to catch iohelper stderr, we must change
* iohelper's env so virLog functions print to stderr
*/
virCommandAddEnvPair(ret->cmd, "LIBVIRT_LOG_OUTPUTS", "1:stderr");
virCommandSetErrorBuffer(ret->cmd, &ret->err_msg);
virCommandDoAsyncIO(ret->cmd);
if (virCommandRunAsync(ret->cmd, NULL) < 0)
goto error;
if (VIR_CLOSE(pipefd[!output]) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unable to close pipe"));
goto error;
}
VIR_FORCE_CLOSE(*fd);
*fd = pipefd[output];
return ret;
error:
VIR_FREE(iohelper_path);
VIR_FORCE_CLOSE(pipefd[0]);
VIR_FORCE_CLOSE(pipefd[1]);
virFileWrapperFdFree(ret);
return NULL;
}
#else
virFileWrapperFdPtr
virFileWrapperFdNew(int *fd ATTRIBUTE_UNUSED,
const char *name ATTRIBUTE_UNUSED,
unsigned int fdflags ATTRIBUTE_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("virFileWrapperFd unsupported on this platform"));
return NULL;
}
#endif
/**
* virFileWrapperFdClose:
* @wfd: fd wrapper, or NULL
*
* If @wfd is valid, then ensure that I/O has completed, which may
* include reaping a child process. Return 0 if all data for the
* wrapped fd is complete, or -1 on failure with an error emitted.
* This function intentionally returns 0 when @wfd is NULL, so that
* callers can conditionally create a virFileWrapperFd wrapper but
* unconditionally call the cleanup code. To avoid deadlock, only
* call this after closing the fd resulting from virFileWrapperFdNew().
*/
int
virFileWrapperFdClose(virFileWrapperFdPtr wfd)
{
int ret;
if (!wfd)
return 0;
ret = virCommandWait(wfd->cmd, NULL);
if (wfd->err_msg)
VIR_WARN("iohelper reports: %s", wfd->err_msg);
return ret;
}
/**
* virFileWrapperFdFree:
* @wfd: fd wrapper, or NULL
*
* Free all remaining resources associated with @wfd. If
* virFileWrapperFdClose() was not previously called, then this may
* discard some previous I/O. To avoid deadlock, only call this after
* closing the fd resulting from virFileWrapperFdNew().
*/
void
virFileWrapperFdFree(virFileWrapperFdPtr wfd)
{
if (!wfd)
return;
VIR_FREE(wfd->err_msg);
virCommandFree(wfd->cmd);
VIR_FREE(wfd);
}
#ifndef WIN32
/**
* virFileLock:
* @fd: file descriptor to acquire the lock on
* @shared: type of lock to acquire
* @start: byte offset to start lock
* @len: length of lock (0 to acquire entire remaining file from @start)
* @waitForLock: wait for previously held lock or not
*
* Attempt to acquire a lock on the file @fd. If @shared
* is true, then a shared lock will be acquired,
* otherwise an exclusive lock will be acquired. If
* the lock cannot be acquired, an error will be
* returned. If @waitForLock is true, this will wait
* for the lock if another process has already acquired it.
*
* The lock will be released when @fd is closed. The lock
* will also be released if *any* other open file descriptor
* pointing to the same underlying file is closed. As such
* this function should not be relied on in multi-threaded
* apps where other threads can be opening/closing arbitrary
* files.
*
* Returns 0 on success, or -errno otherwise
*/
int virFileLock(int fd, bool shared, off_t start, off_t len, bool waitForLock)
{
struct flock fl = {
.l_type = shared ? F_RDLCK : F_WRLCK,
.l_whence = SEEK_SET,
.l_start = start,
.l_len = len,
};
int cmd = waitForLock ? F_SETLKW : F_SETLK;
if (fcntl(fd, cmd, &fl) < 0)
return -errno;
return 0;
}
/**
* virFileUnlock:
* @fd: file descriptor to release the lock on
* @start: byte offset to start unlock
* @len: length of lock (0 to release entire remaining file from @start)
*
* Release a lock previously acquired with virFileUnlock().
* NB the lock will also be released if any open file descriptor
* pointing to the same file as @fd is closed
*
* Returns 0 on succcess, or -errno on error
*/
int virFileUnlock(int fd, off_t start, off_t len)
{
struct flock fl = {
.l_type = F_UNLCK,
.l_whence = SEEK_SET,
.l_start = start,
.l_len = len,
};
if (fcntl(fd, F_SETLK, &fl) < 0)
return -errno;
return 0;
}
#else
int virFileLock(int fd ATTRIBUTE_UNUSED,
bool shared ATTRIBUTE_UNUSED,
off_t start ATTRIBUTE_UNUSED,
off_t len ATTRIBUTE_UNUSED,
bool waitForLock ATTRIBUTE_UNUSED)
{
return -ENOSYS;
}
int virFileUnlock(int fd ATTRIBUTE_UNUSED,
off_t start ATTRIBUTE_UNUSED,
off_t len ATTRIBUTE_UNUSED)
{
return -ENOSYS;
}
#endif
int
virFileRewrite(const char *path,
mode_t mode,
virFileRewriteFunc rewrite,
void *opaque)
{
char *newfile = NULL;
int fd = -1;
int ret = -1;
if (virAsprintf(&newfile, "%s.new", path) < 0)
goto cleanup;
if ((fd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC, mode)) < 0) {
virReportSystemError(errno, _("cannot create file '%s'"),
newfile);
goto cleanup;
}
if (rewrite(fd, opaque) < 0) {
virReportSystemError(errno, _("cannot write data to file '%s'"),
newfile);
goto cleanup;
}
if (fsync(fd) < 0) {
virReportSystemError(errno, _("cannot sync file '%s'"),
newfile);
goto cleanup;
}
if (VIR_CLOSE(fd) < 0) {
virReportSystemError(errno, _("cannot save file '%s'"),
newfile);
goto cleanup;
}
if (rename(newfile, path) < 0) {
virReportSystemError(errno, _("cannot rename file '%s' as '%s'"),
newfile, path);
goto cleanup;
}
ret = 0;
cleanup:
VIR_FORCE_CLOSE(fd);
if (newfile) {
unlink(newfile);
VIR_FREE(newfile);
}
return ret;
}
int virFileTouch(const char *path, mode_t mode)
{
int fd = -1;
if ((fd = open(path, O_WRONLY | O_CREAT, mode)) < 0) {
virReportSystemError(errno, _("cannot create file '%s'"),
path);
return -1;
}
if (VIR_CLOSE(fd) < 0) {
virReportSystemError(errno, _("cannot save file '%s'"),
path);
VIR_FORCE_CLOSE(fd);
return -1;
}
return 0;
}
#define MODE_BITS (S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO)
int virFileUpdatePerm(const char *path,
mode_t mode_remove,
mode_t mode_add)
{
struct stat sb;
mode_t mode;
if (mode_remove & ~MODE_BITS || mode_add & ~MODE_BITS) {
virReportError(VIR_ERR_INVALID_ARG, "%s", _("invalid mode"));
return -1;
}
if (stat(path, &sb) < 0) {
virReportSystemError(errno, _("cannot stat '%s'"), path);
return -1;
}
mode = sb.st_mode & MODE_BITS;
if ((mode & mode_remove) == 0 && (mode & mode_add) == mode_add)
return 0;
mode &= MODE_BITS ^ mode_remove;
mode |= mode_add;
if (chmod(path, mode) < 0) {
virReportSystemError(errno, _("cannot change permission of '%s'"),
path);
return -1;
}
return 0;
}
#if defined(__linux__) && HAVE_DECL_LO_FLAGS_AUTOCLEAR && \
!defined(LIBVIRT_SETUID_RPC_CLIENT)
# if HAVE_DECL_LOOP_CTL_GET_FREE
/* virFileLoopDeviceOpenLoopCtl() returns -1 when a real failure has occurred
* while in the process of allocating or opening the loop device. On success
* we return 0 and modify the fd to the appropriate file descriptor.
* If /dev/loop-control does not exist, we return 0 and do not set fd. */
static int virFileLoopDeviceOpenLoopCtl(char **dev_name, int *fd)
{
int devnr;
int ctl_fd;
char *looppath = NULL;
VIR_DEBUG("Opening loop-control device");
if ((ctl_fd = open("/dev/loop-control", O_RDWR)) < 0) {
if (errno == ENOENT)
return 0;
virReportSystemError(errno, "%s",
_("Unable to open /dev/loop-control"));
return -1;
}
if ((devnr = ioctl(ctl_fd, LOOP_CTL_GET_FREE)) < 0) {
virReportSystemError(errno, "%s",
_("Unable to get free loop device via ioctl"));
close(ctl_fd);
return -1;
}
close(ctl_fd);
VIR_DEBUG("Found free loop device number %i", devnr);
if (virAsprintf(&looppath, "/dev/loop%i", devnr) < 0)
return -1;
if ((*fd = open(looppath, O_RDWR)) < 0) {
virReportSystemError(errno,
_("Unable to open %s"), looppath);
VIR_FREE(looppath);
return -1;
}
*dev_name = looppath;
return 0;
}
# endif /* HAVE_DECL_LOOP_CTL_GET_FREE */
static int virFileLoopDeviceOpenSearch(char **dev_name)
{
int fd = -1;
DIR *dh = NULL;
struct dirent *de;
char *looppath = NULL;
struct loop_info64 lo;
util: use virDirRead API In making the conversion to the new API, I fixed a couple bugs: virSCSIDeviceGetSgName would leak memory if a directory unexpectedly contained multiple entries; virNetDevTapGetRealDeviceName could report a spurious error from a stale errno inherited before starting the readdir search. The decision on whether to store the result of virDirRead into a variable is based on whether the end of the loop falls through to cleanup code automatically. In some cases, we have loops that are documented to return NULL on failure, and which raise an error on most failure paths but not in the case where the directory was unexpectedly empty; it may be worth a followup patch to explicitly report an error if readdir was successful but the directory was empty, so that a NULL return always has an error set. * src/util/vircgroup.c (virCgroupRemoveRecursively): Use new interface. (virCgroupKillRecursiveInternal, virCgroupSetOwner): Report readdir failures. * src/util/virfile.c (virFileLoopDeviceOpenSearch) (virFileNBDDeviceFindUnused, virFileDeleteTree): Use new interface. * src/util/virnetdevtap.c (virNetDevTapGetRealDeviceName): Properly check readdir errors. * src/util/virpci.c (virPCIDeviceIterDevices) (virPCIDeviceFileIterate, virPCIGetNetName): Report readdir failures. (virPCIDeviceAddressIOMMUGroupIterate): Use new interface. * src/util/virscsi.c (virSCSIDeviceGetSgName): Report readdir failures, and avoid memory leak. (virSCSIDeviceGetDevName): Report readdir failures. * src/util/virusb.c (virUSBDeviceSearch): Report readdir failures. * src/util/virutil.c (virGetFCHostNameByWWN) (virFindFCHostCapableVport): Report readdir failures. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-04-25 20:45:49 +00:00
int direrr;
VIR_DEBUG("Looking for loop devices in /dev");
if (!(dh = opendir("/dev"))) {
virReportSystemError(errno, "%s",
_("Unable to read /dev"));
goto cleanup;
}
util: use virDirRead API In making the conversion to the new API, I fixed a couple bugs: virSCSIDeviceGetSgName would leak memory if a directory unexpectedly contained multiple entries; virNetDevTapGetRealDeviceName could report a spurious error from a stale errno inherited before starting the readdir search. The decision on whether to store the result of virDirRead into a variable is based on whether the end of the loop falls through to cleanup code automatically. In some cases, we have loops that are documented to return NULL on failure, and which raise an error on most failure paths but not in the case where the directory was unexpectedly empty; it may be worth a followup patch to explicitly report an error if readdir was successful but the directory was empty, so that a NULL return always has an error set. * src/util/vircgroup.c (virCgroupRemoveRecursively): Use new interface. (virCgroupKillRecursiveInternal, virCgroupSetOwner): Report readdir failures. * src/util/virfile.c (virFileLoopDeviceOpenSearch) (virFileNBDDeviceFindUnused, virFileDeleteTree): Use new interface. * src/util/virnetdevtap.c (virNetDevTapGetRealDeviceName): Properly check readdir errors. * src/util/virpci.c (virPCIDeviceIterDevices) (virPCIDeviceFileIterate, virPCIGetNetName): Report readdir failures. (virPCIDeviceAddressIOMMUGroupIterate): Use new interface. * src/util/virscsi.c (virSCSIDeviceGetSgName): Report readdir failures, and avoid memory leak. (virSCSIDeviceGetDevName): Report readdir failures. * src/util/virusb.c (virUSBDeviceSearch): Report readdir failures. * src/util/virutil.c (virGetFCHostNameByWWN) (virFindFCHostCapableVport): Report readdir failures. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-04-25 20:45:49 +00:00
while ((direrr = virDirRead(dh, &de, "/dev")) > 0) {
/* Checking 'loop' prefix is insufficient, since
* new kernels have a dev named 'loop-control'
*/
if (!STRPREFIX(de->d_name, "loop") ||
!c_isdigit(de->d_name[4]))
continue;
if (virAsprintf(&looppath, "/dev/%s", de->d_name) < 0)
goto cleanup;
VIR_DEBUG("Checking up on device %s", looppath);
if ((fd = open(looppath, O_RDWR)) < 0) {
virReportSystemError(errno,
_("Unable to open %s"), looppath);
goto cleanup;
}
if (ioctl(fd, LOOP_GET_STATUS64, &lo) < 0) {
/* Got a free device, return the fd */
if (errno == ENXIO)
goto cleanup;
VIR_FORCE_CLOSE(fd);
virReportSystemError(errno,
_("Unable to get loop status on %s"),
looppath);
goto cleanup;
}
/* Oh well, try the next device */
VIR_FORCE_CLOSE(fd);
VIR_FREE(looppath);
}
util: use virDirRead API In making the conversion to the new API, I fixed a couple bugs: virSCSIDeviceGetSgName would leak memory if a directory unexpectedly contained multiple entries; virNetDevTapGetRealDeviceName could report a spurious error from a stale errno inherited before starting the readdir search. The decision on whether to store the result of virDirRead into a variable is based on whether the end of the loop falls through to cleanup code automatically. In some cases, we have loops that are documented to return NULL on failure, and which raise an error on most failure paths but not in the case where the directory was unexpectedly empty; it may be worth a followup patch to explicitly report an error if readdir was successful but the directory was empty, so that a NULL return always has an error set. * src/util/vircgroup.c (virCgroupRemoveRecursively): Use new interface. (virCgroupKillRecursiveInternal, virCgroupSetOwner): Report readdir failures. * src/util/virfile.c (virFileLoopDeviceOpenSearch) (virFileNBDDeviceFindUnused, virFileDeleteTree): Use new interface. * src/util/virnetdevtap.c (virNetDevTapGetRealDeviceName): Properly check readdir errors. * src/util/virpci.c (virPCIDeviceIterDevices) (virPCIDeviceFileIterate, virPCIGetNetName): Report readdir failures. (virPCIDeviceAddressIOMMUGroupIterate): Use new interface. * src/util/virscsi.c (virSCSIDeviceGetSgName): Report readdir failures, and avoid memory leak. (virSCSIDeviceGetDevName): Report readdir failures. * src/util/virusb.c (virUSBDeviceSearch): Report readdir failures. * src/util/virutil.c (virGetFCHostNameByWWN) (virFindFCHostCapableVport): Report readdir failures. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-04-25 20:45:49 +00:00
if (direrr < 0)
goto cleanup;
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find a free loop device in /dev"));
cleanup:
if (fd != -1) {
VIR_DEBUG("Got free loop device %s %d", looppath, fd);
*dev_name = looppath;
} else {
VIR_DEBUG("No free loop devices available");
VIR_FREE(looppath);
}
if (dh)
closedir(dh);
return fd;
}
static int virFileLoopDeviceOpen(char **dev_name)
{
int loop_fd = -1;
# if HAVE_DECL_LOOP_CTL_GET_FREE
if (virFileLoopDeviceOpenLoopCtl(dev_name, &loop_fd) < 0)
return -1;
VIR_DEBUG("Return from loop-control got fd %d\n", loop_fd);
if (loop_fd >= 0)
return loop_fd;
# endif /* HAVE_DECL_LOOP_CTL_GET_FREE */
/* Without the loop control device we just use the old technique. */
loop_fd = virFileLoopDeviceOpenSearch(dev_name);
return loop_fd;
}
int virFileLoopDeviceAssociate(const char *file,
char **dev)
{
int lofd = -1;
int fsfd = -1;
struct loop_info64 lo;
char *loname = NULL;
int ret = -1;
if ((lofd = virFileLoopDeviceOpen(&loname)) < 0)
return -1;
memset(&lo, 0, sizeof(lo));
lo.lo_flags = LO_FLAGS_AUTOCLEAR;
if ((fsfd = open(file, O_RDWR)) < 0) {
virReportSystemError(errno,
_("Unable to open %s"), file);
goto cleanup;
}
if (ioctl(lofd, LOOP_SET_FD, fsfd) < 0) {
virReportSystemError(errno,
_("Unable to attach %s to loop device"),
file);
goto cleanup;
}
if (ioctl(lofd, LOOP_SET_STATUS64, &lo) < 0) {
virReportSystemError(errno, "%s",
_("Unable to mark loop device as autoclear"));
if (ioctl(lofd, LOOP_CLR_FD, 0) < 0)
VIR_WARN("Unable to detach %s from loop device", file);
goto cleanup;
}
VIR_DEBUG("Attached loop device %s %d to %s", file, lofd, loname);
*dev = loname;
loname = NULL;
ret = 0;
cleanup:
VIR_FREE(loname);
VIR_FORCE_CLOSE(fsfd);
if (ret == -1)
VIR_FORCE_CLOSE(lofd);
return lofd;
}
# define SYSFS_BLOCK_DIR "/sys/block"
# define NBD_DRIVER "nbd"
static int
virFileNBDDeviceIsBusy(const char *dev_name)
{
char *path;
int ret = -1;
if (virAsprintf(&path, SYSFS_BLOCK_DIR "/%s/pid",
dev_name) < 0)
return -1;
if (!virFileExists(path)) {
if (errno == ENOENT)
ret = 0;
else
virReportSystemError(errno,
_("Cannot check NBD device %s pid"),
dev_name);
goto cleanup;
}
ret = 1;
cleanup:
VIR_FREE(path);
return ret;
}
static char *
virFileNBDDeviceFindUnused(void)
{
DIR *dh;
char *ret = NULL;
struct dirent *de;
util: use virDirRead API In making the conversion to the new API, I fixed a couple bugs: virSCSIDeviceGetSgName would leak memory if a directory unexpectedly contained multiple entries; virNetDevTapGetRealDeviceName could report a spurious error from a stale errno inherited before starting the readdir search. The decision on whether to store the result of virDirRead into a variable is based on whether the end of the loop falls through to cleanup code automatically. In some cases, we have loops that are documented to return NULL on failure, and which raise an error on most failure paths but not in the case where the directory was unexpectedly empty; it may be worth a followup patch to explicitly report an error if readdir was successful but the directory was empty, so that a NULL return always has an error set. * src/util/vircgroup.c (virCgroupRemoveRecursively): Use new interface. (virCgroupKillRecursiveInternal, virCgroupSetOwner): Report readdir failures. * src/util/virfile.c (virFileLoopDeviceOpenSearch) (virFileNBDDeviceFindUnused, virFileDeleteTree): Use new interface. * src/util/virnetdevtap.c (virNetDevTapGetRealDeviceName): Properly check readdir errors. * src/util/virpci.c (virPCIDeviceIterDevices) (virPCIDeviceFileIterate, virPCIGetNetName): Report readdir failures. (virPCIDeviceAddressIOMMUGroupIterate): Use new interface. * src/util/virscsi.c (virSCSIDeviceGetSgName): Report readdir failures, and avoid memory leak. (virSCSIDeviceGetDevName): Report readdir failures. * src/util/virusb.c (virUSBDeviceSearch): Report readdir failures. * src/util/virutil.c (virGetFCHostNameByWWN) (virFindFCHostCapableVport): Report readdir failures. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-04-25 20:45:49 +00:00
int direrr;
if (!(dh = opendir(SYSFS_BLOCK_DIR))) {
virReportSystemError(errno,
_("Cannot read directory %s"),
SYSFS_BLOCK_DIR);
return NULL;
}
util: use virDirRead API In making the conversion to the new API, I fixed a couple bugs: virSCSIDeviceGetSgName would leak memory if a directory unexpectedly contained multiple entries; virNetDevTapGetRealDeviceName could report a spurious error from a stale errno inherited before starting the readdir search. The decision on whether to store the result of virDirRead into a variable is based on whether the end of the loop falls through to cleanup code automatically. In some cases, we have loops that are documented to return NULL on failure, and which raise an error on most failure paths but not in the case where the directory was unexpectedly empty; it may be worth a followup patch to explicitly report an error if readdir was successful but the directory was empty, so that a NULL return always has an error set. * src/util/vircgroup.c (virCgroupRemoveRecursively): Use new interface. (virCgroupKillRecursiveInternal, virCgroupSetOwner): Report readdir failures. * src/util/virfile.c (virFileLoopDeviceOpenSearch) (virFileNBDDeviceFindUnused, virFileDeleteTree): Use new interface. * src/util/virnetdevtap.c (virNetDevTapGetRealDeviceName): Properly check readdir errors. * src/util/virpci.c (virPCIDeviceIterDevices) (virPCIDeviceFileIterate, virPCIGetNetName): Report readdir failures. (virPCIDeviceAddressIOMMUGroupIterate): Use new interface. * src/util/virscsi.c (virSCSIDeviceGetSgName): Report readdir failures, and avoid memory leak. (virSCSIDeviceGetDevName): Report readdir failures. * src/util/virusb.c (virUSBDeviceSearch): Report readdir failures. * src/util/virutil.c (virGetFCHostNameByWWN) (virFindFCHostCapableVport): Report readdir failures. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-04-25 20:45:49 +00:00
while ((direrr = virDirRead(dh, &de, SYSFS_BLOCK_DIR)) > 0) {
if (STRPREFIX(de->d_name, "nbd")) {
int rv = virFileNBDDeviceIsBusy(de->d_name);
if (rv < 0)
goto cleanup;
if (rv == 0) {
ignore_value(virAsprintf(&ret, "/dev/%s", de->d_name));
goto cleanup;
}
}
}
util: use virDirRead API In making the conversion to the new API, I fixed a couple bugs: virSCSIDeviceGetSgName would leak memory if a directory unexpectedly contained multiple entries; virNetDevTapGetRealDeviceName could report a spurious error from a stale errno inherited before starting the readdir search. The decision on whether to store the result of virDirRead into a variable is based on whether the end of the loop falls through to cleanup code automatically. In some cases, we have loops that are documented to return NULL on failure, and which raise an error on most failure paths but not in the case where the directory was unexpectedly empty; it may be worth a followup patch to explicitly report an error if readdir was successful but the directory was empty, so that a NULL return always has an error set. * src/util/vircgroup.c (virCgroupRemoveRecursively): Use new interface. (virCgroupKillRecursiveInternal, virCgroupSetOwner): Report readdir failures. * src/util/virfile.c (virFileLoopDeviceOpenSearch) (virFileNBDDeviceFindUnused, virFileDeleteTree): Use new interface. * src/util/virnetdevtap.c (virNetDevTapGetRealDeviceName): Properly check readdir errors. * src/util/virpci.c (virPCIDeviceIterDevices) (virPCIDeviceFileIterate, virPCIGetNetName): Report readdir failures. (virPCIDeviceAddressIOMMUGroupIterate): Use new interface. * src/util/virscsi.c (virSCSIDeviceGetSgName): Report readdir failures, and avoid memory leak. (virSCSIDeviceGetDevName): Report readdir failures. * src/util/virusb.c (virUSBDeviceSearch): Report readdir failures. * src/util/virutil.c (virGetFCHostNameByWWN) (virFindFCHostCapableVport): Report readdir failures. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-04-25 20:45:49 +00:00
if (direrr < 0)
goto cleanup;
virReportSystemError(EBUSY, "%s",
_("No free NBD devices"));
cleanup:
closedir(dh);
return ret;
}
static bool
virFileNBDLoadDriver(void)
{
if (virKModIsBlacklisted(NBD_DRIVER)) {
2015-07-30 09:10:48 +00:00
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Failed to load nbd module: "
"administratively prohibited"));
return false;
} else {
char *errbuf = NULL;
if ((errbuf = virKModLoad(NBD_DRIVER, true))) {
VIR_FREE(errbuf);
2015-07-30 09:10:48 +00:00
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Failed to load nbd module"));
return false;
}
VIR_FREE(errbuf);
}
return true;
}
int virFileNBDDeviceAssociate(const char *file,
virStorageFileFormat fmt,
bool readonly,
char **dev)
{
char *nbddev = NULL;
char *qemunbd = NULL;
virCommandPtr cmd = NULL;
int ret = -1;
const char *fmtstr = NULL;
if (!virFileNBDLoadDriver())
goto cleanup;
if (!(nbddev = virFileNBDDeviceFindUnused()))
goto cleanup;
if (!(qemunbd = virFindFileInPath("qemu-nbd"))) {
virReportSystemError(ENOENT, "%s",
_("Unable to find 'qemu-nbd' binary in $PATH"));
goto cleanup;
}
if (fmt > 0)
fmtstr = virStorageFileFormatTypeToString(fmt);
cmd = virCommandNew(qemunbd);
/* Explicitly not trying to cope with old qemu-nbd which
* lacked --format. We want to see a fatal error in that
* case since it would be security flaw to continue */
if (fmtstr)
virCommandAddArgList(cmd, "--format", fmtstr, NULL);
if (readonly)
virCommandAddArg(cmd, "-r");
virCommandAddArgList(cmd,
"-n", /* Don't cache in qemu-nbd layer */
"-c", nbddev,
file, NULL);
/* qemu-nbd will daemonize itself */
if (virCommandRun(cmd, NULL) < 0)
goto cleanup;
VIR_DEBUG("Associated NBD device %s with file %s and format %s",
nbddev, file, fmtstr);
*dev = nbddev;
nbddev = NULL;
ret = 0;
cleanup:
VIR_FREE(nbddev);
VIR_FREE(qemunbd);
virCommandFree(cmd);
return ret;
}
#else /* __linux__ */
int virFileLoopDeviceAssociate(const char *file,
char **dev ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS,
_("Unable to associate file %s with loop device"),
file);
*dev = NULL;
return -1;
}
int virFileNBDDeviceAssociate(const char *file,
virStorageFileFormat fmt ATTRIBUTE_UNUSED,
bool readonly ATTRIBUTE_UNUSED,
char **dev ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS,
_("Unable to associate file %s with NBD device"),
file);
return -1;
}
#endif /* __linux__ */
/**
* virFileDeleteTree:
*
* Recursively deletes all files / directories
* starting from the directory @dir. Does not
* follow symlinks
*
* NB the algorithm is not efficient, and is subject to
* race conditions which can be exploited by malicious
* code. It should not be used in any scenarios where
* performance is important, or security is critical.
*/
int virFileDeleteTree(const char *dir)
{
DIR *dh;
struct dirent *de;
char *filepath = NULL;
int ret = -1;
util: use virDirRead API In making the conversion to the new API, I fixed a couple bugs: virSCSIDeviceGetSgName would leak memory if a directory unexpectedly contained multiple entries; virNetDevTapGetRealDeviceName could report a spurious error from a stale errno inherited before starting the readdir search. The decision on whether to store the result of virDirRead into a variable is based on whether the end of the loop falls through to cleanup code automatically. In some cases, we have loops that are documented to return NULL on failure, and which raise an error on most failure paths but not in the case where the directory was unexpectedly empty; it may be worth a followup patch to explicitly report an error if readdir was successful but the directory was empty, so that a NULL return always has an error set. * src/util/vircgroup.c (virCgroupRemoveRecursively): Use new interface. (virCgroupKillRecursiveInternal, virCgroupSetOwner): Report readdir failures. * src/util/virfile.c (virFileLoopDeviceOpenSearch) (virFileNBDDeviceFindUnused, virFileDeleteTree): Use new interface. * src/util/virnetdevtap.c (virNetDevTapGetRealDeviceName): Properly check readdir errors. * src/util/virpci.c (virPCIDeviceIterDevices) (virPCIDeviceFileIterate, virPCIGetNetName): Report readdir failures. (virPCIDeviceAddressIOMMUGroupIterate): Use new interface. * src/util/virscsi.c (virSCSIDeviceGetSgName): Report readdir failures, and avoid memory leak. (virSCSIDeviceGetDevName): Report readdir failures. * src/util/virusb.c (virUSBDeviceSearch): Report readdir failures. * src/util/virutil.c (virGetFCHostNameByWWN) (virFindFCHostCapableVport): Report readdir failures. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-04-25 20:45:49 +00:00
int direrr;
/* Silently return 0 if passed NULL or directory doesn't exist */
if (!dir || !virFileExists(dir))
return 0;
if (!(dh = opendir(dir))) {
virReportSystemError(errno, _("Cannot open dir '%s'"),
dir);
return -1;
}
util: use virDirRead API In making the conversion to the new API, I fixed a couple bugs: virSCSIDeviceGetSgName would leak memory if a directory unexpectedly contained multiple entries; virNetDevTapGetRealDeviceName could report a spurious error from a stale errno inherited before starting the readdir search. The decision on whether to store the result of virDirRead into a variable is based on whether the end of the loop falls through to cleanup code automatically. In some cases, we have loops that are documented to return NULL on failure, and which raise an error on most failure paths but not in the case where the directory was unexpectedly empty; it may be worth a followup patch to explicitly report an error if readdir was successful but the directory was empty, so that a NULL return always has an error set. * src/util/vircgroup.c (virCgroupRemoveRecursively): Use new interface. (virCgroupKillRecursiveInternal, virCgroupSetOwner): Report readdir failures. * src/util/virfile.c (virFileLoopDeviceOpenSearch) (virFileNBDDeviceFindUnused, virFileDeleteTree): Use new interface. * src/util/virnetdevtap.c (virNetDevTapGetRealDeviceName): Properly check readdir errors. * src/util/virpci.c (virPCIDeviceIterDevices) (virPCIDeviceFileIterate, virPCIGetNetName): Report readdir failures. (virPCIDeviceAddressIOMMUGroupIterate): Use new interface. * src/util/virscsi.c (virSCSIDeviceGetSgName): Report readdir failures, and avoid memory leak. (virSCSIDeviceGetDevName): Report readdir failures. * src/util/virusb.c (virUSBDeviceSearch): Report readdir failures. * src/util/virutil.c (virGetFCHostNameByWWN) (virFindFCHostCapableVport): Report readdir failures. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-04-25 20:45:49 +00:00
while ((direrr = virDirRead(dh, &de, dir)) > 0) {
struct stat sb;
if (STREQ(de->d_name, ".") ||
STREQ(de->d_name, ".."))
continue;
if (virAsprintf(&filepath, "%s/%s",
dir, de->d_name) < 0)
goto cleanup;
if (lstat(filepath, &sb) < 0) {
virReportSystemError(errno, _("Cannot access '%s'"),
filepath);
goto cleanup;
}
if (S_ISDIR(sb.st_mode)) {
if (virFileDeleteTree(filepath) < 0)
goto cleanup;
} else {
if (unlink(filepath) < 0 && errno != ENOENT) {
virReportSystemError(errno,
_("Cannot delete file '%s'"),
filepath);
goto cleanup;
}
}
VIR_FREE(filepath);
}
util: use virDirRead API In making the conversion to the new API, I fixed a couple bugs: virSCSIDeviceGetSgName would leak memory if a directory unexpectedly contained multiple entries; virNetDevTapGetRealDeviceName could report a spurious error from a stale errno inherited before starting the readdir search. The decision on whether to store the result of virDirRead into a variable is based on whether the end of the loop falls through to cleanup code automatically. In some cases, we have loops that are documented to return NULL on failure, and which raise an error on most failure paths but not in the case where the directory was unexpectedly empty; it may be worth a followup patch to explicitly report an error if readdir was successful but the directory was empty, so that a NULL return always has an error set. * src/util/vircgroup.c (virCgroupRemoveRecursively): Use new interface. (virCgroupKillRecursiveInternal, virCgroupSetOwner): Report readdir failures. * src/util/virfile.c (virFileLoopDeviceOpenSearch) (virFileNBDDeviceFindUnused, virFileDeleteTree): Use new interface. * src/util/virnetdevtap.c (virNetDevTapGetRealDeviceName): Properly check readdir errors. * src/util/virpci.c (virPCIDeviceIterDevices) (virPCIDeviceFileIterate, virPCIGetNetName): Report readdir failures. (virPCIDeviceAddressIOMMUGroupIterate): Use new interface. * src/util/virscsi.c (virSCSIDeviceGetSgName): Report readdir failures, and avoid memory leak. (virSCSIDeviceGetDevName): Report readdir failures. * src/util/virusb.c (virUSBDeviceSearch): Report readdir failures. * src/util/virutil.c (virGetFCHostNameByWWN) (virFindFCHostCapableVport): Report readdir failures. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-04-25 20:45:49 +00:00
if (direrr < 0)
goto cleanup;
if (rmdir(dir) < 0 && errno != ENOENT) {
virReportSystemError(errno,
_("Cannot delete directory '%s'"),
dir);
goto cleanup;
}
ret = 0;
cleanup:
VIR_FREE(filepath);
closedir(dh);
return ret;
}
int
virFileStripSuffix(char *str, const char *suffix)
{
int len = strlen(str);
int suffixlen = strlen(suffix);
if (len < suffixlen)
return 0;
if (!STREQ(str + len - suffixlen, suffix))
return 0;
str[len-suffixlen] = '\0';
return 1;
}
/* Like read(), but restarts after EINTR. Doesn't play
* nicely with nonblocking FD and EAGAIN, in which case
* you want to use bare read(). Or even use virSocket()
* if the FD is related to a socket rather than a plain
* file or pipe. */
ssize_t
saferead(int fd, void *buf, size_t count)
{
size_t nread = 0;
while (count > 0) {
ssize_t r = read(fd, buf, count);
if (r < 0 && errno == EINTR)
continue;
if (r < 0)
return r;
if (r == 0)
return nread;
buf = (char *)buf + r;
count -= r;
nread += r;
}
return nread;
}
/* Like write(), but restarts after EINTR. Doesn't play
* nicely with nonblocking FD and EAGAIN, in which case
* you want to use bare write(). Or even use virSocket()
* if the FD is related to a socket rather than a plain
* file or pipe. */
ssize_t
safewrite(int fd, const void *buf, size_t count)
{
size_t nwritten = 0;
while (count > 0) {
ssize_t r = write(fd, buf, count);
if (r < 0 && errno == EINTR)
continue;
if (r < 0)
return r;
if (r == 0)
return nwritten;
buf = (const char *)buf + r;
count -= r;
nwritten += r;
}
return nwritten;
}
#ifdef HAVE_POSIX_FALLOCATE
static int
safezero_posix_fallocate(int fd, off_t offset, off_t len)
{
int ret = posix_fallocate(fd, offset, len);
if (ret == 0)
return 0;
errno = ret;
return -1;
}
#else /* !HAVE_POSIX_FALLOCATE */
static int
safezero_posix_fallocate(int fd ATTRIBUTE_UNUSED,
off_t offset ATTRIBUTE_UNUSED,
off_t len ATTRIBUTE_UNUSED)
{
return -2;
}
#endif /* !HAVE_POSIX_FALLOCATE */
#if HAVE_SYS_SYSCALL_H && defined(SYS_fallocate)
static int
safezero_sys_fallocate(int fd,
off_t offset,
off_t len)
{
return syscall(SYS_fallocate, fd, 0, offset, len);
}
#else /* !HAVE_SYS_SYSCALL_H || !defined(SYS_fallocate) */
static int
safezero_sys_fallocate(int fd ATTRIBUTE_UNUSED,
off_t offset ATTRIBUTE_UNUSED,
off_t len ATTRIBUTE_UNUSED)
{
return -2;
}
#endif /* !HAVE_SYS_SYSCALL_H || !defined(SYS_fallocate) */
#ifdef HAVE_MMAP
static int
safezero_mmap(int fd, off_t offset, off_t len)
{
int r;
char *buf;
static long pagemask;
off_t map_skip;
/* align offset and length, rounding offset down and length up */
if (pagemask == 0)
pagemask = ~(virGetSystemPageSize() - 1);
map_skip = offset - (offset & pagemask);
/* memset wants the mmap'ed file to be present on disk so create a
* sparse file
*/
r = ftruncate(fd, offset + len);
if (r < 0)
return -1;
buf = mmap(NULL, len + map_skip, PROT_READ | PROT_WRITE, MAP_SHARED,
fd, offset - map_skip);
if (buf != MAP_FAILED) {
memset(buf + map_skip, 0, len);
munmap(buf, len + map_skip);
return 0;
}
/* fall back to writing zeroes using safewrite if mmap fails (for
* example because of virtual memory limits) */
return -2;
}
#else /* !HAVE_MMAP */
static int
safezero_mmap(int fd ATTRIBUTE_UNUSED,
off_t offset ATTRIBUTE_UNUSED,
off_t len ATTRIBUTE_UNUSED)
{
return -2;
}
#endif /* !HAVE_MMAP */
static int
safezero_slow(int fd, off_t offset, off_t len)
{
int r;
char *buf;
unsigned long long remain, bytes;
if (lseek(fd, offset, SEEK_SET) < 0)
return -1;
/* Split up the write in small chunks so as not to allocate lots of RAM */
remain = len;
bytes = MIN(1024 * 1024, len);
r = VIR_ALLOC_N(buf, bytes);
if (r < 0) {
errno = ENOMEM;
return -1;
}
while (remain) {
if (bytes > remain)
bytes = remain;
r = safewrite(fd, buf, bytes);
if (r < 0) {
VIR_FREE(buf);
return -1;
}
/* safewrite() guarantees all data will be written */
remain -= bytes;
}
VIR_FREE(buf);
return 0;
}
int safezero(int fd, off_t offset, off_t len)
{
int ret;
ret = safezero_posix_fallocate(fd, offset, len);
if (ret != -2)
return ret;
if (safezero_sys_fallocate(fd, offset, len) == 0)
return 0;
ret = safezero_mmap(fd, offset, len);
if (ret != -2)
return ret;
return safezero_slow(fd, offset, len);
}
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
/* search /proc/mounts for mount point of *type; return pointer to
* malloc'ed string of the path if found, otherwise return NULL
* with errno set to an appropriate value.
*/
char *
virFileFindMountPoint(const char *type)
{
FILE *f;
struct mntent mb;
char mntbuf[1024];
char *ret = NULL;
f = setmntent("/proc/mounts", "r");
if (!f)
return NULL;
while (getmntent_r(f, &mb, mntbuf, sizeof(mntbuf))) {
if (STREQ(mb.mnt_type, type)) {
ignore_value(VIR_STRDUP_QUIET(ret, mb.mnt_dir));
goto cleanup;
}
}
if (!ret)
errno = ENOENT;
cleanup:
endmntent(f);
return ret;
}
#else /* defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R */
char *
virFileFindMountPoint(const char *type ATTRIBUTE_UNUSED)
{
errno = ENOSYS;
return NULL;
}
#endif /* defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R */
int
virBuildPathInternal(char **path, ...)
{
char *path_component = NULL;
virBuffer buf = VIR_BUFFER_INITIALIZER;
va_list ap;
int ret = 0;
va_start(ap, path);
path_component = va_arg(ap, char *);
virBufferAdd(&buf, path_component, -1);
while ((path_component = va_arg(ap, char *)) != NULL) {
virBufferAddChar(&buf, '/');
virBufferAdd(&buf, path_component, -1);
}
va_end(ap);
*path = virBufferContentAndReset(&buf);
if (*path == NULL)
ret = -1;
return ret;
}
/* Like gnulib's fread_file, but read no more than the specified maximum
number of bytes. If the length of the input is <= max_len, and
upon error while reading that data, it works just like fread_file. */
static char *
saferead_lim(int fd, size_t max_len, size_t *length)
{
char *buf = NULL;
size_t alloc = 0;
size_t size = 0;
int save_errno;
for (;;) {
int count;
int requested;
if (size + BUFSIZ + 1 > alloc) {
alloc += alloc / 2;
if (alloc < size + BUFSIZ + 1)
alloc = size + BUFSIZ + 1;
if (VIR_REALLOC_N(buf, alloc) < 0) {
save_errno = errno;
break;
}
}
/* Ensure that (size + requested <= max_len); */
requested = MIN(size < max_len ? max_len - size : 0,
alloc - size - 1);
count = saferead(fd, buf + size, requested);
size += count;
if (count != requested || requested == 0) {
save_errno = errno;
if (count < 0)
break;
buf[size] = '\0';
*length = size;
return buf;
}
}
VIR_FREE(buf);
errno = save_errno;
return NULL;
}
/* A wrapper around saferead_lim that merely stops reading at the
* specified maximum size. */
int
virFileReadHeaderFD(int fd, int maxlen, char **buf)
{
size_t len;
char *s;
if (maxlen <= 0) {
errno = EINVAL;
return -1;
}
s = saferead_lim(fd, maxlen, &len);
if (s == NULL)
return -1;
*buf = s;
return len;
}
/* A wrapper around saferead_lim that maps a failure due to
exceeding the maximum size limitation to EOVERFLOW. */
int
virFileReadLimFD(int fd, int maxlen, char **buf)
{
size_t len;
char *s;
if (maxlen <= 0) {
errno = EINVAL;
return -1;
}
s = saferead_lim(fd, maxlen+1, &len);
if (s == NULL)
return -1;
if (len > maxlen || (int)len != len) {
VIR_FREE(s);
/* There was at least one byte more than MAXLEN.
Set errno accordingly. */
errno = EOVERFLOW;
return -1;
}
*buf = s;
return len;
}
int
virFileReadAll(const char *path, int maxlen, char **buf)
{
int fd = open(path, O_RDONLY);
if (fd < 0) {
virReportSystemError(errno, _("Failed to open file '%s'"), path);
return -1;
}
int len = virFileReadLimFD(fd, maxlen, buf);
VIR_FORCE_CLOSE(fd);
if (len < 0) {
virReportSystemError(errno, _("Failed to read file '%s'"), path);
return -1;
}
return len;
}
int
virFileReadAllQuiet(const char *path, int maxlen, char **buf)
{
int fd = open(path, O_RDONLY);
if (fd < 0)
return -errno;
int len = virFileReadLimFD(fd, maxlen, buf);
VIR_FORCE_CLOSE(fd);
if (len < 0)
return -errno;
return len;
}
/* Truncate @path and write @str to it. If @mode is 0, ensure that
@path exists; otherwise, use @mode if @path must be created.
Return 0 for success, nonzero for failure.
Be careful to preserve any errno value upon failure. */
int
virFileWriteStr(const char *path, const char *str, mode_t mode)
{
int fd;
if (mode)
fd = open(path, O_WRONLY|O_TRUNC|O_CREAT, mode);
else
fd = open(path, O_WRONLY|O_TRUNC);
if (fd == -1)
return -1;
if (safewrite(fd, str, strlen(str)) < 0) {
VIR_FORCE_CLOSE(fd);
return -1;
}
/* Use errno from failed close only if there was no write error. */
if (VIR_CLOSE(fd) != 0)
return -1;
return 0;
}
int
virFileMatchesNameSuffix(const char *file,
const char *name,
const char *suffix)
{
int filelen = strlen(file);
int namelen = strlen(name);
int suffixlen = strlen(suffix);
if (filelen == (namelen + suffixlen) &&
STREQLEN(file, name, namelen) &&
STREQLEN(file + namelen, suffix, suffixlen))
return 1;
else
return 0;
}
int
virFileHasSuffix(const char *str,
const char *suffix)
{
int len = strlen(str);
int suffixlen = strlen(suffix);
if (len < suffixlen)
return 0;
return STRCASEEQ(str + len - suffixlen, suffix);
}
#define SAME_INODE(Stat_buf_1, Stat_buf_2) \
((Stat_buf_1).st_ino == (Stat_buf_2).st_ino \
&& (Stat_buf_1).st_dev == (Stat_buf_2).st_dev)
/* Return nonzero if checkLink and checkDest
* refer to the same file. Otherwise, return 0.
*/
int
virFileLinkPointsTo(const char *checkLink,
const char *checkDest)
{
struct stat src_sb;
struct stat dest_sb;
return (stat(checkLink, &src_sb) == 0
&& stat(checkDest, &dest_sb) == 0
&& SAME_INODE(src_sb, dest_sb));
}
/* Return positive if checkLink (residing within directory if not
* absolute) and checkDest refer to the same file. Otherwise, return
* -1 on allocation failure (error reported), or 0 if not the same
* (silent).
*/
int
virFileRelLinkPointsTo(const char *directory,
const char *checkLink,
const char *checkDest)
{
char *candidate;
int ret;
if (*checkLink == '/')
return virFileLinkPointsTo(checkLink, checkDest);
if (!directory) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot resolve '%s' without starting directory"),
checkLink);
return -1;
}
if (virAsprintf(&candidate, "%s/%s", directory, checkLink) < 0)
return -1;
ret = virFileLinkPointsTo(candidate, checkDest);
VIR_FREE(candidate);
return ret;
}
static int
virFileResolveLinkHelper(const char *linkpath,
bool intermediatePaths,
char **resultpath)
{
struct stat st;
*resultpath = NULL;
/* We don't need the full canonicalization of intermediate
* directories, if linkpath is absolute and the basename is
* already a non-symlink. */
if (IS_ABSOLUTE_FILE_NAME(linkpath) && !intermediatePaths) {
if (lstat(linkpath, &st) < 0)
return -1;
if (!S_ISLNK(st.st_mode))
return VIR_STRDUP_QUIET(*resultpath, linkpath) < 0 ? -1 : 0;
}
*resultpath = canonicalize_file_name(linkpath);
return *resultpath == NULL ? -1 : 0;
}
/*
* Attempt to resolve a symbolic link, returning an
* absolute path where only the last component is guaranteed
* not to be a symlink.
*
* Return 0 if path was not a symbolic, or the link was
* resolved. Return -1 with errno set upon error
*/
int
virFileResolveLink(const char *linkpath, char **resultpath)
{
return virFileResolveLinkHelper(linkpath, false, resultpath);
}
/*
* Attempt to resolve a symbolic link, returning an
* absolute path where every component is guaranteed
* not to be a symlink.
*
* Return 0 if path was not a symbolic, or the link was
* resolved. Return -1 with errno set upon error
*/
int
virFileResolveAllLinks(const char *linkpath, char **resultpath)
{
return virFileResolveLinkHelper(linkpath, true, resultpath);
}
/*
* Check whether the given file is a link.
* Returns 1 in case of the file being a link, 0 in case it is not
* a link and the negative errno in all other cases.
*/
int
virFileIsLink(const char *linkpath)
{
struct stat st;
if (lstat(linkpath, &st) < 0)
return -errno;
return S_ISLNK(st.st_mode) != 0;
}
/*
* Finds a requested executable file in the PATH env. e.g.:
* "kvm-img" will return "/usr/bin/kvm-img"
*
* You must free the result
*/
char *
virFindFileInPath(const char *file)
{
const char *origpath = NULL;
char *path = NULL;
char *pathiter;
char *pathseg;
char *fullpath = NULL;
if (file == NULL)
return NULL;
/* if we are passed an absolute path (starting with /), return a
* copy of that path, after validating that it is executable
*/
if (IS_ABSOLUTE_FILE_NAME(file)) {
char *ret = NULL;
if (virFileIsExecutable(file))
ignore_value(VIR_STRDUP_QUIET(ret, file));
return ret;
}
/* If we are passed an anchored path (containing a /), then there
* is no path search - it must exist in the current directory
*/
if (strchr(file, '/')) {
if (virFileIsExecutable(file))
ignore_value(virFileAbsPath(file, &path));
return path;
}
/* copy PATH env so we can tweak it */
origpath = virGetEnvBlockSUID("PATH");
if (!origpath)
origpath = "/bin:/usr/bin";
if (VIR_STRDUP_QUIET(path, origpath) <= 0)
return NULL;
/* for each path segment, append the file to search for and test for
* it. return it if found.
*/
pathiter = path;
while ((pathseg = strsep(&pathiter, ":")) != NULL) {
if (virAsprintf(&fullpath, "%s/%s", pathseg, file) < 0 ||
virFileIsExecutable(fullpath))
break;
VIR_FREE(fullpath);
}
VIR_FREE(path);
return fullpath;
}
static bool useDirOverride;
/**
* virFileFindResourceFull:
* @filename: libvirt distributed filename without any path
* @prefix: optional string to prepend to filename
* @suffix: optional string to append to filename
* @builddir: location of the filename in the build tree including
* abs_topsrcdir or abs_topbuilddir prefix
* @installdir: location of the installed binary
* @envname: environment variable used to override all dirs
*
* A helper which will return a path to @filename within
* the current build tree, if the calling binary is being
* run from the source tree. Otherwise it will return the
* path in the installed location.
*
* If @envname is non-NULL it will override all other
* directory lookup.
*
* Only use this with @filename files that are part of
* the libvirt tree, not 3rd party binaries/files.
*
* Returns the resolved path (caller frees) or NULL on error
*/
char *
virFileFindResourceFull(const char *filename,
const char *prefix,
const char *suffix,
const char *builddir,
const char *installdir,
const char *envname)
{
char *ret = NULL;
const char *envval = envname ? virGetEnvBlockSUID(envname) : NULL;
const char *path;
if (!prefix)
prefix = "";
if (!suffix)
suffix = "";
if (envval)
path = envval;
else if (useDirOverride)
path = builddir;
else
path = installdir;
if (virAsprintf(&ret, "%s/%s%s%s", path, prefix, filename, suffix) < 0)
return NULL;
VIR_DEBUG("Resolved '%s' to '%s'", filename, ret);
return ret;
}
char *
virFileFindResource(const char *filename,
const char *builddir,
const char *installdir)
{
return virFileFindResourceFull(filename, NULL, NULL, builddir, installdir, NULL);
}
/**
* virFileActivateDirOverride:
* @argv0: argv[0] of the calling program
*
* Look at @argv0 and try to detect if running from
* a build directory, by looking for a 'lt-' prefix
* on the binary name, or '/.libs/' in the path
*/
void
virFileActivateDirOverride(const char *argv0)
{
char *file = strrchr(argv0, '/');
if (!file || file[1] == '\0')
return;
file++;
if (STRPREFIX(file, "lt-") ||
strstr(argv0, "/.libs/")) {
useDirOverride = true;
VIR_DEBUG("Activating build dir override for %s", argv0);
}
}
bool
virFileIsDir(const char *path)
{
struct stat s;
return (stat(path, &s) == 0) && S_ISDIR(s.st_mode);
}
/**
* virFileExists: Check for presence of file
* @path: Path of file to check
*
* Returns if the file exists. Preserves errno in case it does not exist.
*/
bool
virFileExists(const char *path)
{
return access(path, F_OK) == 0;
}
/* Check that a file is regular and has executable bits. If false is
* returned, errno is valid.
*
* Note: In the presence of ACLs, this may return true for a file that
* would actually fail with EACCES for a given user, or false for a
* file that the user could actually execute, but setups with ACLs
* that weird are unusual. */
bool
virFileIsExecutable(const char *file)
{
struct stat sb;
/* We would also want to check faccessat if we cared about ACLs,
* but we don't. */
if (stat(file, &sb) < 0)
return false;
if (S_ISREG(sb.st_mode) && (sb.st_mode & 0111) != 0)
return true;
errno = S_ISDIR(sb.st_mode) ? EISDIR : EACCES;
return false;
}
/*
* Check that a file refers to a mount point. Trick is that for
* a mount point, the st_dev field will differ from the parent
* directory.
*
* Note that this will not detect bind mounts of dirs/files,
* only true filesystem mounts.
*/
int virFileIsMountPoint(const char *file)
{
char *parent = NULL;
int ret = -1;
struct stat sb1, sb2;
if (!(parent = mdir_name(file))) {
virReportOOMError();
goto cleanup;
}
VIR_DEBUG("Comparing '%s' to '%s'", file, parent);
if (stat(file, &sb1) < 0) {
if (errno == ENOENT)
ret = 0;
else
virReportSystemError(errno,
_("Cannot stat '%s'"),
file);
goto cleanup;
}
if (stat(parent, &sb2) < 0) {
virReportSystemError(errno,
_("Cannot stat '%s'"),
parent);
goto cleanup;
}
if (!S_ISDIR(sb1.st_mode)) {
ret = 0;
goto cleanup;
}
ret = sb1.st_dev != sb2.st_dev;
VIR_DEBUG("Is mount %d", ret);
cleanup:
VIR_FREE(parent);
return ret;
}
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
static int
virFileGetMountSubtreeImpl(const char *mtabpath,
const char *prefix,
char ***mountsret,
size_t *nmountsret,
bool reverse)
{
FILE *procmnt;
struct mntent mntent;
char mntbuf[1024];
int ret = -1;
char **mounts = NULL;
size_t nmounts = 0;
VIR_DEBUG("prefix=%s", prefix);
*mountsret = NULL;
*nmountsret = 0;
if (!(procmnt = setmntent(mtabpath, "r"))) {
virReportSystemError(errno,
_("Failed to read %s"), mtabpath);
return -1;
}
while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
if (!(STREQ(mntent.mnt_dir, prefix) ||
(STRPREFIX(mntent.mnt_dir, prefix) &&
mntent.mnt_dir[strlen(prefix)] == '/')))
continue;
if (VIR_EXPAND_N(mounts, nmounts, nmounts ? 1 : 2) < 0)
goto cleanup;
if (VIR_STRDUP(mounts[nmounts - 2], mntent.mnt_dir) < 0)
goto cleanup;
}
if (mounts)
qsort(mounts, nmounts - 1, sizeof(mounts[0]),
reverse ? virStringSortRevCompare : virStringSortCompare);
*mountsret = mounts;
*nmountsret = nmounts ? nmounts - 1 : 0;
ret = 0;
cleanup:
if (ret < 0)
virStringFreeList(mounts);
endmntent(procmnt);
return ret;
}
#else /* ! defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R */
static int
virFileGetMountSubtreeImpl(const char *mtabpath ATTRIBUTE_UNUSED,
const char *prefix ATTRIBUTE_UNUSED,
char ***mountsret ATTRIBUTE_UNUSED,
size_t *nmountsret ATTRIBUTE_UNUSED,
bool reverse ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("Unable to determine mount table on this platform"));
return -1;
}
#endif /* ! defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R */
/**
* virFileGetMountSubtree:
* @mtabpath: mount file to parser (eg /proc/mounts)
* @prefix: mount path prefix to match
* @mountsret: allocated and filled with matching mounts
* @nmountsret: filled with number of matching mounts, not counting NULL terminator
*
* Return the list of mounts from @mtabpath which contain
* the path @prefix, sorted from shortest to longest path.
*
* The @mountsret array will be NULL terminated and should
* be freed with virStringFreeList
*
* Returns 0 on success, -1 on error
*/
int virFileGetMountSubtree(const char *mtabpath,
const char *prefix,
char ***mountsret,
size_t *nmountsret)
{
return virFileGetMountSubtreeImpl(mtabpath, prefix, mountsret, nmountsret, false);
}
/**
* virFileGetMountReverseSubtree:
* @mtabpath: mount file to parser (eg /proc/mounts)
* @prefix: mount path prefix to match
* @mountsret: allocated and filled with matching mounts
* @nmountsret: filled with number of matching mounts, not counting NULL terminator
*
* Return the list of mounts from @mtabpath which contain
* the path @prefix, sorted from longest to shortest path.
* ie opposite order to which they appear in @mtabpath
*
* The @mountsret array will be NULL terminated and should
* be freed with virStringFreeList
*
* Returns 0 on success, -1 on error
*/
int virFileGetMountReverseSubtree(const char *mtabpath,
const char *prefix,
char ***mountsret,
size_t *nmountsret)
{
return virFileGetMountSubtreeImpl(mtabpath, prefix, mountsret, nmountsret, true);
}
#ifndef WIN32
/* Check that a file is accessible under certain
* user & gid.
* @mode can be F_OK, or a bitwise combination of R_OK, W_OK, and X_OK.
* see 'man access' for more details.
* Returns 0 on success, -1 on fail with errno set.
*/
int
virFileAccessibleAs(const char *path, int mode,
uid_t uid, gid_t gid)
{
pid_t pid = 0;
int status, ret = 0;
int forkRet = 0;
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
gid_t *groups;
int ngroups;
if (uid == geteuid() &&
gid == getegid())
return access(path, mode);
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
ngroups = virGetGroupList(uid, gid, &groups);
if (ngroups < 0)
return -1;
virFork: simplify semantics The old semantics of virFork() violates the priciple of good usability: it requires the caller to check the pid argument after use, *even when virFork returned -1*, in order to properly abort a child process that failed setup done immediately after fork() - that is, the caller must call _exit() in the child. While uses in virfile.c did this correctly, uses in 'virsh lxc-enter-namespace' and 'virt-login-shell' would happily return from the calling function in both the child and the parent, leading to very confusing results. [Thankfully, I found the problem by inspection, and can't actually trigger the double return on error without an LD_PRELOAD library.] It is much better if the semantics of virFork are impossible to abuse. Looking at virFork(), the parent could only ever return -1 with a non-negative pid if it misused pthread_sigmask, but this never happens. Up until this patch series, the child could return -1 with non-negative pid if it fails to set up signals correctly, but we recently fixed that to make the child call _exit() at that point instead of forcing the caller to do it. Thus, the return value and contents of the pid argument are now redundant (a -1 return now happens only for failure to fork, a child 0 return only happens for a successful 0 pid, and a parent 0 return only happens for a successful non-zero pid), so we might as well return the pid directly rather than an integer of whether it succeeded or failed; this is also good from the interface design perspective as users are already familiar with fork() semantics. One last change in this patch: before returning the pid directly, I found cases where using virProcessWait unconditionally on a cleanup path of a virFork's -1 pid return would be nicer if there were a way to avoid it overwriting an earlier message. While such paths are a bit harder to come by with my change to a direct pid return, I decided to keep the virProcessWait change in this patch. * src/util/vircommand.h (virFork): Change signature. * src/util/vircommand.c (virFork): Guarantee that child will only return on success, to simplify callers. Return pid rather than status, now that the situations are always the same. (virExec): Adjust caller, also avoid open-coding process death. * src/util/virprocess.c (virProcessWait): Tweak semantics when pid is -1. (virProcessRunInMountNamespace): Adjust caller. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * tools/virt-login-shell.c (main): Likewise. * tools/virsh-domain.c (cmdLxcEnterNamespace): Likewise. * tests/commandtest.c (test23): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-12-22 00:54:33 +00:00
pid = virFork();
if (pid < 0) {
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
VIR_FREE(groups);
return -1;
}
if (pid) { /* parent */
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
VIR_FREE(groups);
if (virProcessWait(pid, &status, false) < 0) {
/* virProcessWait() already reported error */
virFork: simplify semantics The old semantics of virFork() violates the priciple of good usability: it requires the caller to check the pid argument after use, *even when virFork returned -1*, in order to properly abort a child process that failed setup done immediately after fork() - that is, the caller must call _exit() in the child. While uses in virfile.c did this correctly, uses in 'virsh lxc-enter-namespace' and 'virt-login-shell' would happily return from the calling function in both the child and the parent, leading to very confusing results. [Thankfully, I found the problem by inspection, and can't actually trigger the double return on error without an LD_PRELOAD library.] It is much better if the semantics of virFork are impossible to abuse. Looking at virFork(), the parent could only ever return -1 with a non-negative pid if it misused pthread_sigmask, but this never happens. Up until this patch series, the child could return -1 with non-negative pid if it fails to set up signals correctly, but we recently fixed that to make the child call _exit() at that point instead of forcing the caller to do it. Thus, the return value and contents of the pid argument are now redundant (a -1 return now happens only for failure to fork, a child 0 return only happens for a successful 0 pid, and a parent 0 return only happens for a successful non-zero pid), so we might as well return the pid directly rather than an integer of whether it succeeded or failed; this is also good from the interface design perspective as users are already familiar with fork() semantics. One last change in this patch: before returning the pid directly, I found cases where using virProcessWait unconditionally on a cleanup path of a virFork's -1 pid return would be nicer if there were a way to avoid it overwriting an earlier message. While such paths are a bit harder to come by with my change to a direct pid return, I decided to keep the virProcessWait change in this patch. * src/util/vircommand.h (virFork): Change signature. * src/util/vircommand.c (virFork): Guarantee that child will only return on success, to simplify callers. Return pid rather than status, now that the situations are always the same. (virExec): Adjust caller, also avoid open-coding process death. * src/util/virprocess.c (virProcessWait): Tweak semantics when pid is -1. (virProcessRunInMountNamespace): Adjust caller. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * tools/virt-login-shell.c (main): Likewise. * tools/virsh-domain.c (cmdLxcEnterNamespace): Likewise. * tests/commandtest.c (test23): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-12-22 00:54:33 +00:00
errno = EINTR;
return -1;
}
if (status) {
errno = status;
return -1;
}
return 0;
}
/* child.
* Return positive value here. Parent
* will change it to negative one. */
if (forkRet < 0) {
ret = errno;
goto childerror;
}
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
if (virSetUIDGID(uid, gid, groups, ngroups) < 0) {
ret = errno;
goto childerror;
}
if (access(path, mode) < 0)
ret = errno;
childerror:
if ((ret & 0xFF) != ret) {
VIR_WARN("unable to pass desired return value %d", ret);
ret = 0xFF;
}
_exit(ret);
}
/* virFileOpenForceOwnerMode() - an internal utility function called
* only by virFileOpenAs(). Sets the owner and mode of the file
* opened as "fd" if it's not correct AND the flags say it should be
* forced. */
static int
virFileOpenForceOwnerMode(const char *path, int fd, mode_t mode,
uid_t uid, gid_t gid, unsigned int flags)
{
int ret = 0;
struct stat st;
if (!(flags & (VIR_FILE_OPEN_FORCE_OWNER | VIR_FILE_OPEN_FORCE_MODE)))
return 0;
if (fstat(fd, &st) == -1) {
ret = -errno;
virReportSystemError(errno, _("stat of '%s' failed"), path);
return ret;
}
/* NB: uid:gid are never "-1" (default) at this point - the caller
* has always changed -1 to the value of get[gu]id().
*/
if ((flags & VIR_FILE_OPEN_FORCE_OWNER) &&
((st.st_uid != uid) || (st.st_gid != gid)) &&
(fchown(fd, uid, gid) < 0)) {
ret = -errno;
virReportSystemError(errno,
_("cannot chown '%s' to (%u, %u)"),
path, (unsigned int) uid,
(unsigned int) gid);
return ret;
}
if ((flags & VIR_FILE_OPEN_FORCE_MODE) &&
((mode & (S_IRWXU|S_IRWXG|S_IRWXO)) !=
(st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO))) &&
(fchmod(fd, mode) < 0)) {
ret = -errno;
virReportSystemError(errno,
_("cannot set mode of '%s' to %04o"),
path, mode);
return ret;
}
return ret;
}
/* virFileOpenForked() - an internal utility function called only by
* virFileOpenAs(). It forks, then the child does setuid+setgid to
* given uid:gid and attempts to open the file, while the parent just
* calls recvfd to get the open fd back from the child. returns the
* fd, or -errno if there is an error. */
static int
virFileOpenForked(const char *path, int openflags, mode_t mode,
uid_t uid, gid_t gid, unsigned int flags)
{
pid_t pid;
int status = 0, ret = 0;
int recvfd_errno = 0;
int fd = -1;
int pair[2] = { -1, -1 };
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
gid_t *groups;
int ngroups;
/* parent is running as root, but caller requested that the
* file be opened as some other user and/or group). The
* following dance avoids problems caused by root-squashing
* NFS servers. */
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
ngroups = virGetGroupList(uid, gid, &groups);
if (ngroups < 0)
return -errno;
if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) < 0) {
ret = -errno;
virReportSystemError(errno,
_("failed to create socket needed for '%s'"),
path);
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
VIR_FREE(groups);
return ret;
}
virFork: simplify semantics The old semantics of virFork() violates the priciple of good usability: it requires the caller to check the pid argument after use, *even when virFork returned -1*, in order to properly abort a child process that failed setup done immediately after fork() - that is, the caller must call _exit() in the child. While uses in virfile.c did this correctly, uses in 'virsh lxc-enter-namespace' and 'virt-login-shell' would happily return from the calling function in both the child and the parent, leading to very confusing results. [Thankfully, I found the problem by inspection, and can't actually trigger the double return on error without an LD_PRELOAD library.] It is much better if the semantics of virFork are impossible to abuse. Looking at virFork(), the parent could only ever return -1 with a non-negative pid if it misused pthread_sigmask, but this never happens. Up until this patch series, the child could return -1 with non-negative pid if it fails to set up signals correctly, but we recently fixed that to make the child call _exit() at that point instead of forcing the caller to do it. Thus, the return value and contents of the pid argument are now redundant (a -1 return now happens only for failure to fork, a child 0 return only happens for a successful 0 pid, and a parent 0 return only happens for a successful non-zero pid), so we might as well return the pid directly rather than an integer of whether it succeeded or failed; this is also good from the interface design perspective as users are already familiar with fork() semantics. One last change in this patch: before returning the pid directly, I found cases where using virProcessWait unconditionally on a cleanup path of a virFork's -1 pid return would be nicer if there were a way to avoid it overwriting an earlier message. While such paths are a bit harder to come by with my change to a direct pid return, I decided to keep the virProcessWait change in this patch. * src/util/vircommand.h (virFork): Change signature. * src/util/vircommand.c (virFork): Guarantee that child will only return on success, to simplify callers. Return pid rather than status, now that the situations are always the same. (virExec): Adjust caller, also avoid open-coding process death. * src/util/virprocess.c (virProcessWait): Tweak semantics when pid is -1. (virProcessRunInMountNamespace): Adjust caller. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * tools/virt-login-shell.c (main): Likewise. * tools/virsh-domain.c (cmdLxcEnterNamespace): Likewise. * tests/commandtest.c (test23): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-12-22 00:54:33 +00:00
pid = virFork();
if (pid < 0) {
ret = -errno;
VIR_FREE(groups);
return ret;
}
if (pid == 0) {
/* child */
/* set desired uid/gid, then attempt to create the file */
virFork: simplify semantics The old semantics of virFork() violates the priciple of good usability: it requires the caller to check the pid argument after use, *even when virFork returned -1*, in order to properly abort a child process that failed setup done immediately after fork() - that is, the caller must call _exit() in the child. While uses in virfile.c did this correctly, uses in 'virsh lxc-enter-namespace' and 'virt-login-shell' would happily return from the calling function in both the child and the parent, leading to very confusing results. [Thankfully, I found the problem by inspection, and can't actually trigger the double return on error without an LD_PRELOAD library.] It is much better if the semantics of virFork are impossible to abuse. Looking at virFork(), the parent could only ever return -1 with a non-negative pid if it misused pthread_sigmask, but this never happens. Up until this patch series, the child could return -1 with non-negative pid if it fails to set up signals correctly, but we recently fixed that to make the child call _exit() at that point instead of forcing the caller to do it. Thus, the return value and contents of the pid argument are now redundant (a -1 return now happens only for failure to fork, a child 0 return only happens for a successful 0 pid, and a parent 0 return only happens for a successful non-zero pid), so we might as well return the pid directly rather than an integer of whether it succeeded or failed; this is also good from the interface design perspective as users are already familiar with fork() semantics. One last change in this patch: before returning the pid directly, I found cases where using virProcessWait unconditionally on a cleanup path of a virFork's -1 pid return would be nicer if there were a way to avoid it overwriting an earlier message. While such paths are a bit harder to come by with my change to a direct pid return, I decided to keep the virProcessWait change in this patch. * src/util/vircommand.h (virFork): Change signature. * src/util/vircommand.c (virFork): Guarantee that child will only return on success, to simplify callers. Return pid rather than status, now that the situations are always the same. (virExec): Adjust caller, also avoid open-coding process death. * src/util/virprocess.c (virProcessWait): Tweak semantics when pid is -1. (virProcessRunInMountNamespace): Adjust caller. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * tools/virt-login-shell.c (main): Likewise. * tools/virsh-domain.c (cmdLxcEnterNamespace): Likewise. * tests/commandtest.c (test23): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-12-22 00:54:33 +00:00
VIR_FORCE_CLOSE(pair[0]);
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
if (virSetUIDGID(uid, gid, groups, ngroups) < 0) {
ret = -errno;
goto childerror;
}
if ((fd = open(path, openflags, mode)) < 0) {
ret = -errno;
virReportSystemError(errno,
_("child process failed to create file '%s'"),
path);
goto childerror;
}
/* File is successfully open. Set permissions if requested. */
ret = virFileOpenForceOwnerMode(path, fd, mode, uid, gid, flags);
if (ret < 0) {
ret = -errno;
virReportSystemError(errno,
_("child process failed to force owner mode file '%s'"),
path);
goto childerror;
}
do {
ret = sendfd(pair[1], fd);
} while (ret < 0 && errno == EINTR);
if (ret < 0) {
ret = -errno;
virReportSystemError(errno, "%s",
_("child process failed to send fd to parent"));
goto childerror;
}
childerror:
/* ret tracks -errno on failure, but exit value must be positive.
* If the child exits with EACCES, then the parent tries again. */
/* XXX This makes assumptions about errno being < 255, which is
* not true on Hurd. */
VIR_FORCE_CLOSE(pair[1]);
if (ret < 0)
VIR_FORCE_CLOSE(fd);
ret = -ret;
if ((ret & 0xff) != ret) {
VIR_WARN("unable to pass desired return value %d", ret);
ret = 0xff;
}
_exit(ret);
}
/* parent */
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
VIR_FREE(groups);
VIR_FORCE_CLOSE(pair[1]);
do {
fd = recvfd(pair[0], 0);
} while (fd < 0 && errno == EINTR);
VIR_FORCE_CLOSE(pair[0]); /* NB: this preserves errno */
if (fd < 0)
recvfd_errno = errno;
if (virProcessWait(pid, &status, 0) < 0) {
/* virProcessWait() reports errno on waitpid failure, so we'll just
* set our return status to EINTR; otherwise, set status to EACCES
* since the original failure for the fork+setuid path would have
* been EACCES or EPERM by definition.
*/
if (virLastErrorIsSystemErrno(0))
status = EINTR;
else if (!status)
status = EACCES;
}
if (status) {
VIR_FORCE_CLOSE(fd);
return -status;
}
/* if waitpid succeeded, but recvfd failed, report recvfd_errno */
if (recvfd_errno != 0) {
virReportSystemError(recvfd_errno,
_("failed recvfd for child creating '%s'"),
path);
return -recvfd_errno;
}
/* otherwise, waitpid and recvfd succeeded, return the fd */
return fd;
}
/**
* virFileOpenAs:
* @path: file to open or create
* @openflags: flags to pass to open
* @mode: mode to use on creation or when forcing permissions
* @uid: uid that should own file on creation
* @gid: gid that should own file
* @flags: bit-wise or of VIR_FILE_OPEN_* flags
*
* Open @path, and return an fd to the open file. @openflags contains
* the flags normally passed to open(2), while those in @flags are
* used internally. If @flags includes VIR_FILE_OPEN_NOFORK, then try
* opening the file while executing with the current uid:gid
* (i.e. don't fork+setuid+setgid before the call to open()). If
* @flags includes VIR_FILE_OPEN_FORK, then try opening the file while
* the effective user id is @uid (by forking a child process); this
* allows one to bypass root-squashing NFS issues; NOFORK is always
* tried before FORK (the absence of both flags is treated identically
* to (VIR_FILE_OPEN_NOFORK | VIR_FILE_OPEN_FORK)). If @flags includes
* VIR_FILE_OPEN_FORCE_OWNER, then ensure that @path is owned by
* uid:gid before returning (even if it already existed with a
* different owner). If @flags includes VIR_FILE_OPEN_FORCE_MODE,
* ensure it has those permissions before returning (again, even if
* the file already existed with different permissions).
*
* The return value (if non-negative) is the file descriptor, left
* open. Returns -errno on failure.
*/
int
virFileOpenAs(const char *path, int openflags, mode_t mode,
uid_t uid, gid_t gid, unsigned int flags)
{
int ret = 0, fd = -1;
/* allow using -1 to mean "current value" */
if (uid == (uid_t) -1)
uid = geteuid();
if (gid == (gid_t) -1)
gid = getegid();
/* treat absence of both flags as presence of both for simpler
* calling. */
if (!(flags & (VIR_FILE_OPEN_NOFORK|VIR_FILE_OPEN_FORK)))
flags |= VIR_FILE_OPEN_NOFORK|VIR_FILE_OPEN_FORK;
if ((flags & VIR_FILE_OPEN_NOFORK)
|| (geteuid() != 0)
|| ((uid == 0) && (gid == 0))) {
if ((fd = open(path, openflags, mode)) < 0) {
ret = -errno;
if (!(flags & VIR_FILE_OPEN_FORK))
goto error;
} else {
ret = virFileOpenForceOwnerMode(path, fd, mode, uid, gid, flags);
if (ret < 0)
goto error;
}
}
/* If we either 1) didn't try opening as current user at all, or
* 2) failed, and errno/virStorageFileIsSharedFS indicate we might
* be successful if we try as a different uid, then try doing
* fork+setuid+setgid before opening.
*/
if ((fd < 0) && (flags & VIR_FILE_OPEN_FORK)) {
if (ret < 0) {
/* An open(2) that failed due to insufficient permissions
* could return one or the other of these depending on OS
* version and circumstances. Any other errno indicates a
* problem that couldn't be remedied by fork+setuid
* anyway. */
if (ret != -EACCES && ret != -EPERM)
goto error;
/* On Linux we can also verify the FS-type of the
* directory. (this is a NOP on other platforms). */
if (virFileIsSharedFS(path) <= 0)
goto error;
}
/* passed all prerequisites - retry the open w/fork+setuid */
if ((fd = virFileOpenForked(path, openflags, mode, uid, gid, flags)) < 0) {
ret = fd;
goto error;
}
}
/* File is successfully opened */
return fd;
error:
if (fd >= 0) {
/* some other failure after the open succeeded */
VIR_FORCE_CLOSE(fd);
}
/* whoever failed the open last has already set ret = -errno */
return ret;
}
/* virFileRemove:
* @path: file to unlink or directory to remove
* @uid: uid that was used to create the file (not required)
* @gid: gid that was used to create the file (not required)
*
* If a file/volume was created in an NFS root-squash environment,
* then we must 'unlink' the file in the same environment. Unlike
* the virFileOpenAs[Forked] and virDirCreate[NoFork], this code
* takes no extra flags and does not bother with EACCES failures
* from the child.
*/
int
virFileRemove(const char *path,
uid_t uid,
gid_t gid)
{
pid_t pid;
int waitret;
int status, ret = 0;
gid_t *groups;
int ngroups;
/* If not running as root or if a non explicit uid/gid was being used for
* the file/volume, then use unlink directly
*/
if ((geteuid() != 0) ||
((uid == (uid_t) -1) && (gid == (gid_t) -1))) {
if (virFileIsDir(path))
return rmdir(path);
else
return unlink(path);
}
/* Otherwise, we have to deal with the NFS root-squash craziness
* to run under the uid/gid that created the volume in order to
* perform the unlink of the volume.
*/
if (uid == (uid_t) -1)
uid = geteuid();
if (gid == (gid_t) -1)
gid = getegid();
ngroups = virGetGroupList(uid, gid, &groups);
if (ngroups < 0)
return -errno;
pid = virFork();
if (pid < 0) {
ret = -errno;
VIR_FREE(groups);
return ret;
}
if (pid) { /* parent */
/* wait for child to complete, and retrieve its exit code */
VIR_FREE(groups);
while ((waitret = waitpid(pid, &status, 0)) == -1 && errno == EINTR);
if (waitret == -1) {
ret = -errno;
virReportSystemError(errno,
_("failed to wait for child unlinking '%s'"),
path);
goto parenterror;
}
/*
* If waitpid succeeded, but if the child exited abnormally or
* reported non-zero status, report failure
*/
if (!WIFEXITED(status) || (WEXITSTATUS(status)) != 0) {
char *msg = virProcessTranslateStatus(status);
virReportError(VIR_ERR_INTERNAL_ERROR,
_("child failed to unlink '%s': %s"),
path, msg);
VIR_FREE(msg);
if (WIFEXITED(status))
ret = -WEXITSTATUS(status);
else
ret = -EACCES;
}
parenterror:
return ret;
}
/* child */
/* set desired uid/gid, then attempt to unlink the file */
if (virSetUIDGID(uid, gid, groups, ngroups) < 0) {
ret = errno;
goto childerror;
}
if (virFileIsDir(path)) {
if (rmdir(path) < 0) {
ret = errno;
goto childerror;
}
} else {
if (unlink(path) < 0) {
ret = errno;
goto childerror;
}
}
childerror:
if ((ret & 0xff) != ret) {
VIR_WARN("unable to pass desired return value %d", ret);
ret = 0xff;
}
_exit(ret);
}
/* return -errno on failure, or 0 on success */
static int
virDirCreateNoFork(const char *path,
mode_t mode, uid_t uid, gid_t gid,
unsigned int flags)
{
int ret = 0;
struct stat st;
if (!((flags & VIR_DIR_CREATE_ALLOW_EXIST) && virFileExists(path))) {
if (mkdir(path, mode) < 0) {
ret = -errno;
virReportSystemError(errno, _("failed to create directory '%s'"),
path);
goto error;
}
}
if (stat(path, &st) == -1) {
ret = -errno;
virReportSystemError(errno, _("stat of '%s' failed"), path);
goto error;
}
if (((uid != (uid_t) -1 && st.st_uid != uid) ||
(gid != (gid_t) -1 && st.st_gid != gid))
&& (chown(path, uid, gid) < 0)) {
ret = -errno;
virReportSystemError(errno, _("cannot chown '%s' to (%u, %u)"),
path, (unsigned int) uid, (unsigned int) gid);
goto error;
}
if (mode != (mode_t) -1 && chmod(path, mode) < 0) {
ret = -errno;
virReportSystemError(errno,
_("cannot set mode of '%s' to %04o"),
path, mode);
goto error;
}
error:
return ret;
}
/* return -errno on failure, or 0 on success */
int
virDirCreate(const char *path,
mode_t mode, uid_t uid, gid_t gid,
unsigned int flags)
{
struct stat st;
pid_t pid;
int waitret;
int status, ret = 0;
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
gid_t *groups;
int ngroups;
/* Everything after this check is crazyness to allow setting uid/gid
* on directories that are on root-squash NFS shares. We only want
* to go that route if the follow conditions are true:
*
* 1) VIR_DIR_CREATE_AS_UID was passed, currently only used when
* directory is being created for a NETFS pool
* 2) We are running as root, since that's when the root-squash
* workaround is required.
* 3) An explicit uid/gid was requested
* 4) The directory doesn't already exist and the ALLOW_EXIST flag
* wasn't passed.
*
* If any of those conditions are _not_ met, ignore the fork crazyness
*/
if ((!(flags & VIR_DIR_CREATE_AS_UID))
|| (geteuid() != 0)
|| ((uid == (uid_t) -1) && (gid == (gid_t) -1))
|| ((flags & VIR_DIR_CREATE_ALLOW_EXIST) && virFileExists(path))) {
return virDirCreateNoFork(path, mode, uid, gid, flags);
}
if (uid == (uid_t) -1)
uid = geteuid();
if (gid == (gid_t) -1)
gid = getegid();
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
ngroups = virGetGroupList(uid, gid, &groups);
if (ngroups < 0)
return -errno;
virFork: simplify semantics The old semantics of virFork() violates the priciple of good usability: it requires the caller to check the pid argument after use, *even when virFork returned -1*, in order to properly abort a child process that failed setup done immediately after fork() - that is, the caller must call _exit() in the child. While uses in virfile.c did this correctly, uses in 'virsh lxc-enter-namespace' and 'virt-login-shell' would happily return from the calling function in both the child and the parent, leading to very confusing results. [Thankfully, I found the problem by inspection, and can't actually trigger the double return on error without an LD_PRELOAD library.] It is much better if the semantics of virFork are impossible to abuse. Looking at virFork(), the parent could only ever return -1 with a non-negative pid if it misused pthread_sigmask, but this never happens. Up until this patch series, the child could return -1 with non-negative pid if it fails to set up signals correctly, but we recently fixed that to make the child call _exit() at that point instead of forcing the caller to do it. Thus, the return value and contents of the pid argument are now redundant (a -1 return now happens only for failure to fork, a child 0 return only happens for a successful 0 pid, and a parent 0 return only happens for a successful non-zero pid), so we might as well return the pid directly rather than an integer of whether it succeeded or failed; this is also good from the interface design perspective as users are already familiar with fork() semantics. One last change in this patch: before returning the pid directly, I found cases where using virProcessWait unconditionally on a cleanup path of a virFork's -1 pid return would be nicer if there were a way to avoid it overwriting an earlier message. While such paths are a bit harder to come by with my change to a direct pid return, I decided to keep the virProcessWait change in this patch. * src/util/vircommand.h (virFork): Change signature. * src/util/vircommand.c (virFork): Guarantee that child will only return on success, to simplify callers. Return pid rather than status, now that the situations are always the same. (virExec): Adjust caller, also avoid open-coding process death. * src/util/virprocess.c (virProcessWait): Tweak semantics when pid is -1. (virProcessRunInMountNamespace): Adjust caller. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * tools/virt-login-shell.c (main): Likewise. * tools/virsh-domain.c (cmdLxcEnterNamespace): Likewise. * tests/commandtest.c (test23): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-12-22 00:54:33 +00:00
pid = virFork();
if (pid < 0) {
ret = -errno;
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
VIR_FREE(groups);
return ret;
}
if (pid) { /* parent */
/* wait for child to complete, and retrieve its exit code */
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
VIR_FREE(groups);
while ((waitret = waitpid(pid, &status, 0)) == -1 && errno == EINTR);
if (waitret == -1) {
ret = -errno;
virReportSystemError(errno,
_("failed to wait for child creating '%s'"),
path);
goto parenterror;
}
/*
* If waitpid succeeded, but if the child exited abnormally or
* reported non-zero status, report failure, except for EACCES where
* we try to fall back to non-fork method as in the original logic
* introduced and explained by commit 98f6f381.
*/
if (!WIFEXITED(status) || (WEXITSTATUS(status)) != 0) {
if (WEXITSTATUS(status) == EACCES)
return virDirCreateNoFork(path, mode, uid, gid, flags);
char *msg = virProcessTranslateStatus(status);
virReportError(VIR_ERR_INTERNAL_ERROR,
_("child failed to create '%s': %s"),
path, msg);
VIR_FREE(msg);
if (WIFEXITED(status))
ret = -WEXITSTATUS(status);
else
ret = -EACCES;
}
parenterror:
return ret;
}
/* child */
/* set desired uid/gid, then attempt to create the directory */
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
if (virSetUIDGID(uid, gid, groups, ngroups) < 0) {
ret = errno;
goto childerror;
}
if (mkdir(path, mode) < 0) {
ret = errno;
if (ret != EACCES) {
/* in case of EACCES, the parent will retry */
virReportSystemError(errno, _("child failed to create directory '%s'"),
path);
}
goto childerror;
}
/* check if group was set properly by creating after
* setgid. If not, try doing it with chown */
if (stat(path, &st) == -1) {
ret = errno;
virReportSystemError(errno,
_("stat of '%s' failed"), path);
goto childerror;
}
if ((st.st_gid != gid) && (chown(path, (uid_t) -1, gid) < 0)) {
ret = errno;
virReportSystemError(errno,
_("cannot chown '%s' to group %u"),
path, (unsigned int) gid);
goto childerror;
}
if (mode != (mode_t) -1 && chmod(path, mode) < 0) {
virReportSystemError(errno,
_("cannot set mode of '%s' to %04o"),
path, mode);
goto childerror;
}
childerror:
if ((ret & 0xff) != ret) {
VIR_WARN("unable to pass desired return value %d", ret);
ret = 0xff;
}
_exit(ret);
}
#else /* WIN32 */
int
virFileAccessibleAs(const char *path,
int mode,
uid_t uid ATTRIBUTE_UNUSED,
gid_t gid ATTRIBUTE_UNUSED)
{
VIR_WARN("Ignoring uid/gid due to WIN32");
return access(path, mode);
}
/* return -errno on failure, or 0 on success */
int
virFileOpenAs(const char *path ATTRIBUTE_UNUSED,
int openflags ATTRIBUTE_UNUSED,
mode_t mode ATTRIBUTE_UNUSED,
uid_t uid ATTRIBUTE_UNUSED,
gid_t gid ATTRIBUTE_UNUSED,
unsigned int flags_unused ATTRIBUTE_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virFileOpenAs is not implemented for WIN32"));
return -ENOSYS;
}
int
virDirCreate(const char *path ATTRIBUTE_UNUSED,
mode_t mode ATTRIBUTE_UNUSED,
uid_t uid ATTRIBUTE_UNUSED,
gid_t gid ATTRIBUTE_UNUSED,
unsigned int flags_unused ATTRIBUTE_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virDirCreate is not implemented for WIN32"));
return -ENOSYS;
}
int
virFileRemove(const char *path,
uid_t uid ATTRIBUTE_UNUSED,
gid_t gid ATTRIBUTE_UNUSED)
{
if (unlink(path) < 0) {
virReportSystemError(errno, _("Unable to unlink path '%s'"),
path);
return -1;
}
return 0;
}
#endif /* WIN32 */
/**
* virDirRead:
* @dirp: directory to read
* @end: output one entry
* @name: if non-NULL, the name related to @dirp for use in error reporting
*
* Wrapper around readdir. Typical usage:
* struct dirent ent;
* int value;
* DIR *dir;
* if (!(dir = opendir(name)))
* goto error;
* while ((value = virDirRead(dir, &ent, name)) > 0)
* process ent;
* if (value < 0)
* goto error;
*
* Returns -1 on error, with error already reported if @name was
* supplied. On success, returns 1 for entry read, 0 for end-of-dir.
*/
int virDirRead(DIR *dirp, struct dirent **ent, const char *name)
{
errno = 0;
*ent = readdir(dirp); /* exempt from syntax-check */
if (!*ent && errno) {
if (name)
virReportSystemError(errno, _("Unable to read directory '%s'"),
name);
return -1;
}
return !!*ent;
}
static int
virFileMakePathHelper(char *path, mode_t mode)
{
struct stat st;
char *p;
VIR_DEBUG("path=%s mode=0%o", path, mode);
if (stat(path, &st) >= 0) {
if (S_ISDIR(st.st_mode))
return 0;
errno = ENOTDIR;
return -1;
}
if (errno != ENOENT)
return -1;
if ((p = strrchr(path, '/')) == NULL) {
errno = EINVAL;
return -1;
}
if (p != path) {
*p = '\0';
if (virFileMakePathHelper(path, mode) < 0)
return -1;
*p = '/';
}
if (mkdir(path, mode) < 0 && errno != EEXIST)
return -1;
return 0;
}
/**
* Creates the given directory with mode 0777 if it's not already existing.
*
* Returns 0 on success, or -1 if an error occurred (in which case, errno
* is set appropriately).
*/
int
virFileMakePath(const char *path)
{
return virFileMakePathWithMode(path, 0777);
}
int
virFileMakePathWithMode(const char *path,
mode_t mode)
{
int ret = -1;
char *tmp;
if (VIR_STRDUP(tmp, path) < 0) {
errno = ENOMEM;
goto cleanup;
}
ret = virFileMakePathHelper(tmp, mode);
cleanup:
VIR_FREE(tmp);
return ret;
}
int
virFileMakeParentPath(const char *path)
{
char *p;
char *tmp;
int ret = -1;
VIR_DEBUG("path=%s", path);
if (VIR_STRDUP(tmp, path) < 0) {
errno = ENOMEM;
return -1;
}
if ((p = strrchr(tmp, '/')) == NULL) {
errno = EINVAL;
goto cleanup;
}
*p = '\0';
ret = virFileMakePathHelper(tmp, 0777);
cleanup:
VIR_FREE(tmp);
return ret;
}
/* Build up a fully qualified path for a config file to be
* associated with a persistent guest or network */
char *
virFileBuildPath(const char *dir, const char *name, const char *ext)
{
char *path;
if (ext == NULL) {
ignore_value(virAsprintf(&path, "%s/%s", dir, name));
} else {
ignore_value(virAsprintf(&path, "%s/%s%s", dir, name, ext));
}
return path;
}
/* Open a non-blocking master side of a pty. If ttyName is not NULL,
* then populate it with the name of the slave. If rawmode is set,
* also put the master side into raw mode before returning. */
#ifndef WIN32
int
virFileOpenTty(int *ttymaster, char **ttyName, int rawmode)
{
/* XXX A word of caution - on some platforms (Solaris and HP-UX),
* additional ioctl() calls are needs after opening the slave
* before it will cause isatty() to return true. Should we make
* virFileOpenTty also return the opened slave fd, so the caller
* doesn't have to worry about that mess? */
int ret = -1;
int slave = -1;
char *name = NULL;
/* Unfortunately, we can't use the name argument of openpty, since
* there is no guarantee on how large the buffer has to be.
* Likewise, we can't use the termios argument: we have to use
* read-modify-write since there is no portable way to initialize
* a struct termios without use of tcgetattr. */
if (openpty(ttymaster, &slave, NULL, NULL, NULL) < 0)
return -1;
/* What a shame that openpty cannot atomically set FD_CLOEXEC, but
* that using posix_openpt/grantpt/unlockpt/ptsname is not
* thread-safe, and that ptsname_r is not portable. */
if (virSetNonBlock(*ttymaster) < 0 ||
virSetCloseExec(*ttymaster) < 0)
goto cleanup;
/* While Linux supports tcgetattr on either the master or the
* slave, Solaris requires it to be on the slave. */
if (rawmode) {
struct termios ttyAttr;
if (tcgetattr(slave, &ttyAttr) < 0)
goto cleanup;
cfmakeraw(&ttyAttr);
if (tcsetattr(slave, TCSADRAIN, &ttyAttr) < 0)
goto cleanup;
}
/* ttyname_r on the slave is required by POSIX, while ptsname_r on
* the master is a glibc extension, and the POSIX ptsname is not
* thread-safe. Since openpty gave us both descriptors, guess
* which way we will determine the name? :) */
if (ttyName) {
/* Initial guess of 64 is generally sufficient; rely on ERANGE
* to tell us if we need to grow. */
size_t len = 64;
int rc;
if (VIR_ALLOC_N(name, len) < 0)
goto cleanup;
while ((rc = ttyname_r(slave, name, len)) == ERANGE) {
if (VIR_RESIZE_N(name, len, len, len) < 0)
goto cleanup;
}
if (rc != 0) {
errno = rc;
goto cleanup;
}
*ttyName = name;
name = NULL;
}
ret = 0;
cleanup:
if (ret != 0)
VIR_FORCE_CLOSE(*ttymaster);
VIR_FORCE_CLOSE(slave);
VIR_FREE(name);
return ret;
}
#else /* WIN32 */
int
virFileOpenTty(int *ttymaster ATTRIBUTE_UNUSED,
char **ttyName ATTRIBUTE_UNUSED,
int rawmode ATTRIBUTE_UNUSED)
{
/* mingw completely lacks pseudo-terminals, and the gnulib
* replacements are not (yet) license compatible. */
errno = ENOSYS;
return -1;
}
#endif /* WIN32 */
bool
virFileIsAbsPath(const char *path)
{
if (!path)
return false;
if (VIR_FILE_IS_DIR_SEPARATOR(path[0]))
return true;
#ifdef WIN32
if (c_isalpha(path[0]) &&
path[1] == ':' &&
VIR_FILE_IS_DIR_SEPARATOR(path[2]))
return true;
#endif
return false;
}
const char *
virFileSkipRoot(const char *path)
{
#ifdef WIN32
/* Skip \\server\share or //server/share */
if (VIR_FILE_IS_DIR_SEPARATOR(path[0]) &&
VIR_FILE_IS_DIR_SEPARATOR(path[1]) &&
path[2] &&
!VIR_FILE_IS_DIR_SEPARATOR(path[2]))
{
const char *p = strchr(path + 2, VIR_FILE_DIR_SEPARATOR);
const char *q = strchr(path + 2, '/');
if (p == NULL || (q != NULL && q < p))
p = q;
if (p && p > path + 2 && p[1]) {
path = p + 1;
while (path[0] &&
!VIR_FILE_IS_DIR_SEPARATOR(path[0]))
path++;
/* Possibly skip a backslash after the share name */
if (VIR_FILE_IS_DIR_SEPARATOR(path[0]))
path++;
return path;
}
}
#endif
/* Skip initial slashes */
if (VIR_FILE_IS_DIR_SEPARATOR(path[0])) {
while (VIR_FILE_IS_DIR_SEPARATOR(path[0]))
path++;
return path;
}
#ifdef WIN32
/* Skip X:\ */
if (c_isalpha(path[0]) &&
path[1] == ':' &&
VIR_FILE_IS_DIR_SEPARATOR(path[2]))
return path + 3;
#endif
return path;
}
/*
* Creates an absolute path for a potentially relative path.
* Return 0 if the path was not relative, or on success.
* Return -1 on error.
*
* You must free the result.
*/
int
virFileAbsPath(const char *path, char **abspath)
{
char *buf;
if (path[0] == '/') {
if (VIR_STRDUP(*abspath, path) < 0)
return -1;
} else {
buf = getcwd(NULL, 0);
if (buf == NULL)
return -1;
if (virAsprintf(abspath, "%s/%s", buf, path) < 0) {
VIR_FREE(buf);
return -1;
}
VIR_FREE(buf);
}
return 0;
}
/* Remove spurious / characters from a path. The result must be freed */
char *
virFileSanitizePath(const char *path)
{
const char *cur = path;
char *uri;
char *cleanpath;
int idx = 0;
if (VIR_STRDUP(cleanpath, path) < 0)
return NULL;
/* don't sanitize URIs - rfc3986 states that two slashes may lead to a
* different resource, thus removing them would possibly change the path */
if ((uri = strstr(path, "://")) && strchr(path, '/') > uri)
return cleanpath;
/* Need to sanitize:
* // -> //
* /// -> /
* /../foo -> /../foo
* /foo///bar/ -> /foo/bar
*/
/* Starting with // is valid posix, but ///foo == /foo */
if (cur[0] == '/' && cur[1] == '/' && cur[2] != '/') {
idx = 2;
cur += 2;
}
/* Sanitize path in place */
while (*cur != '\0') {
if (*cur != '/') {
cleanpath[idx++] = *cur++;
continue;
}
/* Skip all extra / */
while (*++cur == '/')
continue;
/* Don't add a trailing / */
if (idx != 0 && *cur == '\0')
break;
cleanpath[idx++] = '/';
}
cleanpath[idx] = '\0';
return cleanpath;
}
/**
* virFilePrintf:
*
* A replacement for fprintf() which uses virVasprintf to
* ensure that portable string format placeholders can be
* used, since gnulib's fprintf() replacement is not
* LGPLV2+ compatible
*/
int virFilePrintf(FILE *fp, const char *msg, ...)
{
va_list vargs;
char *str;
int ret;
va_start(vargs, msg);
if ((ret = virVasprintf(&str, msg, vargs)) < 0)
goto cleanup;
if (fwrite(str, 1, ret, fp) != ret) {
virReportSystemError(errno, "%s",
_("Could not write to stream"));
ret = -1;
}
VIR_FREE(str);
cleanup:
va_end(vargs);
return ret;
}
#ifdef __linux__
# ifndef NFS_SUPER_MAGIC
# define NFS_SUPER_MAGIC 0x6969
# endif
# ifndef OCFS2_SUPER_MAGIC
# define OCFS2_SUPER_MAGIC 0x7461636f
# endif
# ifndef GFS2_MAGIC
# define GFS2_MAGIC 0x01161970
# endif
# ifndef AFS_FS_MAGIC
# define AFS_FS_MAGIC 0x6B414653
# endif
# ifndef SMB_SUPER_MAGIC
# define SMB_SUPER_MAGIC 0x517B
# endif
# ifndef CIFS_SUPER_MAGIC
# define CIFS_SUPER_MAGIC 0xFF534D42
# endif
# ifndef HUGETLBFS_MAGIC
# define HUGETLBFS_MAGIC 0x958458f6
# endif
int
virFileIsSharedFSType(const char *path,
int fstypes)
{
char *dirpath, *p;
struct statfs sb;
int statfs_ret;
if (VIR_STRDUP(dirpath, path) < 0)
return -1;
do {
/* Try less and less of the path until we get to a
* directory we can stat. Even if we don't have 'x'
* permission on any directory in the path on the NFS
* server (assuming it's NFS), we will be able to stat the
* mount point, and that will properly tell us if the
* fstype is NFS.
*/
if ((p = strrchr(dirpath, '/')) == NULL) {
virReportSystemError(EINVAL,
_("Invalid relative path '%s'"), path);
VIR_FREE(dirpath);
return -1;
}
if (p == dirpath)
*(p+1) = '\0';
else
*p = '\0';
statfs_ret = statfs(dirpath, &sb);
} while ((statfs_ret < 0) && (p != dirpath));
VIR_FREE(dirpath);
if (statfs_ret < 0) {
virReportSystemError(errno,
_("cannot determine filesystem for '%s'"),
path);
return -1;
}
VIR_DEBUG("Check if path %s with FS magic %lld is shared",
path, (long long int)sb.f_type);
if ((fstypes & VIR_FILE_SHFS_NFS) &&
(sb.f_type == NFS_SUPER_MAGIC))
return 1;
if ((fstypes & VIR_FILE_SHFS_GFS2) &&
(sb.f_type == GFS2_MAGIC))
return 1;
if ((fstypes & VIR_FILE_SHFS_OCFS) &&
(sb.f_type == OCFS2_SUPER_MAGIC))
return 1;
if ((fstypes & VIR_FILE_SHFS_AFS) &&
(sb.f_type == AFS_FS_MAGIC))
return 1;
if ((fstypes & VIR_FILE_SHFS_SMB) &&
(sb.f_type == SMB_SUPER_MAGIC))
return 1;
if ((fstypes & VIR_FILE_SHFS_CIFS) &&
(sb.f_type == CIFS_SUPER_MAGIC))
return 1;
return 0;
}
int
virFileGetHugepageSize(const char *path,
unsigned long long *size)
{
int ret = -1;
struct statfs fs;
if (statfs(path, &fs) < 0) {
virReportSystemError(errno,
_("cannot determine filesystem for '%s'"),
path);
goto cleanup;
}
if (fs.f_type != HUGETLBFS_MAGIC) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("not a hugetlbfs mount: '%s'"),
path);
goto cleanup;
}
*size = fs.f_bsize / 1024; /* we are storing size in KiB */
ret = 0;
cleanup:
return ret;
}
# define PROC_MEMINFO "/proc/meminfo"
# define HUGEPAGESIZE_STR "Hugepagesize:"
static int
virFileGetDefaultHugepageSize(unsigned long long *size)
{
int ret = -1;
char *meminfo, *c, *n, *unit;
if (virFileReadAll(PROC_MEMINFO, 4096, &meminfo) < 0)
goto cleanup;
if (!(c = strstr(meminfo, HUGEPAGESIZE_STR))) {
virReportError(VIR_ERR_NO_SUPPORT,
_("%s not found in %s"),
HUGEPAGESIZE_STR,
PROC_MEMINFO);
goto cleanup;
}
c += strlen(HUGEPAGESIZE_STR);
if ((n = strchr(c, '\n'))) {
/* Cut off the rest of the meminfo file */
*n = '\0';
}
if (virStrToLong_ull(c, &unit, 10, size) < 0 || STRNEQ(unit, " kB")) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unable to parse %s %s"),
HUGEPAGESIZE_STR, c);
goto cleanup;
}
ret = 0;
cleanup:
VIR_FREE(meminfo);
return ret;
}
# define PROC_MOUNTS "/proc/mounts"
int
virFileFindHugeTLBFS(virHugeTLBFSPtr *ret_fs,
size_t *ret_nfs)
{
int ret = -1;
FILE *f = NULL;
struct mntent mb;
char mntbuf[1024];
virHugeTLBFSPtr fs = NULL;
size_t nfs = 0;
unsigned long long default_hugepagesz = 0;
if (!(f = setmntent(PROC_MOUNTS, "r"))) {
virReportSystemError(errno,
_("Unable to open %s"),
PROC_MOUNTS);
goto cleanup;
}
while (getmntent_r(f, &mb, mntbuf, sizeof(mntbuf))) {
virHugeTLBFSPtr tmp;
if (STRNEQ(mb.mnt_type, "hugetlbfs"))
continue;
if (VIR_EXPAND_N(fs, nfs, 1) < 0)
goto cleanup;
tmp = &fs[nfs - 1];
if (VIR_STRDUP(tmp->mnt_dir, mb.mnt_dir) < 0)
goto cleanup;
if (virFileGetHugepageSize(tmp->mnt_dir, &tmp->size) < 0)
goto cleanup;
if (!default_hugepagesz &&
virFileGetDefaultHugepageSize(&default_hugepagesz) < 0)
goto cleanup;
tmp->deflt = tmp->size == default_hugepagesz;
}
*ret_fs = fs;
*ret_nfs = nfs;
fs = NULL;
nfs = 0;
ret = 0;
cleanup:
endmntent(f);
while (nfs)
VIR_FREE(fs[--nfs].mnt_dir);
VIR_FREE(fs);
return ret;
}
#else /* defined __linux__ */
int virFileIsSharedFSType(const char *path ATTRIBUTE_UNUSED,
int fstypes ATTRIBUTE_UNUSED)
{
/* XXX implement me :-) */
return 0;
}
int
virFileGetHugepageSize(const char *path ATTRIBUTE_UNUSED,
unsigned long long *size ATTRIBUTE_UNUSED)
{
/* XXX implement me :-) */
virReportUnsupportedError();
return -1;
}
int
virFileFindHugeTLBFS(virHugeTLBFSPtr *ret_fs ATTRIBUTE_UNUSED,
size_t *ret_nfs ATTRIBUTE_UNUSED)
{
/* XXX implement me :-) */
virReportUnsupportedError();
return -1;
}
#endif /* defined __linux__ */
int virFileIsSharedFS(const char *path)
{
return virFileIsSharedFSType(path,
VIR_FILE_SHFS_NFS |
VIR_FILE_SHFS_GFS2 |
VIR_FILE_SHFS_OCFS |
VIR_FILE_SHFS_AFS |
VIR_FILE_SHFS_SMB |
VIR_FILE_SHFS_CIFS);
}