libvirt/src/util/virutil.c

1978 lines
50 KiB
C
Raw Normal View History

/*
2012-12-13 17:44:57 +00:00
* virutil.c: common, generic utility functions
*
* Copyright (C) 2006-2014 Red Hat, Inc.
* Copyright (C) 2006 Daniel P. Berrange
* Copyright (C) 2006, 2007 Binary Karma
* Copyright (C) 2006 Shuveb Hussain
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#ifdef WIN32
# include <conio.h>
#endif /* WIN32 */
#ifdef __linux__
# include <sys/sysmacros.h>
#endif
#include <sys/types.h>
2009-01-22 19:41:48 +00:00
#ifdef HAVE_GETPWUID_R
# include <pwd.h>
# include <grp.h>
2009-01-22 19:41:48 +00:00
#endif
#if WITH_CAPNG
# include <cap-ng.h>
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
# include <sys/prctl.h>
#endif
#include "virerror.h"
2012-12-12 17:59:27 +00:00
#include "virlog.h"
#include "virbuffer.h"
2012-12-12 18:06:53 +00:00
#include "viralloc.h"
#include "virfile.h"
#include "vircommand.h"
#include "virprocess.h"
#include "virstring.h"
#include "virutil.h"
#include "virsocket.h"
G_STATIC_ASSERT(sizeof(gid_t) <= sizeof(unsigned int) &&
sizeof(uid_t) <= sizeof(unsigned int));
#define VIR_FROM_THIS VIR_FROM_NONE
VIR_LOG_INIT("util.util");
#ifndef WIN32
int virSetInherit(int fd, bool inherit)
{
int fflags;
if ((fflags = fcntl(fd, F_GETFD)) < 0)
return -1;
if (inherit)
fflags &= ~FD_CLOEXEC;
else
fflags |= FD_CLOEXEC;
if ((fcntl(fd, F_SETFD, fflags)) < 0)
return -1;
return 0;
}
int virSetBlocking(int fd, bool block)
{
int fflags;
if ((fflags = fcntl(fd, F_GETFL)) < 0)
return -1;
if (block)
fflags &= ~O_NONBLOCK;
else
fflags |= O_NONBLOCK;
if ((fcntl(fd, F_SETFL, fflags)) < 0)
return -1;
return 0;
}
#else /* WIN32 */
int virSetInherit(int fd G_GNUC_UNUSED, bool inherit G_GNUC_UNUSED)
{
/* FIXME: Currently creating child processes is not supported on
* Win32, so there is no point in failing calls that are only relevant
* when creating child processes. So just pretend that we changed the
* inheritance property of the given fd as requested. */
return 0;
}
int virSetBlocking(int fd, bool blocking)
{
unsigned long arg = blocking ? 0 : 1;
if (ioctlsocket(fd, FIONBIO, &arg) < 0)
return -1;
return 0;
}
#endif /* WIN32 */
int virSetNonBlock(int fd)
{
return virSetBlocking(fd, false);
}
int virSetCloseExec(int fd)
{
return virSetInherit(fd, false);
}
#ifdef WIN32
int virSetSockReuseAddr(int fd G_GNUC_UNUSED, bool fatal G_GNUC_UNUSED)
{
/*
* SO_REUSEADDR on Windows is actually akin to SO_REUSEPORT
* on Linux/BSD. ie it allows 2 apps to listen to the same
* port at once which is certainly not what we want here.
*
* Win32 sockets have Linux/BSD-like SO_REUSEADDR behaviour
* by default, so we can be a no-op.
*
* http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx
*/
return 0;
}
#else
int virSetSockReuseAddr(int fd, bool fatal)
{
int opt = 1;
int ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
if (ret < 0 && fatal) {
virReportSystemError(errno, "%s",
_("Unable to set socket reuse addr flag"));
}
return ret;
}
#endif
/* Scale an integer VALUE in-place by an optional case-insensitive
* SUFFIX, defaulting to SCALE if suffix is NULL or empty (scale is
* typically 1 or 1024). Recognized suffixes include 'b' or 'bytes',
* as well as power-of-two scaling via binary abbreviations ('KiB',
* 'MiB', ...) or their one-letter counterpart ('k', 'M', ...), and
* power-of-ten scaling via SI abbreviations ('KB', 'MB', ...).
* Ensure that the result does not exceed LIMIT. Return 0 on success,
* -1 with error message raised on failure. */
int
virScaleInteger(unsigned long long *value, const char *suffix,
unsigned long long scale, unsigned long long limit)
{
if (!suffix || !*suffix) {
if (!scale) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("invalid scale %llu"), scale);
return -1;
}
suffix = "";
} else if (STRCASEEQ(suffix, "b") || STRCASEEQ(suffix, "byte") ||
STRCASEEQ(suffix, "bytes")) {
scale = 1;
} else {
int base;
if (!suffix[1] || STRCASEEQ(suffix + 1, "iB")) {
base = 1024;
} else if (g_ascii_tolower(suffix[1]) == 'b' && !suffix[2]) {
base = 1000;
} else {
virReportError(VIR_ERR_INVALID_ARG,
_("unknown suffix '%s'"), suffix);
return -1;
}
scale = 1;
switch (g_ascii_tolower(*suffix)) {
case 'e':
scale *= base;
G_GNUC_FALLTHROUGH;
case 'p':
scale *= base;
G_GNUC_FALLTHROUGH;
case 't':
scale *= base;
G_GNUC_FALLTHROUGH;
case 'g':
scale *= base;
G_GNUC_FALLTHROUGH;
case 'm':
scale *= base;
G_GNUC_FALLTHROUGH;
case 'k':
scale *= base;
break;
default:
virReportError(VIR_ERR_INVALID_ARG,
_("unknown suffix '%s'"), suffix);
return -1;
}
}
if (*value && *value > (limit / scale)) {
virReportError(VIR_ERR_OVERFLOW, _("value too large: %llu%s"),
*value, suffix);
return -1;
}
*value *= scale;
return 0;
}
/**
* virParseVersionString:
* @str: const char pointer to the version string
* @version: unsigned long pointer to output the version number
* @allowMissing: true to treat 3 like 3.0.0, false to error out on
* missing minor or micro
*
* Parse an unsigned version number from a version string. Expecting
* 'major.minor.micro' format, ignoring an optional suffix.
*
* The major, minor and micro numbers are encoded into a single version number:
*
* 1000000 * major + 1000 * minor + micro
*
* Returns the 0 for success, -1 for error.
*/
int
virParseVersionString(const char *str, unsigned long *version,
bool allowMissing)
{
unsigned int major, minor = 0, micro = 0;
char *tmp;
if (virStrToLong_ui(str, &tmp, 10, &major) < 0)
return -1;
if (!allowMissing && *tmp != '.')
return -1;
if ((*tmp == '.') && virStrToLong_ui(tmp + 1, &tmp, 10, &minor) < 0)
return -1;
if (!allowMissing && *tmp != '.')
return -1;
if ((*tmp == '.') && virStrToLong_ui(tmp + 1, &tmp, 10, &micro) < 0)
return -1;
if (major > UINT_MAX / 1000000 || minor > 999 || micro > 999)
return -1;
*version = 1000000 * major + 1000 * minor + micro;
return 0;
}
/**
* Format @val as a base-10 decimal number, in the
* buffer @buf of size @buflen. To allocate a suitable
* sized buffer, the INT_BUFLEN(int) macro should be
* used
*
* Returns pointer to start of the number in @buf
*/
char *
virFormatIntDecimal(char *buf, size_t buflen, int val)
{
char *p = buf + buflen - 1;
*p = '\0';
if (val >= 0) {
do {
*--p = '0' + (val % 10);
val /= 10;
} while (val != 0);
} else {
do {
*--p = '0' - (val % 10);
val /= 10;
} while (val != 0);
*--p = '-';
}
return p;
}
/**
* virFormatIntPretty
*
* @val: Value in bytes to be shortened
* @unit: unit to be used
*
* Similar to vshPrettyCapacity, but operates on integers and not doubles
*
* NB: Since using unsigned long long, we are limited to at most a "PiB"
* to make pretty. This is because a PiB is 1152921504606846976 bytes,
* but that value * 1024 > ULLONG_MAX value 18446744073709551615 bytes.
*
* Returns shortened value that can be used with @unit.
*/
unsigned long long
virFormatIntPretty(unsigned long long val,
const char **unit)
{
unsigned long long limit = 1024;
if (val % limit || val == 0) {
*unit = "B";
return val;
}
limit *= 1024;
if (val % limit) {
*unit = "KiB";
return val / (limit / 1024);
}
limit *= 1024;
if (val % limit) {
*unit = "MiB";
return val / (limit / 1024);
}
limit *= 1024;
if (val % limit) {
*unit = "GiB";
return val / (limit / 1024);
}
limit *= 1024;
if (val % limit) {
*unit = "TiB";
return val / (limit / 1024);
}
limit *= 1024;
*unit = "PiB";
return val / (limit / 1024);
}
/* Translates a device name of the form (regex) /^[fhv]d[a-z]+[0-9]*$/
* into the corresponding index and partition number
* (e.g. sda0 => (0,0), hdz2 => (25,2), vdaa12 => (26,12))
* @param name The name of the device
* @param disk The disk index to be returned
* @param partition The partition index to be returned
* @return 0 on success, or -1 on failure
*/
int virDiskNameParse(const char *name, int *disk, int *partition)
{
const char *ptr = NULL;
char *rem;
int idx = 0;
static char const* const drive_prefix[] = {"fd", "hd", "vd", "sd", "xvd", "ubd"};
size_t i;
for (i = 0; i < G_N_ELEMENTS(drive_prefix); i++) {
if (STRPREFIX(name, drive_prefix[i])) {
ptr = name + strlen(drive_prefix[i]);
break;
}
}
if (!ptr || !g_ascii_islower(*ptr))
return -1;
for (i = 0; *ptr; i++) {
if (!g_ascii_islower(*ptr))
break;
idx = (idx + (i < 1 ? 0 : 1)) * 26;
idx += *ptr - 'a';
ptr++;
}
/* Count the trailing digits. */
size_t n_digits = strspn(ptr, "0123456789");
if (ptr[n_digits] != '\0')
return -1;
*disk = idx;
/* Convert trailing digits into our partition index */
if (partition) {
*partition = 0;
/* Shouldn't start by zero */
if (n_digits > 1 && *ptr == '0')
return -1;
if (n_digits && virStrToLong_i(ptr, &rem, 10, partition) < 0)
return -1;
}
return 0;
}
/* Translates a device name of the form (regex) /^[fhv]d[a-z]+[0-9]*$/
* into the corresponding index (e.g. sda => 0, hdz => 25, vdaa => 26)
* Note that any trailing string of digits is simply ignored.
* @param name The name of the device
* @return name's index, or -1 on failure
*/
int virDiskNameToIndex(const char *name)
{
int idx;
if (virDiskNameParse(name, &idx, NULL) < 0)
idx = -1;
return idx;
}
2009-01-06 17:46:46 +00:00
char *virIndexToDiskName(int idx, const char *prefix)
{
char *name = NULL;
size_t i;
int ctr;
int offset;
if (idx < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Disk index %d is negative"), idx);
return NULL;
}
for (i = 0, ctr = idx; ctr >= 0; ++i, ctr = ctr / 26 - 1) { }
offset = strlen(prefix);
if (VIR_ALLOC_N(name, offset + i + 1))
return NULL;
strcpy(name, prefix);
name[offset + i] = '\0';
for (i = i - 1, ctr = idx; ctr >= 0; --i, ctr = ctr / 26 - 1)
name[offset + i] = 'a' + (ctr % 26);
return name;
}
#ifndef AI_CANONIDN
# define AI_CANONIDN 0
#endif
#ifndef HOST_NAME_MAX
# define HOST_NAME_MAX 256
#endif
/* Who knew getting a hostname could be so delicate. In Linux (and Unices
* in general), many things depend on "hostname" returning a value that will
* resolve one way or another. In the modern world where networks frequently
* come and go this is often being hard-coded to resolve to "localhost". If
* it *doesn't* resolve to localhost, then we would prefer to have the FQDN.
* That leads us to 3 possibilities:
*
* 1) gethostname() returns an FQDN (not localhost) - we return the string
* as-is, it's all of the information we want
* 2) gethostname() returns "localhost" - we return localhost; doing further
* work to try to resolve it is pointless
* 3) gethostname() returns a shortened hostname - in this case, we want to
* try to resolve this to a fully-qualified name. Therefore we pass it
* to getaddrinfo(). There are two possible responses:
* a) getaddrinfo() resolves to a FQDN - return the FQDN
* b) getaddrinfo() fails or resolves to localhost - in this case, the
* data we got from gethostname() is actually more useful than what
* we got from getaddrinfo(). Return the value from gethostname()
* and hope for the best.
*/
static char *
virGetHostnameImpl(bool quiet)
{
int r;
const char *hostname;
char *result = NULL;
struct addrinfo hints, *info;
hostname = g_get_host_name();
if (STRPREFIX(hostname, "localhost") || strchr(hostname, '.')) {
/* in this case, gethostname returned localhost (meaning we can't
* do any further canonicalization), or it returned an FQDN (and
* we don't need to do any further canonicalization). Return the
* string as-is; it's up to callers to check whether "localhost"
* is allowed.
*/
result = g_strdup(hostname);
goto cleanup;
}
/* otherwise, it's a shortened, non-localhost, hostname. Attempt to
* canonicalize the hostname by running it through getaddrinfo
*/
memset(&hints, 0, sizeof(hints));
hints.ai_flags = AI_CANONNAME|AI_CANONIDN;
hints.ai_family = AF_UNSPEC;
r = getaddrinfo(hostname, NULL, &hints, &info);
if (r != 0) {
if (!quiet)
VIR_WARN("getaddrinfo failed for '%s': %s",
hostname, gai_strerror(r));
result = g_strdup(hostname);
goto cleanup;
}
/* Tell static analyzers about getaddrinfo semantics. */
sa_assert(info);
if (info->ai_canonname == NULL ||
STRPREFIX(info->ai_canonname, "localhost"))
/* in this case, we tried to canonicalize and we ended up back with
* localhost. Ignore the canonicalized name and just return the
* original hostname
*/
result = g_strdup(hostname);
else
/* Caller frees this string. */
result = g_strdup(info->ai_canonname);
freeaddrinfo(info);
cleanup:
if (!result)
virReportOOMError();
return result;
}
char *
virGetHostname(void)
{
return virGetHostnameImpl(false);
}
char *
virGetHostnameQuiet(void)
{
return virGetHostnameImpl(true);
}
char *
virGetUserDirectory(void)
{
return g_strdup(g_get_home_dir());
}
char *virGetUserConfigDirectory(void)
{
#ifdef WIN32
return g_strdup(g_get_user_config_dir());
#else
return g_build_filename(g_get_user_config_dir(), "libvirt", NULL);
#endif
}
char *virGetUserCacheDirectory(void)
{
#ifdef WIN32
return g_strdup(g_get_user_cache_dir());
#else
return g_build_filename(g_get_user_cache_dir(), "libvirt", NULL);
#endif
}
char *virGetUserRuntimeDirectory(void)
{
#ifdef WIN32
return g_strdup(g_get_user_runtime_dir());
#else
return g_build_filename(g_get_user_runtime_dir(), "libvirt", NULL);
#endif
}
2009-01-22 19:41:48 +00:00
#ifdef HAVE_GETPWUID_R
/* Look up fields from the user database for the given user. On
* error, set errno, report the error if not instructed otherwise via @quiet,
* and return -1. */
static int
virGetUserEnt(uid_t uid, char **name, gid_t *group, char **dir, char **shell, bool quiet)
2009-01-22 19:41:48 +00:00
{
char *strbuf;
struct passwd pwbuf;
struct passwd *pw = NULL;
long val = sysconf(_SC_GETPW_R_SIZE_MAX);
size_t strbuflen = val;
int rc;
int ret = -1;
if (name)
*name = NULL;
if (dir)
*dir = NULL;
if (shell)
*shell = NULL;
/* sysconf is a hint; if it fails, fall back to a reasonable size */
if (val < 0)
strbuflen = 1024;
2009-01-22 19:41:48 +00:00
if (VIR_ALLOC_N(strbuf, strbuflen) < 0)
return -1;
2009-01-22 19:41:48 +00:00
/*
* From the manpage (terrifying but true):
*
* ERRORS
* 0 or ENOENT or ESRCH or EBADF or EPERM or ...
* The given name or uid was not found.
*/
while ((rc = getpwuid_r(uid, &pwbuf, strbuf, strbuflen, &pw)) == ERANGE) {
if (VIR_RESIZE_N(strbuf, strbuflen, strbuflen, strbuflen) < 0)
goto cleanup;
}
if (rc != 0) {
if (quiet)
goto cleanup;
virReportSystemError(rc,
_("Failed to find user record for uid '%u'"),
(unsigned int) uid);
goto cleanup;
} else if (pw == NULL) {
if (quiet)
goto cleanup;
virReportError(VIR_ERR_SYSTEM_ERROR,
_("Failed to find user record for uid '%u'"),
(unsigned int) uid);
goto cleanup;
}
if (name)
*name = g_strdup(pw->pw_name);
if (group)
*group = pw->pw_gid;
if (dir)
*dir = g_strdup(pw->pw_dir);
if (shell)
*shell = g_strdup(pw->pw_shell);
2009-01-22 19:41:48 +00:00
ret = 0;
cleanup:
if (ret < 0) {
if (name)
VIR_FREE(*name);
if (dir)
VIR_FREE(*dir);
if (shell)
VIR_FREE(*shell);
}
2009-01-22 19:41:48 +00:00
VIR_FREE(strbuf);
return ret;
}
static char *virGetGroupEnt(gid_t gid)
{
char *strbuf;
char *ret;
struct group grbuf;
struct group *gr = NULL;
long val = sysconf(_SC_GETGR_R_SIZE_MAX);
size_t strbuflen = val;
int rc;
/* sysconf is a hint; if it fails, fall back to a reasonable size */
if (val < 0)
strbuflen = 1024;
if (VIR_ALLOC_N(strbuf, strbuflen) < 0)
return NULL;
/*
* From the manpage (terrifying but true):
*
* ERRORS
* 0 or ENOENT or ESRCH or EBADF or EPERM or ...
* The given name or gid was not found.
*/
while ((rc = getgrgid_r(gid, &grbuf, strbuf, strbuflen, &gr)) == ERANGE) {
if (VIR_RESIZE_N(strbuf, strbuflen, strbuflen, strbuflen) < 0) {
VIR_FREE(strbuf);
return NULL;
}
}
if (rc != 0 || gr == NULL) {
if (rc != 0) {
virReportSystemError(rc,
_("Failed to find group record for gid '%u'"),
(unsigned int) gid);
} else {
virReportError(VIR_ERR_SYSTEM_ERROR,
_("Failed to find group record for gid '%u'"),
(unsigned int) gid);
}
VIR_FREE(strbuf);
return NULL;
}
ret = g_strdup(gr->gr_name);
VIR_FREE(strbuf);
return ret;
}
char *
virGetUserDirectoryByUID(uid_t uid)
{
char *ret;
virGetUserEnt(uid, NULL, NULL, &ret, NULL, false);
return ret;
}
char *virGetUserShell(uid_t uid)
{
char *ret;
virGetUserEnt(uid, NULL, NULL, NULL, &ret, false);
return ret;
}
char *virGetUserName(uid_t uid)
{
char *ret;
virGetUserEnt(uid, &ret, NULL, NULL, NULL, false);
return ret;
}
char *virGetGroupName(gid_t gid)
{
return virGetGroupEnt(gid);
}
/* Search in the password database for a user id that matches the user name
* `name`. Returns 0 on success, -1 on failure or 1 if name cannot be found.
*
* Warns if @missing_ok is false
*/
static int
virGetUserIDByName(const char *name, uid_t *uid, bool missing_ok)
{
char *strbuf = NULL;
struct passwd pwbuf;
struct passwd *pw = NULL;
long val = sysconf(_SC_GETPW_R_SIZE_MAX);
size_t strbuflen = val;
int rc;
int ret = -1;
/* sysconf is a hint; if it fails, fall back to a reasonable size */
if (val < 0)
strbuflen = 1024;
if (VIR_ALLOC_N(strbuf, strbuflen) < 0)
goto cleanup;
while ((rc = getpwnam_r(name, &pwbuf, strbuf, strbuflen, &pw)) == ERANGE) {
if (VIR_RESIZE_N(strbuf, strbuflen, strbuflen, strbuflen) < 0)
goto cleanup;
}
if (!pw) {
if (rc != 0 && !missing_ok) {
/* log the possible error from getpwnam_r. Unfortunately error
* reporting from this function is bad and we can't really
* rely on it, so we just report that the user wasn't found */
VIR_WARN("User record for user '%s' was not found: %s",
name, g_strerror(rc));
}
ret = 1;
goto cleanup;
}
if (uid)
*uid = pw->pw_uid;
ret = 0;
cleanup:
VIR_FREE(strbuf);
return ret;
}
/* Try to match a user id based on `user`. The default behavior is to parse
* `user` first as a user name and then as a user id. However if `user`
* contains a leading '+', the rest of the string is always parsed as a uid.
*
* Returns 0 on success and -1 otherwise.
*/
int
virGetUserID(const char *user, uid_t *uid)
{
unsigned int uint_uid;
if (*user == '+') {
user++;
} else {
int rc = virGetUserIDByName(user, uid, false);
if (rc <= 0)
return rc;
}
if (virStrToLong_ui(user, NULL, 10, &uint_uid) < 0 ||
((uid_t) uint_uid) != uint_uid) {
virReportError(VIR_ERR_INVALID_ARG, _("Failed to parse user '%s'"),
user);
return -1;
}
*uid = uint_uid;
return 0;
}
/* Search in the group database for a group id that matches the group name
* `name`. Returns 0 on success, -1 on failure or 1 if name cannot be found.
*
* Warns if @missing_ok is false
*/
static int
virGetGroupIDByName(const char *name, gid_t *gid, bool missing_ok)
{
char *strbuf = NULL;
struct group grbuf;
struct group *gr = NULL;
long val = sysconf(_SC_GETGR_R_SIZE_MAX);
size_t strbuflen = val;
int rc;
int ret = -1;
/* sysconf is a hint; if it fails, fall back to a reasonable size */
if (val < 0)
strbuflen = 1024;
if (VIR_ALLOC_N(strbuf, strbuflen) < 0)
goto cleanup;
while ((rc = getgrnam_r(name, &grbuf, strbuf, strbuflen, &gr)) == ERANGE) {
if (VIR_RESIZE_N(strbuf, strbuflen, strbuflen, strbuflen) < 0)
goto cleanup;
}
if (!gr) {
if (rc != 0 && !missing_ok) {
/* log the possible error from getgrnam_r. Unfortunately error
* reporting from this function is bad and we can't really
* rely on it, so we just report that the user wasn't found */
VIR_WARN("Group record for user '%s' was not found: %s",
name, g_strerror(rc));
}
ret = 1;
goto cleanup;
}
if (gid)
*gid = gr->gr_gid;
ret = 0;
cleanup:
VIR_FREE(strbuf);
return ret;
}
/* Try to match a group id based on `group`. The default behavior is to parse
* `group` first as a group name and then as a group id. However if `group`
* contains a leading '+', the rest of the string is always parsed as a guid.
*
* Returns 0 on success and -1 otherwise.
*/
int
virGetGroupID(const char *group, gid_t *gid)
{
unsigned int uint_gid;
if (*group == '+') {
group++;
} else {
int rc = virGetGroupIDByName(group, gid, false);
if (rc <= 0)
return rc;
}
if (virStrToLong_ui(group, NULL, 10, &uint_gid) < 0 ||
((gid_t) uint_gid) != uint_gid) {
virReportError(VIR_ERR_INVALID_ARG, _("Failed to parse group '%s'"),
group);
return -1;
}
*gid = uint_gid;
return 0;
}
/* Silently checks if User @name exists.
* Returns if the user exists and fallbacks to false on error.
*/
bool
virDoesUserExist(const char *name)
{
return virGetUserIDByName(name, NULL, true) == 0;
}
/* Silently checks if Group @name exists.
* Returns if the group exists and fallbacks to false on error.
*/
bool
virDoesGroupExist(const char *name)
{
return virGetGroupIDByName(name, NULL, true) == 0;
}
/* Work around an incompatibility of OS X 10.11: getgrouplist
accepts int *, not gid_t *, and int and gid_t differ in sign. */
VIR_WARNINGS_NO_POINTER_SIGN
/* Compute the list of primary and supplementary groups associated
* with @uid, and including @gid in the list (unless it is -1),
* storing a malloc'd result into @list. If uid is -1 or doesn't exist in the
* system database querying of the supplementary groups is skipped.
*
* Returns the size of the list on success, or -1 on failure with error
* reported and errno set. May not be called between fork and exec.
* */
int
virGetGroupList(uid_t uid, gid_t gid, gid_t **list)
{
int ret = 0;
char *user = NULL;
gid_t primary;
*list = NULL;
/* invalid users have no supplementary groups */
if (uid != (uid_t)-1 &&
virGetUserEnt(uid, &user, &primary, NULL, NULL, true) >= 0) {
int nallocgrps = 10;
gid_t *grps = g_new(gid_t, nallocgrps);
while (1) {
int nprevallocgrps = nallocgrps;
int rv;
rv = getgrouplist(user, primary, grps, &nallocgrps);
/* Some systems (like Darwin) have a bug where they
never increase max_n_groups. */
if (rv < 0 && nprevallocgrps == nallocgrps)
nallocgrps *= 2;
/* either shrinks to actual size, or enlarges to new size */
grps = g_renew(gid_t, grps, nallocgrps);
if (rv >= 0) {
ret = rv;
*list = grps;
break;
}
}
}
if (gid != (gid_t)-1) {
size_t i;
for (i = 0; i < ret; i++) {
if ((*list)[i] == gid)
goto cleanup;
}
if (VIR_APPEND_ELEMENT(*list, i, gid) < 0) {
ret = -1;
VIR_FREE(*list);
goto cleanup;
} else {
ret = i;
}
}
cleanup:
VIR_FREE(user);
return ret;
}
VIR_WARNINGS_RESET
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
/* Set the real and effective uid and gid to the given values, as well
* as all the supplementary groups, so that the process has all the
* assumed group membership of that uid. Return 0 on success, -1 on
* failure (the original system error remains in errno).
*/
int
virSetUIDGID(uid_t uid, gid_t gid, gid_t *groups G_GNUC_UNUSED,
int ngroups G_GNUC_UNUSED)
{
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
if (gid != (gid_t)-1 && setregid(gid, gid) < 0) {
virReportSystemError(errno,
_("cannot change to '%u' group"),
(unsigned int) gid);
return -1;
}
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
# if HAVE_SETGROUPS
if (gid != (gid_t)-1 && setgroups(ngroups, groups) < 0) {
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
virReportSystemError(errno, "%s",
_("cannot set supplemental groups"));
return -1;
}
# endif
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
if (uid != (uid_t)-1 && setreuid(uid, uid) < 0) {
virReportSystemError(errno,
_("cannot change to uid to '%u'"),
(unsigned int) uid);
return -1;
}
return 0;
}
#else /* ! HAVE_GETPWUID_R */
int
virGetGroupList(uid_t uid G_GNUC_UNUSED, gid_t gid G_GNUC_UNUSED,
gid_t **list)
{
*list = NULL;
return 0;
}
bool
virDoesUserExist(const char *name G_GNUC_UNUSED)
{
return false;
}
bool
virDoesGroupExist(const char *name G_GNUC_UNUSED)
{
return false;
}
# ifdef WIN32
char *
virGetUserDirectoryByUID(uid_t uid G_GNUC_UNUSED)
{
/* Since Windows lacks setuid binaries, and since we already fake
* geteuid(), we can safely assume that this is only called when
* querying about the current user */
return g_strdup(g_get_home_dir());
}
char *
virGetUserShell(uid_t uid G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virGetUserShell is not available"));
return NULL;
}
# else /* !HAVE_GETPWUID_R && !WIN32 */
char *
virGetUserDirectoryByUID(uid_t uid G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virGetUserDirectory is not available"));
return NULL;
}
char *
virGetUserShell(uid_t uid G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virGetUserShell is not available"));
return NULL;
}
# endif /* ! HAVE_GETPWUID_R && ! WIN32 */
char *
virGetUserName(uid_t uid G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virGetUserName is not available"));
return NULL;
}
int virGetUserID(const char *name G_GNUC_UNUSED,
uid_t *uid G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virGetUserID is not available"));
return -1;
}
int virGetGroupID(const char *name G_GNUC_UNUSED,
gid_t *gid G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virGetGroupID is not available"));
return -1;
}
int
virSetUIDGID(uid_t uid G_GNUC_UNUSED,
gid_t gid G_GNUC_UNUSED,
gid_t *groups G_GNUC_UNUSED,
int ngroups G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virSetUIDGID is not available"));
return -1;
}
char *
virGetGroupName(gid_t gid G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("virGetGroupName is not available"));
return NULL;
}
#endif /* HAVE_GETPWUID_R */
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
#if WITH_CAPNG
/* Set the real and effective uid and gid to the given values, while
* maintaining the capabilities indicated by bits in @capBits. Return
* 0 on success, -1 on failure (the original system error remains in
* errno).
*/
int
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
virSetUIDGIDWithCaps(uid_t uid, gid_t gid, gid_t *groups, int ngroups,
unsigned long long capBits, bool clearExistingCaps)
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
{
size_t i;
int capng_ret;
bool need_setgid = false;
bool need_setuid = false;
bool need_setpcap = false;
const char *capstr = NULL;
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
/* First drop all caps (unless the requested uid is "unchanged" or
* root and clearExistingCaps wasn't requested), then add back
* those in capBits + the extra ones we need to change uid/gid and
* change the capabilities bounding set.
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
*/
if (clearExistingCaps || (uid != (uid_t)-1 && uid != 0))
capng_clear(CAPNG_SELECT_BOTH);
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
for (i = 0; i <= CAP_LAST_CAP; i++) {
capstr = capng_capability_to_name(i);
if (capBits & (1ULL << i)) {
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
capng_update(CAPNG_ADD,
CAPNG_EFFECTIVE|CAPNG_INHERITABLE|
CAPNG_PERMITTED|CAPNG_BOUNDING_SET,
i);
VIR_DEBUG("Added '%s' to child capabilities' set", capstr);
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
}
}
if (gid != (gid_t)-1 &&
!capng_have_capability(CAPNG_EFFECTIVE, CAP_SETGID)) {
need_setgid = true;
capng_update(CAPNG_ADD, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETGID);
}
if (uid != (uid_t)-1 &&
!capng_have_capability(CAPNG_EFFECTIVE, CAP_SETUID)) {
need_setuid = true;
capng_update(CAPNG_ADD, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETUID);
}
# ifdef PR_CAPBSET_DROP
/* If newer kernel, we need also need setpcap to change the bounding set */
if ((capBits || need_setgid || need_setuid) &&
!capng_have_capability(CAPNG_EFFECTIVE, CAP_SETPCAP)) {
need_setpcap = true;
}
if (need_setpcap)
capng_update(CAPNG_ADD, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETPCAP);
# endif
/* Tell system we want to keep caps across uid change */
if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0)) {
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
virReportSystemError(errno, "%s",
_("prctl failed to set KEEPCAPS"));
return -1;
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
}
/* Change to the temp capabilities */
if ((capng_ret = capng_apply(CAPNG_SELECT_CAPS)) < 0) {
/* Failed. If we are running unprivileged, and the arguments make sense
* for this scenario, assume we're starting some kind of setuid helper:
* do not set any of capBits in the permitted or effective sets, and let
* the program get them on its own.
*
* (Too bad we cannot restrict the bounding set to the capabilities we
* would like the helper to have!).
*/
if (getuid() > 0 && clearExistingCaps && !need_setuid && !need_setgid) {
capng_clear(CAPNG_SELECT_CAPS);
} else {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot apply process capabilities %d"), capng_ret);
return -1;
}
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
}
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
if (virSetUIDGID(uid, gid, groups, ngroups) < 0)
return -1;
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
/* Tell it we are done keeping capabilities */
if (prctl(PR_SET_KEEPCAPS, 0, 0, 0, 0)) {
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
virReportSystemError(errno, "%s",
_("prctl failed to reset KEEPCAPS"));
return -1;
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
}
# ifdef PR_CAP_AMBIENT
/* we couldn't do this in the loop earlier above, because the capabilities
* were not applied yet, since in order to add a capability into the AMBIENT
* set, it has to be present in both the PERMITTED and INHERITABLE sets
* (capabilities(7))
*/
for (i = 0; i <= CAP_LAST_CAP; i++) {
capstr = capng_capability_to_name(i);
if (capBits & (1ULL << i)) {
if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0) {
virReportSystemError(errno,
_("prctl failed to enable '%s' in the "
"AMBIENT set"),
capstr);
return -1;
}
}
}
# endif
/* Set bounding set while we have CAP_SETPCAP. Unfortunately we cannot
* do this if we failed to get the capability above, so ignore the
* return value.
*/
capng_apply(CAPNG_SELECT_BOUNDS);
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
/* Drop the caps that allow setuid/gid (unless they were requested) */
if (need_setgid)
capng_update(CAPNG_DROP, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETGID);
if (need_setuid)
capng_update(CAPNG_DROP, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETUID);
/* Throw away CAP_SETPCAP so no more changes */
if (need_setpcap)
capng_update(CAPNG_DROP, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETPCAP);
if (((capng_ret = capng_apply(CAPNG_SELECT_CAPS)) < 0)) {
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot apply process capabilities %d"), capng_ret);
return -1;
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
}
return 0;
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
}
#else
/*
* On platforms without libcapng, the capabilities setting is treated
* as a NOP.
*/
int
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
virSetUIDGIDWithCaps(uid_t uid, gid_t gid, gid_t *groups, int ngroups,
unsigned long long capBits G_GNUC_UNUSED,
bool clearExistingCaps G_GNUC_UNUSED)
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
{
util: make virSetUIDGID async-signal-safe https://bugzilla.redhat.com/show_bug.cgi?id=964358 POSIX states that multi-threaded apps should not use functions that are not async-signal-safe between fork and exec, yet we were using getpwuid_r and initgroups. Although rare, it is possible to hit deadlock in the child, when it tries to grab a mutex that was already held by another thread in the parent. I actually hit this deadlock when testing multiple domains being started in parallel with a command hook, with the following backtrace in the child: Thread 1 (Thread 0x7fd56bbf2700 (LWP 3212)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136 #1 0x00007fd5761e7388 in _L_lock_854 () from /lib64/libpthread.so.0 #2 0x00007fd5761e7257 in __pthread_mutex_lock (mutex=0x7fd56be00360) at pthread_mutex_lock.c:61 #3 0x00007fd56bbf9fc5 in _nss_files_getpwuid_r (uid=0, result=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, errnop=0x7fd56bbf25b8) at nss_files/files-pwd.c:40 #4 0x00007fd575aeff1d in __getpwuid_r (uid=0, resbuf=0x7fd56bbf0c70, buffer=0x7fd55c2a65f0 "", buflen=1024, result=0x7fd56bbf0cb0) at ../nss/getXXbyYY_r.c:253 #5 0x00007fd578aebafc in virSetUIDGID (uid=0, gid=0) at util/virutil.c:1031 #6 0x00007fd578aebf43 in virSetUIDGIDWithCaps (uid=0, gid=0, capBits=0, clearExistingCaps=true) at util/virutil.c:1388 #7 0x00007fd578a9a20b in virExec (cmd=0x7fd55c231f10) at util/vircommand.c:654 #8 0x00007fd578a9dfa2 in virCommandRunAsync (cmd=0x7fd55c231f10, pid=0x0) at util/vircommand.c:2247 #9 0x00007fd578a9d74e in virCommandRun (cmd=0x7fd55c231f10, exitstatus=0x0) at util/vircommand.c:2100 #10 0x00007fd56326fde5 in qemuProcessStart (conn=0x7fd53c000df0, driver=0x7fd55c0dc4f0, vm=0x7fd54800b100, migrateFrom=0x0, stdin_fd=-1, stdin_path=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=1) at qemu/qemu_process.c:3694 ... The solution is to split the work of getpwuid_r/initgroups into the unsafe portions (getgrouplist, called pre-fork) and safe portions (setgroups, called post-fork). * src/util/virutil.h (virSetUIDGID, virSetUIDGIDWithCaps): Adjust signature. * src/util/virutil.c (virSetUIDGID): Add parameters. (virSetUIDGIDWithCaps): Adjust clients. * src/util/vircommand.c (virExec): Likewise. * src/util/virfile.c (virFileAccessibleAs, virFileOpenForked) (virDirCreate): Likewise. * src/security/security_dac.c (virSecurityDACSetProcessLabel): Likewise. * src/lxc/lxc_container.c (lxcContainerSetID): Likewise. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for setgroups, not initgroups. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-22 02:59:10 +00:00
return virSetUIDGID(uid, gid, groups, ngroups);
util: virSetUIDGIDWithCaps - change uid while keeping caps Normally when a process' uid is changed to non-0, all the capabilities bits are cleared, even those explicitly set with calls to capng_update()/capng_apply() made immediately before setuid. And *after* the process' uid has been changed, it no longer has the necessary privileges to add capabilities back to the process. In order to set a non-0 uid while still maintaining any capabilities bits, it is necessary to either call capng_change_id() (which unfortunately doesn't currently call initgroups to setup auxiliary group membership), or to perform the small amount of calisthenics contained in the new utility function virSetUIDGIDWithCaps(). Another very important difference between the capabilities setting/clearing in virSetUIDGIDWithCaps() and virCommand's virSetCapabilities() (which it will replace in the next patch) is that the new function properly clears the capabilities bounding set, so it will not be possible for a child process to set any new capabilities. A short description of what is done by virSetUIDGIDWithCaps(): 1) clear all capabilities then set all those desired by the caller (in capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed to change the capabilities bounding set). 2) call prctl(), telling it that we want to maintain current capabilities across an upcoming setuid(). 3) switch to the new uid/gid 4) again call prctl(), telling it we will no longer want capabilities maintained if this process does another setuid(). 5) clear the capabilities that we added to allow us to setuid/setgid/change the bounding set (unless they were also requested by the caller via the virCommand API). Because the modification/maintaining of capabilities is intermingled with setting the uid, this is necessarily done in a single function, rather than having two independent functions. Note that, due to the way that effective capabilities are computed (at time of execve) for a process that has uid != 0, the *file* capabilities of the binary being executed must also have the desired capabilities bit(s) set (see "man 7 capabilities"). This can be done with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm sys_rawio").
2013-02-05 15:30:32 +00:00
}
#endif
void virWaitForDevices(void)
{
g_autoptr(virCommand) cmd = NULL;
g_autofree char *udev = NULL;
int exitstatus;
if (!(udev = virFindFileInPath(UDEVADM)))
return;
if (!(cmd = virCommandNewArgList(udev, "settle", NULL)))
return;
/*
* NOTE: we ignore errors here; this is just to make sure that any device
* nodes that are being created finish before we try to scan them.
*/
ignore_value(virCommandRun(cmd, &exitstatus));
}
bool
virValidateWWN(const char *wwn)
{
size_t i;
const char *p = wwn;
if (STRPREFIX(wwn, "0x"))
p += 2;
for (i = 0; p[i]; i++) {
if (!g_ascii_isxdigit(p[i]))
break;
}
if (i != 16 || p[i]) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Malformed wwn: %s"), wwn);
return false;
}
return true;
}
#if defined(major) && defined(minor)
int
virGetDeviceID(const char *path, int *maj, int *min)
{
struct stat sb;
if (stat(path, &sb) < 0)
return -errno;
if (!S_ISBLK(sb.st_mode))
return -EINVAL;
if (maj)
*maj = major(sb.st_rdev);
if (min)
*min = minor(sb.st_rdev);
return 0;
}
#else
int
virGetDeviceID(const char *path G_GNUC_UNUSED,
int *maj,
int *min)
{
*maj = *min = 0;
return -ENOSYS;
}
#endif
#define SYSFS_DEV_BLOCK_PATH "/sys/dev/block"
char *
virGetUnprivSGIOSysfsPath(const char *path,
const char *sysfs_dir)
{
int maj, min;
int rc;
if ((rc = virGetDeviceID(path, &maj, &min)) < 0) {
virReportSystemError(-rc,
_("Unable to get device ID '%s'"),
path);
return NULL;
}
return g_strdup_printf("%s/%d:%d/queue/unpriv_sgio",
sysfs_dir ? sysfs_dir : SYSFS_DEV_BLOCK_PATH, maj,
min);
}
int
virSetDeviceUnprivSGIO(const char *path,
const char *sysfs_dir,
int unpriv_sgio)
{
char *sysfs_path = NULL;
char *val = NULL;
int ret = -1;
int rc;
if (!(sysfs_path = virGetUnprivSGIOSysfsPath(path, sysfs_dir)))
return -1;
if (!virFileExists(sysfs_path)) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("unpriv_sgio is not supported by this kernel"));
goto cleanup;
}
val = g_strdup_printf("%d", unpriv_sgio);
if ((rc = virFileWriteStr(sysfs_path, val, 0)) < 0) {
virReportSystemError(-rc, _("failed to set %s"), sysfs_path);
goto cleanup;
}
ret = 0;
cleanup:
VIR_FREE(sysfs_path);
VIR_FREE(val);
return ret;
}
int
virGetDeviceUnprivSGIO(const char *path,
const char *sysfs_dir,
int *unpriv_sgio)
{
char *sysfs_path = NULL;
char *buf = NULL;
char *tmp = NULL;
int ret = -1;
if (!(sysfs_path = virGetUnprivSGIOSysfsPath(path, sysfs_dir)))
return -1;
if (!virFileExists(sysfs_path)) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("unpriv_sgio is not supported by this kernel"));
goto cleanup;
}
if (virFileReadAll(sysfs_path, 1024, &buf) < 0)
goto cleanup;
if ((tmp = strchr(buf, '\n')))
*tmp = '\0';
if (virStrToLong_i(buf, NULL, 10, unpriv_sgio) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("failed to parse value of %s"), sysfs_path);
goto cleanup;
}
ret = 0;
cleanup:
VIR_FREE(sysfs_path);
VIR_FREE(buf);
return ret;
}
/**
* virParseOwnershipIds:
*
* Parse the usual "uid:gid" ownership specification into uid_t and
* gid_t passed as parameters. NULL value for those parameters mean
* the information is not needed. Also, none of those values are
* changed in case of any error.
*
* Returns -1 on error, 0 otherwise.
*/
int
virParseOwnershipIds(const char *label, uid_t *uidPtr, gid_t *gidPtr)
{
int rc = -1;
uid_t theuid;
gid_t thegid;
char *tmp_label = NULL;
char *sep = NULL;
char *owner = NULL;
char *group = NULL;
tmp_label = g_strdup(label);
/* Split label */
sep = strchr(tmp_label, ':');
if (sep == NULL) {
virReportError(VIR_ERR_INVALID_ARG,
_("Failed to parse uid and gid from '%s'"),
label);
goto cleanup;
}
*sep = '\0';
owner = tmp_label;
group = sep + 1;
/* Parse owner and group, error message is defined by
* virGetUserID or virGetGroupID.
*/
if (virGetUserID(owner, &theuid) < 0 ||
virGetGroupID(group, &thegid) < 0)
goto cleanup;
if (uidPtr)
*uidPtr = theuid;
if (gidPtr)
*gidPtr = thegid;
rc = 0;
cleanup:
VIR_FREE(tmp_label);
return rc;
}
static time_t selfLastChanged;
time_t virGetSelfLastChanged(void)
{
return selfLastChanged;
}
void virUpdateSelfLastChanged(const char *path)
{
struct stat sb;
if (stat(path, &sb) < 0)
return;
if (sb.st_ctime > selfLastChanged) {
VIR_DEBUG("Setting self last changed to %lld for '%s'",
(long long)sb.st_ctime, path);
selfLastChanged = sb.st_ctime;
}
}
#ifndef WIN32
long virGetSystemPageSize(void)
{
return sysconf(_SC_PAGESIZE);
}
#else /* WIN32 */
long virGetSystemPageSize(void)
{
errno = ENOSYS;
return -1;
}
#endif /* WIN32 */
long virGetSystemPageSizeKB(void)
{
long val = virGetSystemPageSize();
if (val < 0)
return val;
return val / 1024;
}
/**
* virMemoryLimitTruncate
*
* Return truncated memory limit to VIR_DOMAIN_MEMORY_PARAM_UNLIMITED as maximum
* which means that the limit is not set => unlimited.
*/
unsigned long long
virMemoryLimitTruncate(unsigned long long value)
{
return value < VIR_DOMAIN_MEMORY_PARAM_UNLIMITED ? value :
VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
}
/**
* virMemoryLimitIsSet
*
* Returns true if the limit is set and false for unlimited value.
*/
bool
virMemoryLimitIsSet(unsigned long long value)
{
return value < VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
}
/**
* virMemoryMaxValue
*
* @capped: whether the value must fit into unsigned long
* (long long is assumed otherwise)
*
* Note: This function is mocked in tests/qemuxml2argvmock.c for test stability
*
* Returns the maximum possible memory value in bytes.
*/
unsigned long long
virMemoryMaxValue(bool capped)
{
/* On 32-bit machines, our bound is 0xffffffff * KiB. On 64-bit
* machines, our bound is off_t (2^63). */
if (capped && sizeof(unsigned long) < sizeof(long long))
return 1024ull * ULONG_MAX;
else
return LLONG_MAX;
}
bool
virHostHasIOMMU(void)
{
DIR *iommuDir = NULL;
struct dirent *iommuGroup = NULL;
bool ret = false;
int direrr;
if (virDirOpenQuiet(&iommuDir, "/sys/kernel/iommu_groups/") < 0)
goto cleanup;
while ((direrr = virDirRead(iommuDir, &iommuGroup, NULL)) > 0)
break;
if (direrr < 0 || !iommuGroup)
goto cleanup;
ret = true;
cleanup:
VIR_DIR_CLOSE(iommuDir);
return ret;
}
/**
* virHostGetDRMRenderNode:
*
* Picks the first DRM render node available. Missing DRI or missing DRM render
* nodes in the system results in an error.
*
* Returns an absolute path to the first render node available or NULL in case
* of an error with the error being reported.
* Caller is responsible for freeing the result string.
*
*/
char *
virHostGetDRMRenderNode(void)
{
char *ret = NULL;
DIR *driDir = NULL;
const char *driPath = "/dev/dri";
struct dirent *ent = NULL;
int dirErr = 0;
bool have_rendernode = false;
if (virDirOpen(&driDir, driPath) < 0)
return NULL;
while ((dirErr = virDirRead(driDir, &ent, driPath)) > 0) {
if (STRPREFIX(ent->d_name, "renderD")) {
have_rendernode = true;
break;
}
}
if (dirErr < 0)
goto cleanup;
/* even if /dev/dri exists, there might be no renderDX nodes available */
if (!have_rendernode) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("No DRM render nodes available"));
goto cleanup;
}
ret = g_strdup_printf("%s/%s", driPath, ent->d_name);
cleanup:
VIR_DIR_CLOSE(driDir);
return ret;
}
static const char *virKernelCmdlineSkipQuote(const char *cmdline,
bool *is_quoted)
{
if (cmdline[0] == '"') {
*is_quoted = !(*is_quoted);
cmdline++;
}
return cmdline;
}
/**
* virKernelCmdlineFindEqual:
* @cmdline: target kernel command line string
* @is_quoted: indicates whether the string begins with quotes
* @res: pointer to the position immediately after the parsed parameter,
* can be used in subsequent calls to process further parameters until
* the end of the string.
*
* Iterate over the provided kernel command line string while honoring
* the kernel quoting rules and returns the index of the equal sign
* separating argument and value.
*
* Returns 0 for the cases where no equal sign is found or the argument
* itself begins with the equal sign (both cases indicating that the
* argument has no value). Otherwise, returns the index of the equal
* sign in the string.
*/
static size_t virKernelCmdlineFindEqual(const char *cmdline,
bool is_quoted,
const char **res)
{
size_t i;
size_t equal_index = 0;
for (i = 0; cmdline[i]; i++) {
if (!(is_quoted) && g_ascii_isspace(cmdline[i]))
break;
if (equal_index == 0 && cmdline[i] == '=') {
equal_index = i;
continue;
}
virKernelCmdlineSkipQuote(cmdline + i, &is_quoted);
}
*res = cmdline + i;
return equal_index;
}
static char* virKernelArgNormalize(const char *arg)
{
return virStringReplace(arg, "_", "-");
}
/**
* virKernelCmdlineNextParam:
* @cmdline: kernel command line string to be checked for next parameter
* @param: pointer to hold retrieved parameter, will be NULL if none found
* @val: pointer to hold retrieved value of @param
*
* Parse the kernel cmdline and store the next parameter in @param
* and the value of @param in @val which can be NULL if @param has
* no value. In addition returns the address right after @param=@value
* for possible further processing.
*
* Returns a pointer to address right after @param=@val in the
* kernel command line, will point to the string's end (NULL)
* in case no next parameter is found
*/
const char *virKernelCmdlineNextParam(const char *cmdline,
char **param,
char **val)
{
const char *next;
int equal_index;
bool is_quoted = false;
*param = NULL;
*val = NULL;
virSkipSpaces(&cmdline);
cmdline = virKernelCmdlineSkipQuote(cmdline, &is_quoted);
equal_index = virKernelCmdlineFindEqual(cmdline, is_quoted, &next);
if (next == cmdline)
return next;
/* param has no value */
if (equal_index == 0) {
if (is_quoted && next[-1] == '"')
*param = g_strndup(cmdline, next - cmdline - 1);
else
*param = g_strndup(cmdline, next - cmdline);
return next;
}
*param = g_strndup(cmdline, equal_index);
if (cmdline[equal_index + 1] == '"') {
is_quoted = true;
equal_index++;
}
if (is_quoted && next[-1] == '"')
*val = g_strndup(cmdline + equal_index + 1,
next - cmdline - equal_index - 2);
else
*val = g_strndup(cmdline + equal_index + 1,
next - cmdline - equal_index - 1);
return next;
}
static bool virKernelCmdlineStrCmp(const char *kernel_val,
const char *caller_val,
virKernelCmdlineFlags flags)
{
if (flags & VIR_KERNEL_CMDLINE_FLAGS_CMP_PREFIX)
return STRPREFIX(kernel_val, caller_val);
return STREQ(kernel_val, caller_val);
}
/**
* virKernelCmdlineMatchParam:
* @cmdline: kernel command line string to be checked for @arg
* @arg: kernel command line argument
* @values: array of possible values to match @arg
* @len_values: size of array, it can be 0 meaning a match will be positive if
* the argument has no value.
* @flags: bitwise-OR of virKernelCmdlineFlags
*
* Try to match the provided kernel cmdline string with the provided @arg
* and the list @values of possible values according to the matching strategy
* defined in @flags.
*
*
* Returns true if a match is found, false otherwise
*/
bool virKernelCmdlineMatchParam(const char *cmdline,
const char *arg,
const char **values,
size_t len_values,
virKernelCmdlineFlags flags)
{
bool match = false;
size_t i;
const char *next = cmdline;
g_autofree char *arg_norm = virKernelArgNormalize(arg);
while (next[0] != '\0') {
g_autofree char *kparam = NULL;
g_autofree char *kparam_norm = NULL;
g_autofree char *kval = NULL;
next = virKernelCmdlineNextParam(next, &kparam, &kval);
if (!kparam)
break;
kparam_norm = virKernelArgNormalize(kparam);
if (STRNEQ(kparam_norm, arg_norm))
continue;
if (!kval) {
match = (len_values == 0) ? true : false;
} else {
match = false;
for (i = 0; i < len_values; i++) {
if (virKernelCmdlineStrCmp(kval, values[i], flags)) {
match = true;
break;
}
}
}
if (match && (flags & VIR_KERNEL_CMDLINE_FLAGS_SEARCH_FIRST))
break;
}
return match;
}
/*
* Get a password from the console input stream.
* The caller must free the returned password.
*
* Returns: the password, or NULL
*/
char *virGetPassword(void)
{
#ifdef WIN32
GString *pw = g_string_new("");
while (1) {
char c = _getch();
if (c == '\r')
break;
g_string_append_c(pw, c);
}
return g_string_free(pw, FALSE);
#else /* !WIN32 */
return g_strdup(getpass(""));
#endif /* ! WIN32 */
}
static int
virPipeImpl(int fds[2], bool nonblock, bool errreport)
{
#ifdef HAVE_PIPE2
int flags = O_CLOEXEC;
if (nonblock)
flags |= O_NONBLOCK;
int rv = pipe2(fds, flags);
#else /* !HAVE_PIPE2 */
# ifdef WIN32
int rv = _pipe(fds, 4096, _O_BINARY);
# else /* !WIN32 */
int rv = pipe(fds);
# endif /* !WIN32 */
#endif /* !HAVE_PIPE2 */
if (rv < 0) {
if (errreport)
virReportSystemError(errno, "%s",
_("Unable to create pipes"));
return rv;
}
#ifndef HAVE_PIPE2
if (nonblock) {
if (virSetNonBlock(fds[0]) < 0 ||
virSetNonBlock(fds[1]) < 0) {
if (errreport)
virReportSystemError(errno, "%s",
_("Unable to set pipes to non-blocking"));
virReportSystemError(errno, "%s",
_("Unable to create pipes"));
VIR_FORCE_CLOSE(fds[0]);
VIR_FORCE_CLOSE(fds[1]);
return -1;
}
}
#endif /* !HAVE_PIPE2 */
return 0;
}
int
virPipe(int fds[2])
{
return virPipeImpl(fds, false, true);
}
int
virPipeQuiet(int fds[2])
{
return virPipeImpl(fds, false, false);
}
int
virPipeNonBlock(int fds[2])
{
return virPipeImpl(fds, true, true);
}