2008-04-10 07:30:52 +00:00
|
|
|
/*
|
2014-09-01 12:08:08 +00:00
|
|
|
* Copyright (C) 2008-2014 Red Hat, Inc.
|
2010-03-12 17:47:26 +00:00
|
|
|
* Copyright (C) 2008 IBM Corp.
|
2015-01-14 15:15:57 +00:00
|
|
|
* Copyright (c) 2015 SUSE LINUX Products GmbH, Nuernberg, Germany.
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* lxc_container.c: LXC container process setup (FDs, network, root filesystem)
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* David L. Leskovec <dlesko at linux.vnet.ibm.com>
|
2008-08-28 22:40:50 +00:00
|
|
|
* Daniel P. Berrange <berrange@redhat.com>
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
2012-09-20 22:30:55 +00:00
|
|
|
* License along with this library. If not, see
|
2012-07-21 10:06:23 +00:00
|
|
|
* <http://www.gnu.org/licenses/>.
|
2008-04-10 07:30:52 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
|
|
|
#include <fcntl.h>
|
2015-08-20 13:46:17 +00:00
|
|
|
#include <sched.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
#include <limits.h>
|
|
|
|
#include <stdlib.h>
|
2008-08-28 22:40:50 +00:00
|
|
|
#include <stdio.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <sys/mount.h>
|
2008-08-13 10:25:34 +00:00
|
|
|
#include <sys/wait.h>
|
2010-01-22 13:21:16 +00:00
|
|
|
#include <sys/stat.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
#include <unistd.h>
|
2008-08-28 22:40:50 +00:00
|
|
|
#include <mntent.h>
|
2012-07-20 21:16:19 +00:00
|
|
|
#include <sys/reboot.h>
|
|
|
|
#include <linux/reboot.h>
|
2008-08-28 22:40:50 +00:00
|
|
|
/* Yes, we want linux private one, for _syscall2() macro */
|
|
|
|
#include <linux/unistd.h>
|
|
|
|
|
|
|
|
/* For MS_MOVE */
|
|
|
|
#include <linux/fs.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2012-09-20 14:17:56 +00:00
|
|
|
#if WITH_CAPNG
|
2010-03-09 18:22:22 +00:00
|
|
|
# include <cap-ng.h>
|
2009-06-29 17:09:42 +00:00
|
|
|
#endif
|
2009-05-11 14:05:27 +00:00
|
|
|
|
2012-09-20 14:43:12 +00:00
|
|
|
#if WITH_BLKID
|
2011-11-01 14:59:51 +00:00
|
|
|
# include <blkid/blkid.h>
|
|
|
|
#endif
|
|
|
|
|
2013-05-17 09:59:25 +00:00
|
|
|
#if WITH_SELINUX
|
|
|
|
# include <selinux/selinux.h>
|
|
|
|
#endif
|
|
|
|
|
2012-12-13 18:21:53 +00:00
|
|
|
#include "virerror.h"
|
2012-12-12 17:59:27 +00:00
|
|
|
#include "virlog.h"
|
2008-04-10 07:30:52 +00:00
|
|
|
#include "lxc_container.h"
|
2012-12-12 18:06:53 +00:00
|
|
|
#include "viralloc.h"
|
2011-11-02 16:03:09 +00:00
|
|
|
#include "virnetdevveth.h"
|
2012-12-13 18:01:25 +00:00
|
|
|
#include "viruuid.h"
|
2011-07-19 18:32:58 +00:00
|
|
|
#include "virfile.h"
|
2012-12-12 17:04:51 +00:00
|
|
|
#include "virusb.h"
|
2012-12-12 16:27:01 +00:00
|
|
|
#include "vircommand.h"
|
2011-11-02 15:53:39 +00:00
|
|
|
#include "virnetdev.h"
|
2012-09-24 17:10:37 +00:00
|
|
|
#include "virprocess.h"
|
2013-04-03 10:36:23 +00:00
|
|
|
#include "virstring.h"
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2009-01-20 17:13:33 +00:00
|
|
|
#define VIR_FROM_THIS VIR_FROM_LXC
|
|
|
|
|
2014-02-28 12:16:17 +00:00
|
|
|
VIR_LOG_INIT("lxc.lxc_container");
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
/*
|
|
|
|
* GLibc headers are behind the kernel, so we define these
|
|
|
|
* constants if they're not present already.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef CLONE_NEWPID
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWPID 0x20000000
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
#ifndef CLONE_NEWUTS
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWUTS 0x04000000
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
#ifndef CLONE_NEWUSER
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWUSER 0x10000000
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
#ifndef CLONE_NEWIPC
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWIPC 0x08000000
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
#ifndef CLONE_NEWNET
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWNET 0x40000000 /* New network namespace */
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
|
2015-08-26 14:52:15 +00:00
|
|
|
/*
|
|
|
|
* Workaround older glibc. While kernel may support the setns
|
|
|
|
* syscall, the glibc wrapper might not exist. If that's the
|
|
|
|
* case, use our own.
|
|
|
|
*/
|
|
|
|
#ifndef __NR_setns
|
|
|
|
# if defined(__x86_64__)
|
|
|
|
# define __NR_setns 308
|
|
|
|
# elif defined(__i386__)
|
|
|
|
# define __NR_setns 346
|
|
|
|
# elif defined(__arm__)
|
|
|
|
# define __NR_setns 375
|
|
|
|
# elif defined(__aarch64__)
|
|
|
|
# define __NR_setns 375
|
|
|
|
# elif defined(__powerpc__)
|
|
|
|
# define __NR_setns 350
|
|
|
|
# elif defined(__s390__)
|
|
|
|
# define __NR_setns 339
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef HAVE_SETNS
|
|
|
|
# if defined(__NR_setns)
|
|
|
|
# include <sys/syscall.h>
|
|
|
|
|
|
|
|
/* Local stand-in for the setns() wrapper: issues the raw system call
 * using the __NR_setns number, and hands back the syscall() result. */
static inline int setns(int fd, int nstype)
{
    long rc = syscall(__NR_setns, fd, nstype);

    return rc;
}
|
|
|
|
# else /* !__NR_setns */
|
|
|
|
# error Please determine the syscall number for setns on your architecture
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
/* messages between parent and container */
|
|
|
|
typedef char lxc_message_t;
|
|
|
|
#define LXC_CONTINUE_MSG 'c'
|
|
|
|
|
|
|
|
typedef struct __lxc_child_argv lxc_child_argv_t;
|
|
|
|
struct __lxc_child_argv {
|
2008-08-13 12:50:55 +00:00
|
|
|
virDomainDefPtr config;
|
2012-01-25 14:12:53 +00:00
|
|
|
virSecurityManagerPtr securityDriver;
|
2012-07-03 11:06:38 +00:00
|
|
|
size_t nveths;
|
2008-08-13 10:52:15 +00:00
|
|
|
char **veths;
|
2008-08-13 10:25:34 +00:00
|
|
|
int monitor;
|
2013-07-09 17:15:45 +00:00
|
|
|
size_t npassFDs;
|
|
|
|
int *passFDs;
|
2011-10-20 08:44:31 +00:00
|
|
|
size_t nttyPaths;
|
2013-07-09 17:15:45 +00:00
|
|
|
char **ttyPaths;
|
2011-06-02 15:52:32 +00:00
|
|
|
int handshakefd;
|
2015-08-20 13:46:17 +00:00
|
|
|
int *nsInheritFDs;
|
2008-08-13 10:25:34 +00:00
|
|
|
};
|
|
|
|
|
2013-03-22 14:09:41 +00:00
|
|
|
static int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
|
2013-11-29 12:19:37 +00:00
|
|
|
const char *srcprefix,
|
|
|
|
const char *sec_mount_options);
|
2013-03-22 14:09:41 +00:00
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2012-07-20 21:16:19 +00:00
|
|
|
/*
|
|
|
|
* reboot(LINUX_REBOOT_CMD_CAD_ON) will return -EINVAL
|
|
|
|
* in a child pid namespace if container reboot support exists.
|
|
|
|
* Otherwise, it will either succeed or return -EPERM.
|
|
|
|
*/
|
|
|
|
ATTRIBUTE_NORETURN static int
|
|
|
|
lxcContainerRebootChild(void *argv)
|
|
|
|
{
|
|
|
|
int *cmd = argv;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = reboot(*cmd);
|
|
|
|
if (ret == -1 && errno == EINVAL)
|
|
|
|
_exit(1);
|
|
|
|
_exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static
|
|
|
|
int lxcContainerHasReboot(void)
|
|
|
|
{
|
|
|
|
int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|
|
|
|
|
CLONE_NEWIPC|SIGCHLD;
|
|
|
|
int cpid;
|
|
|
|
char *childStack;
|
|
|
|
char *stack;
|
|
|
|
char *buf;
|
|
|
|
int cmd, v;
|
|
|
|
int status;
|
|
|
|
char *tmp;
|
2013-11-25 07:06:29 +00:00
|
|
|
int stacksize = getpagesize() * 4;
|
2012-07-20 21:16:19 +00:00
|
|
|
|
|
|
|
if (virFileReadAll("/proc/sys/kernel/ctrl-alt-del", 10, &buf) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if ((tmp = strchr(buf, '\n')))
|
|
|
|
*tmp = '\0';
|
|
|
|
|
|
|
|
if (virStrToLong_i(buf, NULL, 10, &v) < 0) {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
|
_("Malformed ctrl-alt-del setting '%s'"), buf);
|
|
|
|
VIR_FREE(buf);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
VIR_FREE(buf);
|
|
|
|
cmd = v ? LINUX_REBOOT_CMD_CAD_ON : LINUX_REBOOT_CMD_CAD_OFF;
|
|
|
|
|
2013-11-25 07:06:29 +00:00
|
|
|
if (VIR_ALLOC_N(stack, stacksize) < 0)
|
2012-07-20 21:16:19 +00:00
|
|
|
return -1;
|
|
|
|
|
2013-11-25 07:06:29 +00:00
|
|
|
childStack = stack + stacksize;
|
2012-07-20 21:16:19 +00:00
|
|
|
|
|
|
|
cpid = clone(lxcContainerRebootChild, childStack, flags, &cmd);
|
|
|
|
VIR_FREE(stack);
|
|
|
|
if (cpid < 0) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Unable to clone to check reboot support"));
|
|
|
|
return -1;
|
2014-02-20 03:23:44 +00:00
|
|
|
} else if (virProcessWait(cpid, &status, false) < 0) {
|
2012-07-20 21:16:19 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2014-02-20 03:23:44 +00:00
|
|
|
if (status != 1) {
|
2012-07-20 21:16:19 +00:00
|
|
|
VIR_DEBUG("Containerized reboot support is missing "
|
|
|
|
"(kernel probably too old < 3.4)");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
VIR_DEBUG("Containerized reboot support is available");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-04-10 07:30:52 +00:00
|
|
|
/**
|
2011-05-06 14:50:00 +00:00
|
|
|
* lxcContainerBuildInitCmd:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @vmDef: pointer to vm definition structure
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
2011-05-06 14:50:00 +00:00
|
|
|
* Build a virCommandPtr for launching the container 'init' process
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
2011-05-06 14:50:00 +00:00
|
|
|
* Returns a virCommandPtr
|
2008-04-10 07:30:52 +00:00
|
|
|
*/
|
2013-12-13 16:50:28 +00:00
|
|
|
static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef,
|
|
|
|
char **ttyPaths,
|
|
|
|
size_t nttyPaths)
|
2008-04-10 07:30:52 +00:00
|
|
|
{
|
2011-02-22 13:09:19 +00:00
|
|
|
char uuidstr[VIR_UUID_STRING_BUFLEN];
|
2011-05-05 21:38:09 +00:00
|
|
|
virCommandPtr cmd;
|
2013-12-13 16:50:28 +00:00
|
|
|
virBuffer buf = VIR_BUFFER_INITIALIZER;
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
/* 'container_ptys' must exclude the PTY associated with
|
|
|
|
* the /dev/console device, hence start at 1 not 0
|
|
|
|
*/
|
|
|
|
for (i = 1; i < nttyPaths; i++) {
|
|
|
|
if (!STRPREFIX(ttyPaths[i], "/dev/")) {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
|
_("Expected a /dev path for '%s'"),
|
|
|
|
ttyPaths[i]);
|
|
|
|
virBufferFreeAndReset(&buf);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
virBufferAdd(&buf, ttyPaths[i] + 5, -1);
|
|
|
|
virBufferAddChar(&buf, ' ');
|
|
|
|
}
|
|
|
|
virBufferTrim(&buf, NULL, 1);
|
|
|
|
|
2014-06-27 08:47:11 +00:00
|
|
|
if (virBufferCheckError(&buf) < 0)
|
2013-12-13 16:50:28 +00:00
|
|
|
return NULL;
|
2011-02-22 13:09:19 +00:00
|
|
|
|
|
|
|
virUUIDFormat(vmDef->uuid, uuidstr);
|
|
|
|
|
2011-05-05 21:38:09 +00:00
|
|
|
cmd = virCommandNew(vmDef->os.init);
|
|
|
|
|
2012-03-26 17:09:31 +00:00
|
|
|
if (vmDef->os.initargv && vmDef->os.initargv[0])
|
|
|
|
virCommandAddArgSet(cmd, (const char **)vmDef->os.initargv);
|
|
|
|
|
2011-05-05 21:38:09 +00:00
|
|
|
virCommandAddEnvString(cmd, "PATH=/bin:/sbin");
|
|
|
|
virCommandAddEnvString(cmd, "TERM=linux");
|
2012-01-24 18:51:01 +00:00
|
|
|
virCommandAddEnvString(cmd, "container=lxc-libvirt");
|
2014-07-25 06:39:55 +00:00
|
|
|
virCommandAddEnvString(cmd, "HOME=/");
|
2012-03-14 12:52:58 +00:00
|
|
|
virCommandAddEnvPair(cmd, "container_uuid", uuidstr);
|
2013-12-13 16:50:28 +00:00
|
|
|
if (nttyPaths > 1)
|
|
|
|
virCommandAddEnvPair(cmd, "container_ttys", virBufferCurrentContent(&buf));
|
2011-05-05 21:38:09 +00:00
|
|
|
virCommandAddEnvPair(cmd, "LIBVIRT_LXC_UUID", uuidstr);
|
|
|
|
virCommandAddEnvPair(cmd, "LIBVIRT_LXC_NAME", vmDef->name);
|
2011-10-03 17:37:47 +00:00
|
|
|
if (vmDef->os.cmdline)
|
|
|
|
virCommandAddEnvPair(cmd, "LIBVIRT_LXC_CMDLINE", vmDef->os.cmdline);
|
2011-05-05 21:38:09 +00:00
|
|
|
|
2013-12-13 16:50:28 +00:00
|
|
|
virBufferFreeAndReset(&buf);
|
2011-05-06 14:50:00 +00:00
|
|
|
return cmd;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * lxcContainerSetupFDs:
 * @ttyfd: pointer to the FD of the tty to set as the container console
 * @npassFDs: number of extra FDs
 * @passFDs: list of extra FDs
 *
 * Setup file descriptors in the container. The process starts a new
 * session and takes *@ttyfd as its controlling terminal; *@ttyfd is
 * then duplicated onto stdin, stdout & stderr and closed. Any FDs
 * included in @passFDs are dup()'d such that they start from
 * stderr+1 with no gaps; entries of @passFDs are modified while
 * doing so. Finally every descriptor above the last passed one, up
 * to sysconf(_SC_OPEN_MAX), is closed.
 *
 * Returns 0 on success or -1 in case of error
 */
static int lxcContainerSetupFDs(int *ttyfd,
                                size_t npassFDs, int *passFDs)
{
    int ret = -1;
    int maxFds;
    int lastKept;
    int closeFd;
    size_t cur;
    size_t later;

    VIR_DEBUG("Logging from the container init will now cease "
              "as the FDs are about to be closed for exec of "
              "the container init process");

    if (setsid() < 0) {
        virReportSystemError(errno, "%s",
                             _("setsid failed"));
        goto cleanup;
    }

    if (ioctl(*ttyfd, TIOCSCTTY, NULL) < 0) {
        virReportSystemError(errno, "%s",
                             _("ioctl(TIOCSCTTY) failed"));
        goto cleanup;
    }

    if (dup2(*ttyfd, STDIN_FILENO) < 0) {
        virReportSystemError(errno, "%s",
                             _("dup2(stdin) failed"));
        goto cleanup;
    }

    if (dup2(*ttyfd, STDOUT_FILENO) < 0) {
        virReportSystemError(errno, "%s",
                             _("dup2(stdout) failed"));
        goto cleanup;
    }

    if (dup2(*ttyfd, STDERR_FILENO) < 0) {
        virReportSystemError(errno, "%s",
                             _("dup2(stderr) failed"));
        goto cleanup;
    }

    VIR_FORCE_CLOSE(*ttyfd);

    /* Renumber the FDs in @passFDs so that they sit, without
     * gaps, directly after STDERR_FILENO
     */
    for (cur = 0; cur < npassFDs; cur++) {
        int target = STDERR_FILENO + cur + 1;

        VIR_DEBUG("Pass %d onto %d", passFDs[cur], target);

        /* Already in the right slot: nothing to renumber */
        if (passFDs[cur] == target)
            continue;

        /* A later entry may currently own the slot we want;
         * give it a fresh number first so it is not lost
         */
        for (later = cur + 1; later < npassFDs; later++) {
            int movedFd;

            if (passFDs[later] != target)
                continue;

            VIR_DEBUG("Clash %zu", later);
            movedFd = dup(passFDs[later]);
            if (movedFd < 0) {
                virReportSystemError(errno,
                                     _("Cannot move fd %d out of the way"),
                                     passFDs[later]);
                goto cleanup;
            }
            /* The old number is deliberately left open so that
             * no later dup() can hand it out again; the dup2()
             * below closes it anyway
             */
            VIR_DEBUG("Moved clash onto %d", movedFd);
            passFDs[later] = movedFd;
        }

        /* The slot is free now, so take it */
        if (dup2(passFDs[cur], target) < 0) {
            virReportSystemError(errno,
                                 _("Cannot duplicate fd %d onto fd %d"),
                                 passFDs[cur], target);
            goto cleanup;
        }
        VIR_FORCE_CLOSE(passFDs[cur]);
    }

    lastKept = STDERR_FILENO + npassFDs;

    /* In case FD_CLOEXEC was forgotten somewhere, explicitly
     * close all remaining FDs before executing the container */
    maxFds = sysconf(_SC_OPEN_MAX);
    if (maxFds < 0) {
        virReportSystemError(errno, "%s",
                             _("sysconf(_SC_OPEN_MAX) failed"));
        goto cleanup;
    }

    for (closeFd = lastKept + 1; closeFd < maxFds; closeFd++) {
        /* Work on a copy: the loop counter must stay intact */
        int scratch = closeFd;
        VIR_MASS_CLOSE(scratch);
    }

    ret = 0;

 cleanup:
    VIR_DEBUG("rc=%d", ret);
    return ret;
}
|
|
|
|
|
|
|
|
/**
|
2008-08-13 10:25:34 +00:00
|
|
|
* lxcContainerSendContinue:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @control: control FD to child
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
2008-08-13 10:25:34 +00:00
|
|
|
* Sends the continue message via the socket pair stored in the vm
|
|
|
|
* structure.
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
2008-08-13 10:52:15 +00:00
|
|
|
int lxcContainerSendContinue(int control)
|
2008-04-10 07:30:52 +00:00
|
|
|
{
|
|
|
|
int rc = -1;
|
2008-08-13 10:25:34 +00:00
|
|
|
lxc_message_t msg = LXC_CONTINUE_MSG;
|
|
|
|
int writeCount = 0;
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2013-07-01 16:42:25 +00:00
|
|
|
VIR_DEBUG("Send continue on fd %d", control);
|
2008-08-13 10:25:34 +00:00
|
|
|
writeCount = safewrite(control, &msg, sizeof(msg));
|
2014-11-13 14:27:11 +00:00
|
|
|
if (writeCount != sizeof(msg))
|
2008-08-13 10:25:34 +00:00
|
|
|
goto error_out;
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
rc = 0;
|
2014-03-25 06:49:26 +00:00
|
|
|
error_out:
|
2008-08-13 10:25:34 +00:00
|
|
|
return rc;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
2008-06-26 16:09:48 +00:00
|
|
|
/**
|
2008-08-13 10:25:34 +00:00
|
|
|
* lxcContainerWaitForContinue:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @control: Control FD from parent
|
2008-06-26 16:09:48 +00:00
|
|
|
*
|
|
|
|
* This function will wait for the container continue message from the
|
|
|
|
* parent process. It will send this message on the socket pair stored in
|
|
|
|
* the vm structure once it has completed the post clone container setup.
|
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
2011-06-01 22:17:00 +00:00
|
|
|
int lxcContainerWaitForContinue(int control)
|
2008-06-26 16:09:48 +00:00
|
|
|
{
|
|
|
|
lxc_message_t msg;
|
|
|
|
int readLen;
|
|
|
|
|
2013-07-01 16:42:25 +00:00
|
|
|
VIR_DEBUG("Wait continue on fd %d", control);
|
2008-08-13 10:25:34 +00:00
|
|
|
readLen = saferead(control, &msg, sizeof(msg));
|
2013-07-01 16:42:25 +00:00
|
|
|
VIR_DEBUG("Got continue on fd %d %d", control, readLen);
|
2011-11-01 12:28:26 +00:00
|
|
|
if (readLen != sizeof(msg)) {
|
|
|
|
if (readLen >= 0)
|
|
|
|
errno = EIO;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (msg != LXC_CONTINUE_MSG) {
|
|
|
|
errno = EINVAL;
|
2008-08-13 10:14:47 +00:00
|
|
|
return -1;
|
2008-06-26 16:09:48 +00:00
|
|
|
}
|
|
|
|
|
2008-08-13 10:14:47 +00:00
|
|
|
return 0;
|
2008-06-26 16:09:48 +00:00
|
|
|
}
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2013-06-07 07:12:21 +00:00
|
|
|
/**
|
|
|
|
* lxcContainerSetID:
|
|
|
|
*
|
|
|
|
* This function calls setuid and setgid to create proper
|
|
|
|
* cred for tasks running in container.
|
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
|
|
|
static int lxcContainerSetID(virDomainDefPtr def)
|
|
|
|
{
|
|
|
|
/* Only call virSetUIDGID when user namespace is enabled
|
|
|
|
* for this container. And user namespace is only enabled
|
|
|
|
* when nuidmap&ngidmap is not zero */
|
|
|
|
|
2013-10-28 11:18:26 +00:00
|
|
|
if (!def->idmap.nuidmap)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
VIR_DEBUG("Setting UID/GID to 0/0");
|
|
|
|
if (virSetUIDGID(0, 0, NULL, 0) < 0) {
|
2013-06-07 07:12:21 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("setuid or setgid failed"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-06-27 08:41:22 +00:00
|
|
|
static virDomainNetDefPtr
|
|
|
|
lxcContainerGetNetDef(virDomainDefPtr vmDef, const char *devName)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
virDomainNetDefPtr netDef;
|
|
|
|
|
|
|
|
for (i = 0; i < vmDef->nnets; i++) {
|
|
|
|
netDef = vmDef->nets[i];
|
2015-01-11 12:51:29 +00:00
|
|
|
if (STREQ_NULLABLE(netDef->ifname_guest_actual, devName))
|
2014-06-27 08:41:22 +00:00
|
|
|
return netDef;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2008-06-26 16:09:48 +00:00
|
|
|
/**
|
2009-11-05 13:11:30 +00:00
|
|
|
* lxcContainerRenameAndEnableInterfaces:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @nveths: number of interfaces
|
|
|
|
* @veths: interface names
|
2008-06-26 16:09:48 +00:00
|
|
|
*
|
2009-11-05 13:11:30 +00:00
|
|
|
* This function will rename the interfaces to ethN
|
|
|
|
* with id ascending order from zero and enable the
|
|
|
|
* renamed interfaces for this container.
|
2008-06-26 16:09:48 +00:00
|
|
|
*
|
|
|
|
* Returns 0 on success or nonzero in case of error
|
|
|
|
*/
|
2014-06-27 08:41:22 +00:00
|
|
|
static int lxcContainerRenameAndEnableInterfaces(virDomainDefPtr vmDef,
|
2012-07-03 11:06:38 +00:00
|
|
|
size_t nveths,
|
2009-11-05 13:11:30 +00:00
|
|
|
char **veths)
|
2008-06-26 16:09:48 +00:00
|
|
|
{
|
|
|
|
int rc = 0;
|
2014-07-22 11:35:48 +00:00
|
|
|
size_t i, j;
|
2009-11-05 13:11:30 +00:00
|
|
|
char *newname = NULL;
|
2014-07-24 13:20:27 +00:00
|
|
|
char *toStr = NULL;
|
|
|
|
char *viaStr = NULL;
|
2014-06-27 08:41:22 +00:00
|
|
|
virDomainNetDefPtr netDef;
|
|
|
|
bool privNet = vmDef->features[VIR_DOMAIN_FEATURE_PRIVNET] ==
|
2014-06-27 15:18:53 +00:00
|
|
|
VIR_TRISTATE_SWITCH_ON;
|
2008-06-26 16:09:48 +00:00
|
|
|
|
2013-05-21 08:03:33 +00:00
|
|
|
for (i = 0; i < nveths; i++) {
|
2014-06-27 08:41:22 +00:00
|
|
|
if (!(netDef = lxcContainerGetNetDef(vmDef, veths[i])))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
newname = netDef->ifname_guest;
|
|
|
|
if (!newname) {
|
2010-07-23 17:25:56 +00:00
|
|
|
rc = -1;
|
2008-06-26 16:09:48 +00:00
|
|
|
goto error_out;
|
2010-07-23 17:25:56 +00:00
|
|
|
}
|
2009-11-05 13:11:30 +00:00
|
|
|
|
2011-02-16 23:37:57 +00:00
|
|
|
VIR_DEBUG("Renaming %s to %s", veths[i], newname);
|
2011-11-02 15:53:39 +00:00
|
|
|
rc = virNetDevSetName(veths[i], newname);
|
2010-07-23 17:25:56 +00:00
|
|
|
if (rc < 0)
|
2009-11-05 13:11:30 +00:00
|
|
|
goto error_out;
|
|
|
|
|
2014-07-22 11:35:48 +00:00
|
|
|
for (j = 0; j < netDef->nips; j++) {
|
|
|
|
virDomainNetIpDefPtr ip = netDef->ips[j];
|
|
|
|
unsigned int prefix = (ip->prefix > 0) ? ip->prefix :
|
|
|
|
VIR_SOCKET_ADDR_DEFAULT_PREFIX;
|
|
|
|
char *ipStr = virSocketAddrFormat(&ip->address);
|
|
|
|
|
|
|
|
VIR_DEBUG("Adding IP address '%s/%u' to '%s'",
|
|
|
|
ipStr, ip->prefix, newname);
|
|
|
|
if (virNetDevSetIPAddress(newname, &ip->address, prefix) < 0) {
|
|
|
|
virReportError(VIR_ERR_SYSTEM_ERROR,
|
|
|
|
_("Failed to set IP address '%s' on %s"),
|
|
|
|
ipStr, newname);
|
|
|
|
VIR_FREE(ipStr);
|
|
|
|
goto error_out;
|
|
|
|
}
|
|
|
|
VIR_FREE(ipStr);
|
|
|
|
}
|
|
|
|
|
2015-04-24 13:52:56 +00:00
|
|
|
if (netDef->nips ||
|
|
|
|
netDef->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_UP) {
|
2014-07-25 13:24:29 +00:00
|
|
|
VIR_DEBUG("Enabling %s", newname);
|
|
|
|
rc = virNetDevSetOnline(newname, true);
|
|
|
|
if (rc < 0)
|
|
|
|
goto error_out;
|
2010-07-23 17:25:56 +00:00
|
|
|
|
2014-07-25 13:24:29 +00:00
|
|
|
/* Set the routes */
|
|
|
|
for (j = 0; j < netDef->nroutes; j++) {
|
2015-01-14 15:15:57 +00:00
|
|
|
virNetworkRouteDefPtr route = netDef->routes[j];
|
|
|
|
|
|
|
|
if (virNetDevAddRoute(newname,
|
|
|
|
virNetworkRouteDefGetAddress(route),
|
|
|
|
virNetworkRouteDefGetPrefix(route),
|
|
|
|
virNetworkRouteDefGetGateway(route),
|
|
|
|
virNetworkRouteDefGetMetric(route)) < 0) {
|
2014-07-24 13:20:27 +00:00
|
|
|
goto error_out;
|
2014-07-25 13:24:29 +00:00
|
|
|
}
|
|
|
|
VIR_FREE(toStr);
|
|
|
|
VIR_FREE(viaStr);
|
2014-07-24 13:20:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-11-05 13:11:30 +00:00
|
|
|
VIR_FREE(newname);
|
2008-06-26 16:09:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* enable lo device only if there were other net devices */
|
2012-01-18 11:38:49 +00:00
|
|
|
if (veths || privNet)
|
2011-11-02 15:53:39 +00:00
|
|
|
rc = virNetDevSetOnline("lo", true);
|
2008-06-26 16:09:48 +00:00
|
|
|
|
2014-03-25 06:49:26 +00:00
|
|
|
error_out:
|
2014-07-24 13:20:27 +00:00
|
|
|
VIR_FREE(toStr);
|
|
|
|
VIR_FREE(viaStr);
|
2009-11-05 13:11:30 +00:00
|
|
|
VIR_FREE(newname);
|
2008-06-26 16:09:48 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2011-01-28 21:38:06 +00:00
|
|
|
/*_syscall2(int, pivot_root, char *, newroot, const char *, oldroot)*/
|
2013-11-19 23:04:25 +00:00
|
|
|
extern int pivot_root(const char * new_root, const char * put_old);
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2009-04-14 17:51:12 +00:00
|
|
|
#ifndef MS_REC
|
2010-03-09 18:22:22 +00:00
|
|
|
# define MS_REC 16384
|
2009-04-14 17:51:12 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef MNT_DETACH
|
2010-03-09 18:22:22 +00:00
|
|
|
# define MNT_DETACH 0x00000002
|
2009-04-14 17:51:12 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef MS_PRIVATE
|
2010-03-09 18:22:22 +00:00
|
|
|
# define MS_PRIVATE (1<<18)
|
2009-04-14 17:51:12 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef MS_SLAVE
|
2010-03-09 18:22:22 +00:00
|
|
|
# define MS_SLAVE (1<<19)
|
2009-04-14 17:51:12 +00:00
|
|
|
#endif
|
|
|
|
|
2013-04-08 15:10:16 +00:00
|
|
|
|
|
|
|
/*
 * lxcContainerUnmountSubtree:
 * @prefix: path prefix handed to virFileGetMountReverseSubtree()
 * @isOldRootFS: whether an extra plain umount() of the last listed
 *               mount is attempted after detaching it
 *
 * Unmounts, one by one, the mounts that
 * virFileGetMountReverseSubtree() reports for @prefix in
 * /proc/mounts. When one of them cannot be unmounted, the last
 * mount in the list is detached with MNT_DETACH instead.
 * NOTE(review): the last list entry is treated as the top of the
 * subtree -- presumably because the helper lists mounts in reverse
 * order; confirm against the helper.
 *
 * Returns 0 on success or -1 in case of error
 */
static int lxcContainerUnmountSubtree(const char *prefix,
                                      bool isOldRootFS)
{
    char **mountList = NULL;
    size_t mountCount = 0;
    const char *failed = NULL;
    int umountErrno = 0;
    size_t idx;
    int ret = -1;

    VIR_DEBUG("Unmount subtreee from %s", prefix);

    if (virFileGetMountReverseSubtree("/proc/mounts", prefix,
                                      &mountList, &mountCount) < 0)
        goto cleanup;

    for (idx = 0; idx < mountCount; idx++) {
        char ebuf[1024];

        VIR_DEBUG("Umount %s", mountList[idx]);
        if (umount(mountList[idx]) >= 0)
            continue;

        /* Remember which mount failed and why, then stop trying */
        umountErrno = errno;
        failed = mountList[idx];
        VIR_WARN("Failed to unmount '%s', trying to detach subtree '%s': %s",
                 failed, mountList[mountCount - 1],
                 virStrerror(umountErrno, ebuf, sizeof(ebuf)));
        break;
    }

    if (failed) {
        const char *subtree = mountList[mountCount - 1];

        /* This detaches the subtree */
        if (umount2(subtree, MNT_DETACH) < 0) {
            virReportSystemError(umountErrno,
                                 _("Failed to unmount '%s' and could not detach subtree '%s'"),
                                 failed, subtree);
            goto cleanup;
        }

        /* This unmounts the tmpfs on which the old root filesystem was hosted */
        if (isOldRootFS &&
            umount(subtree) < 0) {
            virReportSystemError(umountErrno,
                                 _("Failed to unmount '%s' and could not unmount old root '%s'"),
                                 failed, subtree);
            goto cleanup;
        }
    }

    ret = 0;

 cleanup:
    virStringFreeList(mountList);

    return ret;
}
|
|
|
|
|
2014-11-21 16:45:55 +00:00
|
|
|
/*
 * lxcContainerResolveSymlinks:
 * @fs: filesystem definition whose source path is to be resolved
 * @gentle: when true, an inaccessible or non-resolvable source is
 *          skipped (returning 0) instead of being reported
 *
 * Replaces fs->src with the result of virFileResolveAllLinks()
 * and marks @fs as resolved so that later calls return at once.
 * Nothing is done when @fs has no source or is already marked.
 *
 * Returns 0 on success (or when skipped), -1 on error
 */
static int lxcContainerResolveSymlinks(virDomainFSDefPtr fs, bool gentle)
{
    char *resolved;

    if (fs->symlinksResolved || !fs->src)
        return 0;

    if (access(fs->src, F_OK)) {
        if (!gentle) {
            virReportSystemError(errno,
                                 _("Failed to access '%s'"), fs->src);
            return -1;
        }

        /* Just ignore the error for the while, we'll try again later */
        VIR_DEBUG("Skipped unaccessible '%s'", fs->src);
        return 0;
    }

    VIR_DEBUG("Resolving '%s'", fs->src);
    if (virFileResolveAllLinks(fs->src, &resolved) < 0) {
        if (!gentle) {
            virReportSystemError(errno,
                                 _("Failed to resolve symlink at %s"),
                                 fs->src);
            return -1;
        }

        VIR_DEBUG("Skipped non-resolvable '%s'", fs->src);
        return 0;
    }

    /* Mark it resolved to skip it the next time */
    fs->symlinksResolved = true;

    VIR_DEBUG("Resolved '%s' to %s", fs->src, resolved);

    /* The resolved path takes the place of the old source */
    VIR_FREE(fs->src);
    fs->src = resolved;

    return 0;
}
|
2013-04-08 15:10:16 +00:00
|
|
|
|
2013-03-22 14:09:41 +00:00
|
|
|
static int lxcContainerPrepareRoot(virDomainDefPtr def,
|
2013-11-29 12:19:37 +00:00
|
|
|
virDomainFSDefPtr root,
|
|
|
|
const char *sec_mount_options)
|
2013-03-22 14:09:41 +00:00
|
|
|
{
|
|
|
|
char *dst;
|
|
|
|
char *tmp;
|
|
|
|
|
2013-07-09 13:24:10 +00:00
|
|
|
VIR_DEBUG("Prepare root %d", root->type);
|
|
|
|
|
2013-03-22 14:09:41 +00:00
|
|
|
if (root->type == VIR_DOMAIN_FS_TYPE_MOUNT)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (root->type == VIR_DOMAIN_FS_TYPE_FILE) {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
|
|
|
_("Unexpected root filesystem without loop device"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (root->type != VIR_DOMAIN_FS_TYPE_BLOCK) {
|
|
|
|
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
|
|
|
|
_("Unsupported root filesystem type %s"),
|
|
|
|
virDomainFSTypeToString(root->type));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2014-11-21 16:45:55 +00:00
|
|
|
if (lxcContainerResolveSymlinks(root, false) < 0)
|
|
|
|
return -1;
|
|
|
|
|
2013-03-22 14:09:41 +00:00
|
|
|
if (virAsprintf(&dst, "%s/%s.root",
|
2013-07-04 10:11:37 +00:00
|
|
|
LXC_STATE_DIR, def->name) < 0)
|
2013-03-22 14:09:41 +00:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
tmp = root->dst;
|
|
|
|
root->dst = dst;
|
|
|
|
|
2013-11-29 12:19:37 +00:00
|
|
|
if (lxcContainerMountFSBlock(root, "", sec_mount_options) < 0) {
|
2013-03-22 14:09:41 +00:00
|
|
|
root->dst = tmp;
|
|
|
|
VIR_FREE(dst);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
root->dst = tmp;
|
|
|
|
root->type = VIR_DOMAIN_FS_TYPE_MOUNT;
|
|
|
|
VIR_FREE(root->src);
|
|
|
|
root->src = dst;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
/**
 * lxcContainerPivotRoot:
 * @root: root filesystem definition; root->src is the directory that
 *        becomes the new "/"
 *
 * Switches the root filesystem of the calling process to root->src:
 *
 *  1. "/" is made recursively private.
 *  2. A tmpfs is mounted on <root->src>/.oldroot (old and new roots
 *     must be on separate filesystems).
 *  3. root->src is recursively bind-mounted on <...>/.oldroot/new and,
 *     if root->readonly is set, re-mounted read-only.
 *  4. The process changes into that directory, calls pivot_root() so
 *     the previous root ends up at /.oldroot, and finally changes
 *     directory to the new "/".
 *
 * Returns 0 on success, -1 on failure with an error reported.
 */
static int lxcContainerPivotRoot(virDomainFSDefPtr root)
{
    int ret = -1;
    char *oldroot = NULL, *newroot = NULL;

    VIR_DEBUG("Pivot via %s", root->src);

    /* root->parent must be private, so make / private. */
    if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to make root private"));
        goto err;
    }

    if (virAsprintf(&oldroot, "%s/.oldroot", root->src) < 0)
        goto err;

    if (virFileMakePath(oldroot) < 0) {
        virReportSystemError(errno,
                             _("Failed to create %s"),
                             oldroot);
        goto err;
    }

    /* Create a tmpfs root since old and new roots must be
     * on separate filesystems */
    if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount empty tmpfs at %s"),
                             oldroot);
        goto err;
    }

    /* Create a directory called 'new' in tmpfs */
    if (virAsprintf(&newroot, "%s/new", oldroot) < 0)
        goto err;

    if (virFileMakePath(newroot) < 0) {
        virReportSystemError(errno,
                             _("Failed to create %s"),
                             newroot);
        goto err;
    }

    /* ... and mount our root onto it */
    if (mount(root->src, newroot, NULL, MS_BIND|MS_REC, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to bind %s to new root %s"),
                             root->src, newroot);
        goto err;
    }

    if (root->readonly) {
        if (mount(root->src, newroot, NULL, MS_BIND|MS_REC|MS_RDONLY|MS_REMOUNT, NULL) < 0) {
            virReportSystemError(errno,
                                 _("Failed to make new root %s readonly"),
                                 root->src);
            goto err;
        }
    }

    /* Now we chdir into the tmpfs, then pivot into the
     * root->src bind-mounted onto '/new' */
    if (chdir(newroot) < 0) {
        virReportSystemError(errno,
                             _("Failed to chdir into %s"), newroot);
        goto err;
    }

    /* The old root directory will live at /.oldroot after
     * this and will soon be unmounted completely */
    if (pivot_root(".", ".oldroot") < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to pivot root"));
        goto err;
    }

    /* CWD is undefined after pivot_root, so go to /.  Report the
     * failure like every other step does, so the caller never sees a
     * -1 return without an accompanying error. */
    if (chdir("/") < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to chdir into /"));
        goto err;
    }

    ret = 0;

 err:
    VIR_FREE(oldroot);
    VIR_FREE(newroot);

    return ret;
}
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
|
2013-09-10 12:35:12 +00:00
|
|
|
/* Description of one filesystem that is set up in every container */
typedef struct {
    const char *src;      /* mount source (device name or path) */
    const char *dst;      /* mount point inside the container */
    const char *type;     /* filesystem type, NULL for bind mounts */
    int mflags;           /* MS_* flags for mount(2) */
    bool skipUserNS;      /* skip when a user namespace is enabled */
    bool skipUnmounted;   /* skip when /.oldroot<dst> is not a mount point */
    bool skipNoNetns;     /* skip when no network namespace is used */
} virLXCBasicMountInfo;

/* Mounted in array order; skip* members that are not listed for an
 * entry are false. */
static const virLXCBasicMountInfo lxcBasicMounts[] = {
    {
        .src = "proc",
        .dst = "/proc",
        .type = "proc",
        .mflags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
    },
    {
        .src = "/proc/sys",
        .dst = "/proc/sys",
        .type = NULL,
        .mflags = MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY,
    },
    {
        .src = "/.oldroot/proc/sys/net/ipv4",
        .dst = "/proc/sys/net/ipv4",
        .type = NULL,
        .mflags = MS_BIND,
        .skipNoNetns = true,
    },
    {
        .src = "/.oldroot/proc/sys/net/ipv6",
        .dst = "/proc/sys/net/ipv6",
        .type = NULL,
        .mflags = MS_BIND,
        .skipNoNetns = true,
    },
    {
        .src = "sysfs",
        .dst = "/sys",
        .type = "sysfs",
        .mflags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY,
    },
    {
        .src = "securityfs",
        .dst = "/sys/kernel/security",
        .type = "securityfs",
        .mflags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY,
        .skipUserNS = true,
        .skipUnmounted = true,
    },
#if WITH_SELINUX
    {
        .src = SELINUX_MOUNT,
        .dst = SELINUX_MOUNT,
        .type = "selinuxfs",
        .mflags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY,
        .skipUserNS = true,
        .skipUnmounted = true,
    },
#endif
};
|
|
|
|
|
|
|
|
|
2014-02-05 14:10:02 +00:00
|
|
|
bool lxcIsBasicMountLocation(const char *path)
|
2013-09-09 15:17:19 +00:00
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_CARDINALITY(lxcBasicMounts); i++) {
|
|
|
|
if (STREQ(path, lxcBasicMounts[i].dst))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int lxcContainerSetReadOnly(void)
|
|
|
|
{
|
|
|
|
FILE *procmnt;
|
|
|
|
struct mntent mntent;
|
|
|
|
char mntbuf[1024];
|
|
|
|
int ret = -1;
|
|
|
|
char **mounts = NULL;
|
|
|
|
size_t nmounts = 0;
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
if (!(procmnt = setmntent("/proc/mounts", "r"))) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Failed to read /proc/mounts"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
|
2014-03-07 08:33:31 +00:00
|
|
|
char *tmp;
|
2013-09-09 15:17:19 +00:00
|
|
|
if (STREQ(mntent.mnt_dir, "/") ||
|
|
|
|
STREQ(mntent.mnt_dir, "/.oldroot") ||
|
|
|
|
STRPREFIX(mntent.mnt_dir, "/.oldroot/") ||
|
|
|
|
lxcIsBasicMountLocation(mntent.mnt_dir))
|
|
|
|
continue;
|
|
|
|
|
2014-03-07 08:33:31 +00:00
|
|
|
if (VIR_STRDUP(tmp, mntent.mnt_dir) < 0 ||
|
|
|
|
VIR_APPEND_ELEMENT(mounts, nmounts, tmp) < 0) {
|
|
|
|
VIR_FREE(tmp);
|
2013-09-09 15:17:19 +00:00
|
|
|
goto cleanup;
|
2014-03-07 08:33:31 +00:00
|
|
|
}
|
2013-09-09 15:17:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (mounts)
|
|
|
|
qsort(mounts, nmounts, sizeof(mounts[0]),
|
2013-11-28 11:14:59 +00:00
|
|
|
virStringSortRevCompare);
|
2013-09-09 15:17:19 +00:00
|
|
|
|
|
|
|
for (i = 0; i < nmounts; i++) {
|
|
|
|
VIR_DEBUG("Bind readonly %s", mounts[i]);
|
|
|
|
if (mount(mounts[i], mounts[i], NULL, MS_BIND|MS_REC|MS_RDONLY|MS_REMOUNT, NULL) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to make mount %s readonly"),
|
|
|
|
mounts[i]);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = 0;
|
2014-03-25 06:49:26 +00:00
|
|
|
cleanup:
|
2013-09-09 15:17:19 +00:00
|
|
|
for (i = 0; i < nmounts; i++)
|
|
|
|
VIR_FREE(mounts[i]);
|
|
|
|
VIR_FREE(mounts);
|
|
|
|
endmntent(procmnt);
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-07-14 10:01:51 +00:00
|
|
|
static int lxcContainerMountBasicFS(bool userns_enabled,
|
|
|
|
bool netns_disabled)
|
2013-09-10 12:35:12 +00:00
|
|
|
{
|
Convert 'int i' to 'size_t i' in src/lxc/ files
Convert the type of loop iterators named 'i', 'j', k',
'ii', 'jj', 'kk', to be 'size_t' instead of 'int' or
'unsigned int', also santizing 'ii', 'jj', 'kk' to use
the normal 'i', 'j', 'k' naming
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2013-07-08 14:09:33 +00:00
|
|
|
size_t i;
|
|
|
|
int rc = -1;
|
2014-07-14 10:01:51 +00:00
|
|
|
char* mnt_src = NULL;
|
|
|
|
int mnt_mflags;
|
2009-04-22 14:26:50 +00:00
|
|
|
|
2013-06-07 07:12:22 +00:00
|
|
|
VIR_DEBUG("Mounting basic filesystems");
|
2011-08-04 16:16:56 +00:00
|
|
|
|
2013-09-10 12:35:12 +00:00
|
|
|
for (i = 0; i < ARRAY_CARDINALITY(lxcBasicMounts); i++) {
|
2013-10-07 11:25:00 +00:00
|
|
|
bool bindOverReadonly;
|
2013-09-10 12:35:12 +00:00
|
|
|
virLXCBasicMountInfo const *mnt = &lxcBasicMounts[i];
|
2011-08-04 16:16:56 +00:00
|
|
|
|
2014-07-14 10:01:51 +00:00
|
|
|
/* When enable userns but disable netns, kernel will
|
|
|
|
* forbid us doing a new fresh mount for sysfs.
|
|
|
|
* So we had to do a bind mount for sysfs instead.
|
|
|
|
*/
|
|
|
|
if (userns_enabled && netns_disabled &&
|
|
|
|
STREQ(mnt->src, "sysfs")) {
|
2014-11-13 14:27:11 +00:00
|
|
|
if (VIR_STRDUP(mnt_src, "/sys") < 0)
|
2014-07-14 10:01:51 +00:00
|
|
|
goto cleanup;
|
|
|
|
mnt_mflags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY|MS_BIND;
|
|
|
|
} else {
|
2014-11-13 14:27:11 +00:00
|
|
|
if (VIR_STRDUP(mnt_src, mnt->src) < 0)
|
2014-07-14 10:01:51 +00:00
|
|
|
goto cleanup;
|
|
|
|
mnt_mflags = mnt->mflags;
|
|
|
|
}
|
|
|
|
|
2012-01-25 14:12:54 +00:00
|
|
|
VIR_DEBUG("Processing %s -> %s",
|
2014-07-14 10:01:51 +00:00
|
|
|
mnt_src, mnt->dst);
|
2011-08-04 16:16:56 +00:00
|
|
|
|
2013-10-07 12:12:15 +00:00
|
|
|
if (mnt->skipUnmounted) {
|
|
|
|
char *hostdir;
|
|
|
|
int ret;
|
2011-07-22 12:02:05 +00:00
|
|
|
|
2013-10-07 12:12:15 +00:00
|
|
|
if (virAsprintf(&hostdir, "/.oldroot%s", mnt->dst) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
ret = virFileIsMountPoint(hostdir);
|
|
|
|
VIR_FREE(hostdir);
|
|
|
|
if (ret < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
if (ret == 0) {
|
|
|
|
VIR_DEBUG("Skipping '%s' which isn't mounted in host",
|
|
|
|
mnt->dst);
|
2014-09-01 12:08:08 +00:00
|
|
|
VIR_FREE(mnt_src);
|
2013-10-07 12:12:15 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2013-05-15 15:26:59 +00:00
|
|
|
|
2013-10-07 12:03:51 +00:00
|
|
|
if (mnt->skipUserNS && userns_enabled) {
|
|
|
|
VIR_DEBUG("Skipping due to user ns enablement");
|
2014-09-01 12:08:08 +00:00
|
|
|
VIR_FREE(mnt_src);
|
2013-09-05 10:50:40 +00:00
|
|
|
continue;
|
2013-10-07 12:03:51 +00:00
|
|
|
}
|
2013-09-05 10:50:40 +00:00
|
|
|
|
2014-12-10 09:22:28 +00:00
|
|
|
/* Skip mounts with missing source without shouting: it may be a
|
|
|
|
* missing folder in /proc due to the absence of a kernel feature */
|
|
|
|
if (STRPREFIX(mnt_src, "/") && !virFileExists(mnt_src)) {
|
|
|
|
VIR_DEBUG("Skipping due to missing source: %s", mnt_src);
|
|
|
|
VIR_FREE(mnt_src);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mnt->skipNoNetns && netns_disabled) {
|
|
|
|
VIR_DEBUG("Skipping due to absence of network namespace");
|
|
|
|
VIR_FREE(mnt_src);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2013-09-10 12:35:12 +00:00
|
|
|
if (virFileMakePath(mnt->dst) < 0) {
|
2013-01-09 11:20:59 +00:00
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to mkdir %s"),
|
2014-12-10 09:22:28 +00:00
|
|
|
mnt->dst);
|
2013-01-09 11:20:59 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2013-10-07 11:25:00 +00:00
|
|
|
/*
|
|
|
|
* We can't immediately set the MS_RDONLY flag when mounting filesystems
|
|
|
|
* because (in at least some kernel versions) this will propagate back
|
|
|
|
* to the original mount in the host OS, turning it readonly too. Thus
|
|
|
|
* we mount the filesystem in read-write mode initially, and then do a
|
|
|
|
* separate read-only bind mount on top of that.
|
|
|
|
*/
|
2014-07-14 10:01:51 +00:00
|
|
|
bindOverReadonly = !!(mnt_mflags & MS_RDONLY);
|
2013-10-07 11:25:00 +00:00
|
|
|
|
2013-10-07 11:08:14 +00:00
|
|
|
VIR_DEBUG("Mount %s on %s type=%s flags=%x",
|
2014-07-14 10:01:51 +00:00
|
|
|
mnt_src, mnt->dst, mnt->type, mnt_mflags & ~MS_RDONLY);
|
|
|
|
if (mount(mnt_src, mnt->dst, mnt->type, mnt_mflags & ~MS_RDONLY, NULL) < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2013-10-07 11:08:14 +00:00
|
|
|
_("Failed to mount %s on %s type %s flags=%x"),
|
2014-07-14 10:01:51 +00:00
|
|
|
mnt_src, mnt->dst, NULLSTR(mnt->type),
|
|
|
|
mnt_mflags & ~MS_RDONLY);
|
2013-10-07 11:25:00 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bindOverReadonly &&
|
2014-07-14 10:01:51 +00:00
|
|
|
mount(mnt_src, mnt->dst, NULL,
|
2015-06-16 13:44:36 +00:00
|
|
|
MS_BIND|MS_REMOUNT|mnt_mflags|MS_RDONLY, NULL) < 0) {
|
2013-10-07 11:25:00 +00:00
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to re-mount %s on %s flags=%x"),
|
2014-07-14 10:01:51 +00:00
|
|
|
mnt_src, mnt->dst,
|
2013-10-07 11:25:00 +00:00
|
|
|
MS_BIND|MS_REMOUNT|MS_RDONLY);
|
2009-09-04 14:12:35 +00:00
|
|
|
goto cleanup;
|
2009-04-22 14:26:50 +00:00
|
|
|
}
|
2014-09-01 12:08:08 +00:00
|
|
|
|
|
|
|
VIR_FREE(mnt_src);
|
2011-07-22 12:02:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
rc = 0;
|
|
|
|
|
2014-03-25 06:49:26 +00:00
|
|
|
cleanup:
|
2014-07-14 10:01:51 +00:00
|
|
|
VIR_FREE(mnt_src);
|
2011-07-22 12:02:05 +00:00
|
|
|
VIR_DEBUG("rc=%d", rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2013-01-08 21:04:35 +00:00
|
|
|
#if WITH_FUSE
|
2013-05-15 09:49:20 +00:00
|
|
|
static int lxcContainerMountProcFuse(virDomainDefPtr def,
|
|
|
|
const char *stateDir)
|
2012-11-12 07:02:28 +00:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
char *meminfo_path = NULL;
|
|
|
|
|
2013-05-15 09:49:20 +00:00
|
|
|
VIR_DEBUG("Mount /proc/meminfo stateDir=%s", stateDir);
|
|
|
|
|
2012-11-12 07:02:28 +00:00
|
|
|
if ((ret = virAsprintf(&meminfo_path,
|
2013-05-15 09:53:14 +00:00
|
|
|
"/.oldroot/%s/%s.fuse/meminfo",
|
2013-05-15 09:49:20 +00:00
|
|
|
stateDir,
|
2012-11-12 07:02:28 +00:00
|
|
|
def->name)) < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if ((ret = mount(meminfo_path, "/proc/meminfo",
|
|
|
|
NULL, MS_BIND, NULL)) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to mount %s on /proc/meminfo"),
|
|
|
|
meminfo_path);
|
|
|
|
}
|
|
|
|
|
|
|
|
VIR_FREE(meminfo_path);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
#else
|
2013-05-15 09:49:20 +00:00
|
|
|
static int lxcContainerMountProcFuse(virDomainDefPtr def ATTRIBUTE_UNUSED,
|
|
|
|
const char *stateDir ATTRIBUTE_UNUSED)
|
2012-11-12 07:02:28 +00:00
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
2011-07-22 12:02:05 +00:00
|
|
|
|
2013-06-07 07:12:22 +00:00
|
|
|
/**
 * lxcContainerMountFSDev:
 * @def: domain definition; its name selects the per-domain /dev dir
 * @stateDir: state directory, resolved below /.oldroot
 *
 * Makes /.oldroot/<stateDir>/<name>.dev available as /dev.  The
 * directory is bind-mounted when the domain has UID mappings
 * (def->idmap.nuidmap is non-zero) and moved otherwise.
 *
 * Returns 0 on success, -1 on failure.
 */
static int lxcContainerMountFSDev(virDomainDefPtr def,
                                  const char *stateDir)
{
    int ret = -1;
    char *path = NULL;
    int flags = def->idmap.nuidmap ? MS_BIND : MS_MOVE;

    VIR_DEBUG("Mount /dev/ stateDir=%s", stateDir);

    /* Do not store virAsprintf()'s result in ret: only "< 0" means
     * failure, and a successful call may return a non-negative value
     * (presumably the formatted length -- confirm against virstring.c),
     * which the failure paths below would then hand back to the caller
     * as if it were success. */
    if (virAsprintf(&path, "/.oldroot/%s/%s.dev",
                    stateDir, def->name) < 0)
        return ret;

    if (virFileMakePath("/dev") < 0) {
        virReportSystemError(errno, "%s",
                             _("Cannot create /dev"));
        goto cleanup;
    }

    VIR_DEBUG("Trying to %s %s to /dev", def->idmap.nuidmap ?
              "bind" : "move", path);

    if (mount(path, "/dev", NULL, flags, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount %s on /dev"),
                             path);
        goto cleanup;
    }

    ret = 0;

 cleanup:
    VIR_FREE(path);
    return ret;
}
|
|
|
|
|
2013-05-15 09:49:20 +00:00
|
|
|
/**
 * lxcContainerMountFSDevPTS:
 * @def: domain definition; its name selects the per-domain devpts dir
 * @stateDir: state directory, resolved below /.oldroot
 *
 * Makes /.oldroot/<stateDir>/<name>.devpts available as /dev/pts,
 * using a bind mount when the domain has UID mappings and a move
 * mount otherwise.
 *
 * Returns 0 on success, -1 on failure.
 */
static int lxcContainerMountFSDevPTS(virDomainDefPtr def,
                                     const char *stateDir)
{
    char *ptsdir = NULL;
    int mountflags = def->idmap.nuidmap ? MS_BIND : MS_MOVE;
    int rc = -1;

    VIR_DEBUG("Mount /dev/pts stateDir=%s", stateDir);

    if (virAsprintf(&ptsdir, "/.oldroot/%s/%s.devpts",
                    stateDir, def->name) < 0)
        return rc;

    if (virFileMakePath("/dev/pts") < 0) {
        virReportSystemError(errno, "%s",
                             _("Cannot create /dev/pts"));
        goto cleanup;
    }

    VIR_DEBUG("Trying to %s %s to /dev/pts", def->idmap.nuidmap ?
              "bind" : "move", ptsdir);

    if (mount(ptsdir, "/dev/pts", NULL, mountflags, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount %s on /dev/pts"),
                             ptsdir);
        goto cleanup;
    }

    rc = 0;

 cleanup:
    VIR_FREE(ptsdir);
    return rc;
}
|
|
|
|
|
2013-06-07 07:12:22 +00:00
|
|
|
/**
 * lxcContainerSetupDevices:
 * @ttyPaths: paths of the container's TTY devices
 * @nttyPaths: number of entries in @ttyPaths
 *
 * Populates /dev with the standard stream symlinks into /proc/self/fd,
 * binds /dev/pts/ptmx onto /dev/ptmx and creates /dev/tty<N> symlinks
 * (numbered from 1) for every entry of @ttyPaths.  The first TTY is
 * additionally linked as /dev/console.
 *
 * Returns 0 on success, -1 on failure.
 */
static int lxcContainerSetupDevices(char **ttyPaths, size_t nttyPaths)
{
    size_t n;
    const struct {
        const char *src;
        const char *dst;
    } devlinks[] = {
        { "/proc/self/fd/0", "/dev/stdin" },
        { "/proc/self/fd/1", "/dev/stdout" },
        { "/proc/self/fd/2", "/dev/stderr" },
        { "/proc/self/fd", "/dev/fd" },
    };

    for (n = 0; n < ARRAY_CARDINALITY(devlinks); n++) {
        const char *target = devlinks[n].src;
        const char *linkname = devlinks[n].dst;

        if (symlink(target, linkname) < 0) {
            virReportSystemError(errno,
                                 _("Failed to symlink device %s to %s"),
                                 linkname, target);
            return -1;
        }
    }

    /* We have private devpts capability, so bind that */
    if (virFileTouch("/dev/ptmx", 0666) < 0)
        return -1;

    if (mount("/dev/pts/ptmx", "/dev/ptmx", "ptmx", MS_BIND, NULL) < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to bind /dev/pts/ptmx on to /dev/ptmx"));
        return -1;
    }

    for (n = 0; n < nttyPaths; n++) {
        char *ttyname_path;
        int rc;

        if (virAsprintf(&ttyname_path, "/dev/tty%zu", n+1) < 0)
            return -1;

        rc = symlink(ttyPaths[n], ttyname_path);
        if (rc < 0) {
            virReportSystemError(errno,
                                 _("Failed to symlink %s to %s"),
                                 ttyPaths[n], ttyname_path);
        }
        VIR_FREE(ttyname_path);
        if (rc < 0)
            return -1;

        /* The first TTY doubles as the console */
        if (n == 0 &&
            symlink(ttyPaths[n], "/dev/console") < 0) {
            virReportSystemError(errno,
                                 _("Failed to symlink %s to /dev/console"),
                                 ttyPaths[n]);
            return -1;
        }
    }

    return 0;
}
|
|
|
|
|
|
|
|
|
2011-07-22 11:11:12 +00:00
|
|
|
/**
 * lxcContainerMountFSBind:
 * @fs: filesystem definition; fs->src is bind-mounted onto fs->dst
 * @srcprefix: prefix prepended to fs->src to locate the source
 *
 * Bind-mounts <srcprefix><fs->src> on fs->dst.  If the target does not
 * exist it is created first: as a directory when the source is a
 * directory, as an empty file otherwise.  When fs->readonly is set the
 * bind mount is re-mounted read-only afterwards.
 *
 * Returns 0 on success, -1 on failure.
 */
static int lxcContainerMountFSBind(virDomainFSDefPtr fs,
                                   const char *srcprefix)
{
    char *src = NULL;
    int ret = -1;
    struct stat st;

    VIR_DEBUG("src=%s dst=%s", fs->src, fs->dst);

    if (virAsprintf(&src, "%s%s", srcprefix, fs->src) < 0)
        goto cleanup;

    if (stat(fs->dst, &st) < 0) {
        if (errno != ENOENT) {
            virReportSystemError(errno, _("Unable to stat bind target %s"),
                                 fs->dst);
            goto cleanup;
        }
        /* ENOENT => create the target dir or file */
        if (stat(src, &st) < 0) {
            virReportSystemError(errno, _("Unable to stat bind source %s"),
                                 src);
            goto cleanup;
        }
        if (S_ISDIR(st.st_mode)) {
            if (virFileMakePath(fs->dst) < 0) {
                virReportSystemError(errno,
                                     _("Failed to create %s"),
                                     fs->dst);
                goto cleanup;
            }
        } else {
            /* Create Empty file for target mount point */
            int fd = open(fs->dst, O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666);
            if (fd < 0) {
                if (errno != EEXIST) {
                    virReportSystemError(errno,
                                         _("Failed to create bind target %s"),
                                         fs->dst);
                    goto cleanup;
                }
            }
            if (VIR_CLOSE(fd) < 0) {
                virReportSystemError(errno,
                                     _("Failed to close bind target %s"),
                                     fs->dst);
                goto cleanup;
            }
        }
    }

    if (mount(src, fs->dst, NULL, MS_BIND, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to bind mount directory %s to %s"),
                             src, fs->dst);
        goto cleanup;
    }

    if (fs->readonly) {
        VIR_DEBUG("Binding %s readonly", fs->dst);
        if (mount(src, fs->dst, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
            virReportSystemError(errno,
                                 _("Failed to make directory %s readonly"),
                                 fs->dst);
            /* A filesystem requested read-only must not silently stay
             * writable: fail instead of reporting an error and then
             * returning success. */
            goto cleanup;
        }
    }

    ret = 0;

 cleanup:
    VIR_FREE(src);
    return ret;
}
|
|
|
|
|
|
|
|
|
2012-09-20 14:43:12 +00:00
|
|
|
#ifdef WITH_BLKID
|
2011-11-01 14:59:51 +00:00
|
|
|
static int
|
|
|
|
lxcContainerMountDetectFilesystem(const char *src, char **type)
|
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
int ret = -1;
|
|
|
|
int rc;
|
|
|
|
const char *data = NULL;
|
|
|
|
blkid_probe blkid = NULL;
|
|
|
|
|
|
|
|
*type = NULL;
|
|
|
|
|
|
|
|
if ((fd = open(src, O_RDONLY)) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to open filesystem %s"), src);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(blkid = blkid_new_probe())) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Unable to create blkid library handle"));
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
if (blkid_probe_set_device(blkid, fd, 0, 0) < 0) {
|
|
|
|
virReportSystemError(EINVAL,
|
|
|
|
_("Unable to associate device %s with blkid library"),
|
|
|
|
src);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
blkid_probe_enable_superblocks(blkid, 1);
|
|
|
|
|
|
|
|
blkid_probe_set_superblocks_flags(blkid, BLKID_SUBLKS_TYPE);
|
|
|
|
|
|
|
|
rc = blkid_do_safeprobe(blkid);
|
|
|
|
if (rc != 0) {
|
|
|
|
if (rc == 1) /* Nothing found, return success with *type == NULL */
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
if (rc == -2) {
|
|
|
|
virReportSystemError(EINVAL,
|
|
|
|
_("Too many filesystems detected for %s"),
|
|
|
|
src);
|
|
|
|
} else {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to detect filesystem for %s"),
|
|
|
|
src);
|
|
|
|
}
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (blkid_probe_lookup_value(blkid, "TYPE", &data, NULL) < 0) {
|
|
|
|
virReportSystemError(ENOENT,
|
|
|
|
_("Unable to find filesystem type for %s"),
|
|
|
|
src);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2013-05-03 12:43:39 +00:00
|
|
|
if (VIR_STRDUP(*type, data) < 0)
|
2011-11-01 14:59:51 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
2014-03-25 06:49:26 +00:00
|
|
|
done:
|
2011-11-01 14:59:51 +00:00
|
|
|
ret = 0;
|
2014-03-25 06:49:26 +00:00
|
|
|
cleanup:
|
2011-11-01 14:59:51 +00:00
|
|
|
VIR_FORCE_CLOSE(fd);
|
|
|
|
if (blkid)
|
|
|
|
blkid_free_probe(blkid);
|
|
|
|
return ret;
|
|
|
|
}
|
2012-09-20 14:43:12 +00:00
|
|
|
#else /* ! WITH_BLKID */
|
2011-11-01 14:59:51 +00:00
|
|
|
static int
|
|
|
|
lxcContainerMountDetectFilesystem(const char *src ATTRIBUTE_UNUSED,
|
|
|
|
char **type)
|
|
|
|
{
|
|
|
|
/* No libblkid, so just return success with no detected type */
|
|
|
|
*type = NULL;
|
|
|
|
return 0;
|
|
|
|
}
|
2012-09-20 14:43:12 +00:00
|
|
|
#endif /* ! WITH_BLKID */
|
2011-07-22 12:02:51 +00:00
|
|
|
|
|
|
|
/*
|
2013-09-06 11:14:00 +00:00
|
|
|
* This function attempts to do automatic detection of filesystem
|
2011-07-22 12:02:51 +00:00
|
|
|
* type following the same rules as the util-linux 'mount' binary.
|
|
|
|
*
|
|
|
|
* The main difference is that we don't (currently) try to use
|
|
|
|
* libblkid to detect the format first. We go straight to using
|
|
|
|
* /etc/filesystems, and then /proc/filesystems
|
|
|
|
*/
|
|
|
|
static int lxcContainerMountFSBlockAuto(virDomainFSDefPtr fs,
|
|
|
|
int fsflags,
|
2013-08-13 12:25:56 +00:00
|
|
|
const char *src,
|
2013-11-29 12:19:37 +00:00
|
|
|
const char *srcprefix,
|
|
|
|
const char *sec_mount_options)
|
2011-07-22 12:02:51 +00:00
|
|
|
{
|
|
|
|
FILE *fp = NULL;
|
|
|
|
int ret = -1;
|
|
|
|
bool tryProc = false;
|
|
|
|
bool gotStar = false;
|
|
|
|
char *fslist = NULL;
|
|
|
|
char *line = NULL;
|
|
|
|
const char *type;
|
|
|
|
|
2013-08-13 12:25:56 +00:00
|
|
|
VIR_DEBUG("src=%s dst=%s srcprefix=%s", src, fs->dst, srcprefix);
|
2011-07-22 12:02:51 +00:00
|
|
|
|
|
|
|
/* First time around we use /etc/filesystems */
|
2014-03-25 06:49:26 +00:00
|
|
|
retry:
|
2013-08-13 12:25:56 +00:00
|
|
|
if (virAsprintf(&fslist, "%s%s", srcprefix,
|
2013-07-04 10:11:37 +00:00
|
|
|
tryProc ? "/proc/filesystems" : "/etc/filesystems") < 0)
|
2011-07-22 12:02:51 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
VIR_DEBUG("Open fslist %s", fslist);
|
|
|
|
if (!(fp = fopen(fslist, "r"))) {
|
|
|
|
/* If /etc/filesystems does not exist, then we need to retry
|
|
|
|
* with /proc/filesystems next
|
|
|
|
*/
|
|
|
|
if (errno == ENOENT &&
|
|
|
|
!tryProc) {
|
|
|
|
tryProc = true;
|
|
|
|
VIR_FREE(fslist);
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to read %s"),
|
|
|
|
fslist);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (!feof(fp)) {
|
|
|
|
size_t n;
|
|
|
|
VIR_FREE(line);
|
|
|
|
if (getline(&line, &n, fp) <= 0) {
|
|
|
|
if (feof(fp))
|
|
|
|
break;
|
|
|
|
|
2011-07-22 12:08:20 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2011-07-22 12:02:51 +00:00
|
|
|
if (strstr(line, "nodev"))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
type = strchr(line, '\n');
|
|
|
|
if (type)
|
|
|
|
line[type-line] = '\0';
|
|
|
|
|
|
|
|
type = line;
|
|
|
|
virSkipSpaces(&type);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* /etc/filesystems is only allowed to contain '*' on the last line
|
|
|
|
*/
|
2011-11-01 12:29:25 +00:00
|
|
|
if (gotStar && !tryProc) {
|
2012-07-13 12:59:51 +00:00
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
|
_("%s has unexpected '*' before last line"),
|
|
|
|
fslist);
|
2011-07-22 12:02:51 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* An '*' on the last line in /etc/filesystems
|
|
|
|
* means try /proc/filesystems next. We don't
|
|
|
|
* jump immediately though, since we need to see
|
|
|
|
* if any more lines follow
|
|
|
|
*/
|
|
|
|
if (!tryProc &&
|
|
|
|
STREQ(type, "*"))
|
|
|
|
gotStar = true;
|
|
|
|
|
2013-11-29 12:19:37 +00:00
|
|
|
VIR_DEBUG("Trying mount '%s' on '%s' with '%s' opts '%s'",
|
|
|
|
src, fs->dst, type, sec_mount_options);
|
|
|
|
if (mount(src, fs->dst, type, fsflags, sec_mount_options) < 0) {
|
2011-07-22 12:02:51 +00:00
|
|
|
/* These errnos indicate a bogus filesystem type for
|
|
|
|
* the image we have, so skip to the next type
|
|
|
|
*/
|
|
|
|
if (errno == EINVAL || errno == ENODEV)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
virReportSystemError(errno,
|
2011-11-01 14:34:02 +00:00
|
|
|
_("Failed to mount device %s to %s"),
|
2011-07-22 12:02:51 +00:00
|
|
|
src, fs->dst);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
break;
|
2011-07-22 12:08:20 +00:00
|
|
|
}
|
|
|
|
|
2011-07-22 12:02:51 +00:00
|
|
|
/* We've got to the end of /etc/filesystems and saw
|
|
|
|
* a '*', so we must try /proc/filesystems next
|
|
|
|
*/
|
|
|
|
if (ret != 0 &&
|
|
|
|
!tryProc &&
|
|
|
|
gotStar) {
|
|
|
|
tryProc = true;
|
|
|
|
VIR_FREE(fslist);
|
|
|
|
VIR_FORCE_FCLOSE(fp);
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
2011-11-01 14:34:02 +00:00
|
|
|
if (ret != 0) {
|
|
|
|
virReportSystemError(ENODEV,
|
|
|
|
_("Failed to mount device %s to %s, unable to detect filesystem"),
|
|
|
|
src, fs->dst);
|
|
|
|
}
|
|
|
|
|
2011-07-22 12:02:51 +00:00
|
|
|
VIR_DEBUG("Done mounting filesystem ret=%d tryProc=%d", ret, tryProc);
|
|
|
|
|
2014-03-25 06:49:26 +00:00
|
|
|
cleanup:
|
2011-07-22 12:02:51 +00:00
|
|
|
VIR_FREE(line);
|
2012-06-11 05:52:37 +00:00
|
|
|
VIR_FREE(fslist);
|
2011-07-22 12:02:51 +00:00
|
|
|
VIR_FORCE_FCLOSE(fp);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Mount a block device 'src' on fs->dst, automatically
|
|
|
|
* probing for filesystem type
|
|
|
|
*/
|
|
|
|
static int lxcContainerMountFSBlockHelper(virDomainFSDefPtr fs,
|
2013-08-13 12:25:56 +00:00
|
|
|
const char *src,
|
2013-11-29 12:19:37 +00:00
|
|
|
const char *srcprefix,
|
|
|
|
const char *sec_mount_options)
|
2011-07-22 12:02:51 +00:00
|
|
|
{
|
|
|
|
int fsflags = 0;
|
|
|
|
int ret = -1;
|
2011-11-01 14:59:51 +00:00
|
|
|
char *format = NULL;
|
|
|
|
|
2011-07-22 12:02:51 +00:00
|
|
|
if (fs->readonly)
|
|
|
|
fsflags |= MS_RDONLY;
|
|
|
|
|
|
|
|
if (virFileMakePath(fs->dst) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to create %s"),
|
|
|
|
fs->dst);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2011-11-01 14:59:51 +00:00
|
|
|
if (lxcContainerMountDetectFilesystem(src, &format) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
if (format) {
|
2013-11-29 12:19:37 +00:00
|
|
|
VIR_DEBUG("Mount '%s' on '%s' with detected format '%s' opts '%s'",
|
|
|
|
src, fs->dst, format, sec_mount_options);
|
|
|
|
if (mount(src, fs->dst, format, fsflags, sec_mount_options) < 0) {
|
2011-11-01 14:59:51 +00:00
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to mount device %s to %s as %s"),
|
|
|
|
src, fs->dst, format);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
ret = 0;
|
|
|
|
} else {
|
2013-11-29 12:19:37 +00:00
|
|
|
ret = lxcContainerMountFSBlockAuto(fs, fsflags, src, srcprefix, sec_mount_options);
|
2011-11-01 14:59:51 +00:00
|
|
|
}
|
2011-07-22 12:02:51 +00:00
|
|
|
|
2014-03-25 06:49:26 +00:00
|
|
|
cleanup:
|
2012-06-12 05:55:48 +00:00
|
|
|
VIR_FREE(format);
|
2011-07-22 12:02:51 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * lxcContainerMountFSBlock:
 * @fs: filesystem definition; fs->src is the device, fs->dst the target
 * @srcprefix: prefix prepended to fs->src to locate the device
 * @sec_mount_options: data string handed to mount()
 *
 * Mounts the block device <srcprefix><fs->src> on fs->dst.
 *
 * Returns 0 on success, -1 on failure.
 */
static int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
                                    const char *srcprefix,
                                    const char *sec_mount_options)
{
    char *device = NULL;
    int rc = -1;

    VIR_DEBUG("src=%s dst=%s", fs->src, fs->dst);

    if (virAsprintf(&device, "%s%s", srcprefix, fs->src) >= 0) {
        rc = lxcContainerMountFSBlockHelper(fs, device, srcprefix, sec_mount_options);

        VIR_DEBUG("Done mounting filesystem ret=%d", rc);
    }

    VIR_FREE(device);
    return rc;
}
|
|
|
|
|
|
|
|
|
2012-07-18 18:44:47 +00:00
|
|
|
/*
 * lxcContainerMountFSTmpfs:
 * @fs: filesystem definition (fs->dst is the mount point, fs->usage the size)
 * @sec_mount_options: string appended verbatim to the tmpfs mount data
 *
 * Creates fs->dst if needed, mounts a tmpfs there with
 * MS_NOSUID|MS_NODEV, and, when fs->readonly is set, remounts it
 * read-only via a bind remount.
 *
 * Returns 0 on success, -1 on error (error is reported).
 */
static int lxcContainerMountFSTmpfs(virDomainFSDefPtr fs,
                                    char *sec_mount_options)
{
    char *mountData = NULL;
    int rc = -1;

    VIR_DEBUG("usage=%lld sec=%s", fs->usage, sec_mount_options);

    /* Mount data is the size followed directly by the security options */
    if (virAsprintf(&mountData,
                    "size=%lld%s", fs->usage, sec_mount_options) < 0)
        goto cleanup;

    if (virFileMakePath(fs->dst) < 0) {
        virReportSystemError(errno,
                             _("Failed to create %s"),
                             fs->dst);
        goto cleanup;
    }

    if (mount("tmpfs", fs->dst, "tmpfs", MS_NOSUID|MS_NODEV, mountData) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount directory %s as tmpfs"),
                             fs->dst);
        goto cleanup;
    }

    /* Writable mounts are finished at this point */
    if (!fs->readonly) {
        rc = 0;
        goto cleanup;
    }

    VIR_DEBUG("Binding %s readonly", fs->dst);
    if (mount(fs->dst, fs->dst, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to make directory %s readonly"),
                             fs->dst);
        goto cleanup;
    }

    rc = 0;

 cleanup:
    VIR_FREE(mountData);
    return rc;
}
|
|
|
|
|
|
|
|
|
2011-07-22 11:11:12 +00:00
|
|
|
/*
 * lxcContainerMountFS:
 * @fs: filesystem definition to mount
 * @sec_mount_options: security mount options for block and RAM mounts
 *
 * Dispatches on fs->type to the matching mount routine. Host paths for
 * MOUNT and BLOCK types are looked up under "/.oldroot"; BIND sources
 * are used without a prefix.
 *
 * Returns 0 on success, -1 on error (error is reported).
 */
static int lxcContainerMountFS(virDomainFSDefPtr fs,
                               char *sec_mount_options)
{
    int rc;

    switch (fs->type) {
    case VIR_DOMAIN_FS_TYPE_MOUNT:
        rc = lxcContainerMountFSBind(fs, "/.oldroot");
        break;

    case VIR_DOMAIN_FS_TYPE_BLOCK:
        rc = lxcContainerMountFSBlock(fs, "/.oldroot", sec_mount_options);
        break;

    case VIR_DOMAIN_FS_TYPE_RAM:
        rc = lxcContainerMountFSTmpfs(fs, sec_mount_options);
        break;

    case VIR_DOMAIN_FS_TYPE_BIND:
        rc = lxcContainerMountFSBind(fs, "");
        break;

    case VIR_DOMAIN_FS_TYPE_FILE:
        /* File-backed filesystems are supported, but the lxc controller
         * is expected to have attached the file to a loopback device
         * and rewritten the type to TYPE_BLOCK before we get here */
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unexpected filesystem type %s"),
                       virDomainFSTypeToString(fs->type));
        return -1;

    default:
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Cannot mount filesystem type %s"),
                       virDomainFSTypeToString(fs->type));
        return -1;
    }

    return rc < 0 ? -1 : 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * lxcContainerMountAllFS:
 * @vmDef: domain definition whose filesystems are to be mounted
 * @sec_mount_options: security mount options passed to each mount
 *
 * Mounts every configured filesystem except the one targeting "/".
 * For each entry the symlinks are resolved, anything already mounted
 * below the destination is unmounted (unless the source path starts
 * with the destination path), and then the filesystem is mounted.
 *
 * Returns 0 on success, -1 on the first failure.
 */
static int lxcContainerMountAllFS(virDomainDefPtr vmDef,
                                  char *sec_mount_options)
{
    size_t idx;

    VIR_DEBUG("Mounting all non-root filesystems");

    /* Pull in rest of container's mounts */
    for (idx = 0; idx < vmDef->nfss; idx++) {
        virDomainFSDefPtr fs = vmDef->fss[idx];
        bool srcUnderDst;

        /* The root filesystem is not handled by this loop */
        if (STREQ(fs->dst, "/"))
            continue;

        VIR_DEBUG("Mounting '%s' -> '%s'", fs->src, fs->dst);

        if (lxcContainerResolveSymlinks(fs, false) < 0)
            return -1;

        /* Skip the unmount when the source path begins with the
         * destination path */
        srcUnderDst = fs->src && STRPREFIX(fs->src, fs->dst);
        if (!srcUnderDst &&
            lxcContainerUnmountSubtree(fs->dst, false) < 0)
            return -1;

        if (lxcContainerMountFS(fs, sec_mount_options) < 0)
            return -1;
    }

    VIR_DEBUG("Mounted all non-root filesystems");
    return 0;
}
|
|
|
|
|
|
|
|
|
2013-07-09 10:15:11 +00:00
|
|
|
/*
 * lxcContainerSetupHostdevCapsMakePath:
 * @dev: path of a device node
 *
 * Creates the directory that contains @dev, i.e. everything up to the
 * last '/' in the path. A path without any '/' needs no directory and
 * succeeds immediately.
 *
 * Returns 0 on success, -1 on error (error is reported).
 */
int lxcContainerSetupHostdevCapsMakePath(const char *dev)
{
    char *parent = NULL;
    char *slash;
    int rc = -1;

    if (VIR_STRDUP(parent, dev) < 0)
        return -1;

    slash = strrchr(parent, '/');
    if (slash) {
        /* Cut off the final component, leaving only the directory part */
        *slash = '\0';
        if (virFileMakePath(parent) < 0) {
            virReportSystemError(errno,
                                 _("Failed to create directory for '%s' dev '%s'"),
                                 parent, dev);
            goto cleanup;
        }
    }

    rc = 0;

 cleanup:
    VIR_FREE(parent);
    return rc;
}
|
|
|
|
|
|
|
|
|
2013-09-11 08:07:54 +00:00
|
|
|
static int lxcContainerUnmountForSharedRoot(const char *stateDir,
|
|
|
|
const char *domain)
|
|
|
|
{
|
|
|
|
int ret = -1;
|
|
|
|
char *tmp = NULL;
|
|
|
|
|
|
|
|
#if WITH_SELINUX
|
|
|
|
/* Some versions of Linux kernel don't let you overmount
|
|
|
|
* the selinux filesystem, so make sure we kill it first
|
|
|
|
*/
|
|
|
|
/* Filed coverity bug for false positive 'USE_AFTER_FREE' due to swap
|
|
|
|
* of root->src with root->dst and the VIR_FREE(root->src) prior to the
|
|
|
|
* reset of root->src in lxcContainerPrepareRoot()
|
|
|
|
*/
|
|
|
|
/* coverity[deref_arg] */
|
|
|
|
if (lxcContainerUnmountSubtree(SELINUX_MOUNT, false) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* These filesystems are created by libvirt temporarily, they
|
|
|
|
* shouldn't appear in container. */
|
|
|
|
if (virAsprintf(&tmp, "%s/%s.dev", stateDir, domain) < 0 ||
|
|
|
|
lxcContainerUnmountSubtree(tmp, false) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
VIR_FREE(tmp);
|
|
|
|
if (virAsprintf(&tmp, "%s/%s.devpts", stateDir, domain) < 0 ||
|
|
|
|
lxcContainerUnmountSubtree(tmp, false) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
#if WITH_FUSE
|
|
|
|
VIR_FREE(tmp);
|
|
|
|
if (virAsprintf(&tmp, "%s/%s.fuse", stateDir, domain) < 0 ||
|
|
|
|
lxcContainerUnmountSubtree(tmp, false) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* If we have the root source being '/', then we need to
|
|
|
|
* get rid of any existing stuff under /proc, /sys & /tmp.
|
|
|
|
* We need new namespace aware versions of those. We must
|
|
|
|
* do /proc last otherwise we won't find /proc/mounts :-) */
|
|
|
|
if (lxcContainerUnmountSubtree("/sys", false) < 0 ||
|
|
|
|
lxcContainerUnmountSubtree("/dev", false) < 0 ||
|
|
|
|
lxcContainerUnmountSubtree("/proc", false) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
|
2014-03-25 06:49:26 +00:00
|
|
|
cleanup:
|
2013-09-11 08:07:54 +00:00
|
|
|
VIR_FREE(tmp);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-12-10 09:22:28 +00:00
|
|
|
static bool
|
|
|
|
lxcNeedNetworkNamespace(virDomainDefPtr def)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
if (def->nets != NULL)
|
|
|
|
return true;
|
|
|
|
if (def->features[VIR_DOMAIN_FEATURE_PRIVNET] == VIR_TRISTATE_SWITCH_ON)
|
|
|
|
return true;
|
|
|
|
for (i = 0; i < def->nhostdevs; i++) {
|
|
|
|
if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES &&
|
|
|
|
def->hostdevs[i]->source.caps.type == VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
/* Got a FS mapped to /, we're going the pivot_root
|
|
|
|
* approach to do a better-chroot-than-chroot
|
|
|
|
* this is based on this thread http://lkml.org/lkml/2008/3/5/29
|
|
|
|
*/
|
|
|
|
static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
|
2011-10-20 08:44:31 +00:00
|
|
|
virDomainFSDefPtr root,
|
|
|
|
char **ttyPaths,
|
2012-05-11 10:02:50 +00:00
|
|
|
size_t nttyPaths,
|
2012-11-22 14:23:49 +00:00
|
|
|
virSecurityManagerPtr securityDriver)
|
2008-08-28 22:40:50 +00:00
|
|
|
{
|
2013-04-05 11:50:27 +00:00
|
|
|
virCgroupPtr cgroup = NULL;
|
2012-05-11 16:26:48 +00:00
|
|
|
int ret = -1;
|
2012-11-22 14:23:49 +00:00
|
|
|
char *sec_mount_options;
|
2013-05-15 09:49:20 +00:00
|
|
|
char *stateDir = NULL;
|
2012-11-22 14:23:49 +00:00
|
|
|
|
2013-07-12 10:04:55 +00:00
|
|
|
VIR_DEBUG("Setup pivot root");
|
|
|
|
|
2012-11-22 14:23:49 +00:00
|
|
|
if (!(sec_mount_options = virSecurityManagerGetMountOptions(securityDriver, vmDef)))
|
|
|
|
return -1;
|
2012-05-11 16:26:48 +00:00
|
|
|
|
|
|
|
/* Before pivoting we need to identify any
|
|
|
|
* cgroups controllers that are mounted */
|
2013-07-04 15:49:24 +00:00
|
|
|
if (virCgroupNewSelf(&cgroup) < 0)
|
2012-11-22 14:23:49 +00:00
|
|
|
goto cleanup;
|
2012-05-11 16:26:48 +00:00
|
|
|
|
2013-05-15 09:49:20 +00:00
|
|
|
if (virFileResolveAllLinks(LXC_STATE_DIR, &stateDir) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2013-03-22 14:09:41 +00:00
|
|
|
/* Ensure the root filesystem is mounted */
|
2013-11-29 12:19:37 +00:00
|
|
|
if (lxcContainerPrepareRoot(vmDef, root, sec_mount_options) < 0)
|
2013-03-22 14:09:41 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Gives us a private root, leaving all parent OS mounts on /.oldroot */
|
2008-08-28 22:40:50 +00:00
|
|
|
if (lxcContainerPivotRoot(root) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2013-11-20 02:11:09 +00:00
|
|
|
/* FIXME: we should find a way to unmount these mounts for container
|
|
|
|
* even user namespace is enabled. */
|
|
|
|
if (STREQ(root->src, "/") && (!vmDef->idmap.nuidmap) &&
|
2013-09-11 08:07:54 +00:00
|
|
|
lxcContainerUnmountForSharedRoot(stateDir, vmDef->name) < 0)
|
2012-06-12 20:26:37 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
2011-07-22 12:02:05 +00:00
|
|
|
/* Mounts the core /proc, /sys, etc filesystems */
|
2014-07-14 10:01:51 +00:00
|
|
|
if (lxcContainerMountBasicFS(vmDef->idmap.nuidmap,
|
2014-12-10 09:22:28 +00:00
|
|
|
!lxcNeedNetworkNamespace(vmDef)) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
2013-09-09 15:17:19 +00:00
|
|
|
/* Ensure entire root filesystem (except /.oldroot) is readonly */
|
|
|
|
if (root->readonly &&
|
|
|
|
lxcContainerSetReadOnly() < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2012-11-12 07:02:28 +00:00
|
|
|
/* Mounts /proc/meminfo etc sysinfo */
|
2013-05-15 09:49:20 +00:00
|
|
|
if (lxcContainerMountProcFuse(vmDef, stateDir) < 0)
|
2012-11-12 07:02:28 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
2012-05-11 16:26:48 +00:00
|
|
|
/* Now we can re-mount the cgroups controllers in the
|
|
|
|
* same configuration as before */
|
2013-04-05 11:50:27 +00:00
|
|
|
if (virCgroupIsolateMount(cgroup, "/.oldroot/", sec_mount_options) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2011-07-22 12:02:05 +00:00
|
|
|
|
2013-06-07 07:12:22 +00:00
|
|
|
/* Mounts /dev */
|
|
|
|
if (lxcContainerMountFSDev(vmDef, stateDir) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2012-01-25 14:12:54 +00:00
|
|
|
/* Mounts /dev/pts */
|
2013-05-15 09:49:20 +00:00
|
|
|
if (lxcContainerMountFSDevPTS(vmDef, stateDir) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2013-06-07 07:12:22 +00:00
|
|
|
/* Setup device nodes in /dev/ */
|
|
|
|
if (lxcContainerSetupDevices(ttyPaths, nttyPaths) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Sets up any non-root mounts from guest config */
|
2013-05-15 09:53:15 +00:00
|
|
|
if (lxcContainerMountAllFS(vmDef, sec_mount_options) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2012-11-23 14:46:18 +00:00
|
|
|
/* Gets rid of all remaining mounts from host OS, including /.oldroot itself */
|
2012-05-11 10:35:28 +00:00
|
|
|
if (lxcContainerUnmountSubtree("/.oldroot", true) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2012-05-11 16:26:48 +00:00
|
|
|
ret = 0;
|
|
|
|
|
2014-03-25 06:49:26 +00:00
|
|
|
cleanup:
|
2013-05-15 09:49:20 +00:00
|
|
|
VIR_FREE(stateDir);
|
2013-04-05 11:50:27 +00:00
|
|
|
virCgroupFree(&cgroup);
|
2012-11-22 14:23:49 +00:00
|
|
|
VIR_FREE(sec_mount_options);
|
2012-05-11 16:26:48 +00:00
|
|
|
return ret;
|
2008-08-28 22:40:50 +00:00
|
|
|
}
|
|
|
|
|
2014-11-21 16:45:55 +00:00
|
|
|
/*
 * lxcContainerResolveAllSymlinks:
 * @vmDef: domain definition whose filesystems are processed
 *
 * Runs lxcContainerResolveSymlinks() in gentle mode over every
 * configured filesystem.
 *
 * Returns 0 on success, -1 as soon as one resolution fails.
 */
static int lxcContainerResolveAllSymlinks(virDomainDefPtr vmDef)
{
    size_t idx = 0;

    VIR_DEBUG("Resolving symlinks");

    while (idx < vmDef->nfss) {
        /* This is only the first pass, so stay gentle: some files may
         * depend on other filesystems that are not mounted yet */
        if (lxcContainerResolveSymlinks(vmDef->fss[idx], true) < 0)
            return -1;
        idx++;
    }

    VIR_DEBUG("Resolved all filesystem symlinks");
    return 0;
}
|
|
|
|
|
2009-06-29 17:09:42 +00:00
|
|
|
/*
|
2013-09-06 11:14:00 +00:00
|
|
|
* This is running as the 'init' process inside the container.
|
2009-06-29 17:09:42 +00:00
|
|
|
* It removes some capabilities that could be dangerous to
|
|
|
|
* host system, since they are not currently "containerized"
|
|
|
|
*/
|
2012-09-20 14:17:56 +00:00
|
|
|
#if WITH_CAPNG
|
2014-07-18 08:02:29 +00:00
|
|
|
|
|
|
|
/* Any capability the kernel headers do not provide is given the value
 * -1 here, so that later code can recognise and skip it. */
# ifndef CAP_AUDIT_CONTROL
#  define CAP_AUDIT_CONTROL -1
# endif
# ifndef CAP_AUDIT_WRITE
#  define CAP_AUDIT_WRITE -1
# endif
# ifndef CAP_BLOCK_SUSPEND
#  define CAP_BLOCK_SUSPEND -1
# endif
# ifndef CAP_CHOWN
#  define CAP_CHOWN -1
# endif
# ifndef CAP_DAC_OVERRIDE
#  define CAP_DAC_OVERRIDE -1
# endif
# ifndef CAP_DAC_READ_SEARCH
#  define CAP_DAC_READ_SEARCH -1
# endif
# ifndef CAP_FOWNER
#  define CAP_FOWNER -1
# endif
# ifndef CAP_FSETID
#  define CAP_FSETID -1
# endif
# ifndef CAP_IPC_LOCK
#  define CAP_IPC_LOCK -1
# endif
# ifndef CAP_IPC_OWNER
#  define CAP_IPC_OWNER -1
# endif
# ifndef CAP_KILL
#  define CAP_KILL -1
# endif
# ifndef CAP_LEASE
#  define CAP_LEASE -1
# endif
# ifndef CAP_LINUX_IMMUTABLE
#  define CAP_LINUX_IMMUTABLE -1
# endif
# ifndef CAP_MAC_ADMIN
#  define CAP_MAC_ADMIN -1
# endif
# ifndef CAP_MAC_OVERRIDE
#  define CAP_MAC_OVERRIDE -1
# endif
# ifndef CAP_MKNOD
#  define CAP_MKNOD -1
# endif
# ifndef CAP_NET_ADMIN
#  define CAP_NET_ADMIN -1
# endif
# ifndef CAP_NET_BIND_SERVICE
#  define CAP_NET_BIND_SERVICE -1
# endif
# ifndef CAP_NET_BROADCAST
#  define CAP_NET_BROADCAST -1
# endif
# ifndef CAP_NET_RAW
#  define CAP_NET_RAW -1
# endif
# ifndef CAP_SETGID
#  define CAP_SETGID -1
# endif
# ifndef CAP_SETFCAP
#  define CAP_SETFCAP -1
# endif
# ifndef CAP_SETPCAP
#  define CAP_SETPCAP -1
# endif
# ifndef CAP_SETUID
#  define CAP_SETUID -1
# endif
# ifndef CAP_SYS_ADMIN
#  define CAP_SYS_ADMIN -1
# endif
# ifndef CAP_SYS_BOOT
#  define CAP_SYS_BOOT -1
# endif
# ifndef CAP_SYS_CHROOT
#  define CAP_SYS_CHROOT -1
# endif
# ifndef CAP_SYS_MODULE
#  define CAP_SYS_MODULE -1
# endif
# ifndef CAP_SYS_NICE
#  define CAP_SYS_NICE -1
# endif
# ifndef CAP_SYS_PACCT
#  define CAP_SYS_PACCT -1
# endif
# ifndef CAP_SYS_PTRACE
#  define CAP_SYS_PTRACE -1
# endif
# ifndef CAP_SYS_RAWIO
#  define CAP_SYS_RAWIO -1
# endif
# ifndef CAP_SYS_RESOURCE
#  define CAP_SYS_RESOURCE -1
# endif
# ifndef CAP_SYS_TIME
#  define CAP_SYS_TIME -1
# endif
# ifndef CAP_SYS_TTY_CONFIG
#  define CAP_SYS_TTY_CONFIG -1
# endif
# ifndef CAP_SYSLOG
#  define CAP_SYSLOG -1
# endif
# ifndef CAP_WAKE_ALARM
#  define CAP_WAKE_ALARM -1
# endif
|
|
|
|
|
|
|
|
/*
 * lxcContainerDropCapabilities:
 * @def: domain definition providing the capabilities policy
 *       (def->features[VIR_DOMAIN_FEATURE_CAPABILITIES]) and the
 *       per-capability tristate overrides (def->caps_features[])
 * @keepReboot: when true, CAP_SYS_BOOT is not dropped by the default
 *              policy
 *
 * Adjusts the process capabilities according to the policy:
 *  - DENY: everything is cleared, then capabilities switched ON are
 *    added back.
 *  - DEFAULT: a fixed set of dangerous capabilities is dropped unless
 *    switched ON; any other capability is dropped only if switched OFF.
 *  - ALLOW: only capabilities switched OFF are dropped.
 * Capabilities for which cap_valid() fails (e.g. those defined to -1
 * above) are skipped.
 *
 * Returns 0 on success, -1 on error (error is reported), including
 * when the policy value is not one of the three handled above.
 */
static int lxcContainerDropCapabilities(virDomainDefPtr def,
                                        bool keepReboot)
{
    int ret;
    size_t i;
    int policy = def->features[VIR_DOMAIN_FEATURE_CAPABILITIES];

    /* Maps virDomainCapsFeature to CAPS_* */
    static const int capsMapping[] = {CAP_AUDIT_CONTROL,
                                      CAP_AUDIT_WRITE,
                                      CAP_BLOCK_SUSPEND,
                                      CAP_CHOWN,
                                      CAP_DAC_OVERRIDE,
                                      CAP_DAC_READ_SEARCH,
                                      CAP_FOWNER,
                                      CAP_FSETID,
                                      CAP_IPC_LOCK,
                                      CAP_IPC_OWNER,
                                      CAP_KILL,
                                      CAP_LEASE,
                                      CAP_LINUX_IMMUTABLE,
                                      CAP_MAC_ADMIN,
                                      CAP_MAC_OVERRIDE,
                                      CAP_MKNOD,
                                      CAP_NET_ADMIN,
                                      CAP_NET_BIND_SERVICE,
                                      CAP_NET_BROADCAST,
                                      CAP_NET_RAW,
                                      CAP_SETGID,
                                      CAP_SETFCAP,
                                      CAP_SETPCAP,
                                      CAP_SETUID,
                                      CAP_SYS_ADMIN,
                                      CAP_SYS_BOOT,
                                      CAP_SYS_CHROOT,
                                      CAP_SYS_MODULE,
                                      CAP_SYS_NICE,
                                      CAP_SYS_PACCT,
                                      CAP_SYS_PTRACE,
                                      CAP_SYS_RAWIO,
                                      CAP_SYS_RESOURCE,
                                      CAP_SYS_TIME,
                                      CAP_SYS_TTY_CONFIG,
                                      CAP_SYSLOG,
                                      CAP_WAKE_ALARM};

    capng_get_caps_process();

    /* Make sure we drop everything if required by the user */
    if (policy == VIR_DOMAIN_CAPABILITIES_POLICY_DENY)
        capng_clear(CAPNG_SELECT_BOTH);

    /* Apply all single capabilities changes */
    for (i = 0; i < VIR_DOMAIN_CAPS_FEATURE_LAST; i++) {
        bool toDrop = false;
        int state = def->caps_features[i];

        /* Skip capabilities that are not valid on this system */
        if (!cap_valid(capsMapping[i]))
            continue;

        switch ((virDomainCapabilitiesPolicy) policy) {

        case VIR_DOMAIN_CAPABILITIES_POLICY_DENY:
            /* Everything was cleared above: re-add what is switched on */
            if (state == VIR_TRISTATE_SWITCH_ON &&
                    (ret = capng_update(CAPNG_ADD,
                                        CAPNG_EFFECTIVE | CAPNG_PERMITTED |
                                        CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
                                        capsMapping[i])) < 0) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Failed to add capability %s: %d"),
                               virDomainCapsFeatureTypeToString(i), ret);
                return -1;
            }
            break;

        case VIR_DOMAIN_CAPABILITIES_POLICY_DEFAULT:
            switch ((virDomainCapsFeature) i) {
            case VIR_DOMAIN_CAPS_FEATURE_SYS_BOOT: /* No use of reboot */
                toDrop = !keepReboot && (state != VIR_TRISTATE_SWITCH_ON);
                break;
            case VIR_DOMAIN_CAPS_FEATURE_SYS_MODULE: /* No kernel module loading */
            case VIR_DOMAIN_CAPS_FEATURE_SYS_TIME: /* No changing the clock */
            case VIR_DOMAIN_CAPS_FEATURE_MKNOD: /* No creating device nodes */
            case VIR_DOMAIN_CAPS_FEATURE_AUDIT_CONTROL: /* No messing with auditing status */
            case VIR_DOMAIN_CAPS_FEATURE_MAC_ADMIN: /* No messing with LSM config */
                toDrop = (state != VIR_TRISTATE_SWITCH_ON);
                break;
            default: /* User specified capabilities to drop */
                toDrop = (state == VIR_TRISTATE_SWITCH_OFF);
            }
            /* Fallthrough */

        case VIR_DOMAIN_CAPABILITIES_POLICY_ALLOW:
            /* Reached both directly and by falling through from the
             * DEFAULT case, hence the explicit policy check */
            if (policy == VIR_DOMAIN_CAPABILITIES_POLICY_ALLOW)
                toDrop = state == VIR_TRISTATE_SWITCH_OFF;

            if (toDrop && (ret = capng_update(CAPNG_DROP,
                                              CAPNG_EFFECTIVE | CAPNG_PERMITTED |
                                              CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
                                              capsMapping[i])) < 0) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Failed to remove capability %s: %d"),
                               virDomainCapsFeatureTypeToString(i), ret);
                return -1;
            }
            break;

        default:
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Unsupported capabilities policy: %s"),
                           virDomainCapabilitiesPolicyTypeToString(policy));
            /* An error was reported, so fail instead of carrying on
             * and eventually returning success */
            return -1;
        }
    }

    if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to apply capabilities: %d"), ret);
        return -1;
    }

    /* We do not need to call capng_lock() in this case. The bounding
     * set restriction will prevent them reacquiring sys_boot/module/time,
     * etc which is all that matters for the container. Once inside the
     * container it is fine for SECURE_NOROOT / SECURE_NO_SETUID_FIXUP to
     * be unmasked - they can never escape the bounding set. */

    return 0;
}
|
2009-06-29 17:09:42 +00:00
|
|
|
#else
|
2014-07-18 08:02:29 +00:00
|
|
|
static int lxcContainerDropCapabilities(virDomainDefPtr def ATTRIBUTE_UNUSED,
|
|
|
|
bool keepReboot ATTRIBUTE_UNUSED)
|
2013-07-11 06:05:20 +00:00
|
|
|
{
|
2011-05-09 09:24:09 +00:00
|
|
|
VIR_WARN("libcap-ng support not compiled in, unable to clear capabilities");
|
2009-05-11 14:05:27 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2013-07-11 06:05:20 +00:00
|
|
|
#endif
|
2009-05-11 14:05:27 +00:00
|
|
|
|
|
|
|
|
2015-08-20 13:46:17 +00:00
|
|
|
/**
|
|
|
|
* lxcAttachNS:
|
|
|
|
* @ns_fd: array of namespaces to attach
|
|
|
|
*/
|
|
|
|
static int lxcAttachNS(int *ns_fd)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
if (ns_fd)
|
|
|
|
for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) {
|
|
|
|
if (ns_fd[i] < 0)
|
|
|
|
continue;
|
|
|
|
VIR_DEBUG("Setting into namespace\n");
|
|
|
|
/* We get EINVAL if new NS is same as the current
|
|
|
|
* NS, or if the fd namespace doesn't match the
|
|
|
|
* type passed to setns()'s second param. Since we
|
|
|
|
* pass 0, we know the EINVAL is harmless
|
|
|
|
*/
|
|
|
|
if (setns(ns_fd[i], 0) < 0 &&
|
|
|
|
errno != EINVAL) {
|
|
|
|
virReportSystemError(errno, _("failed to set namespace '%s'"),
|
|
|
|
virLXCDomainNamespaceTypeToString(i));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
VIR_FORCE_CLOSE(ns_fd[i]);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-04-10 07:30:52 +00:00
|
|
|
/**
|
2009-11-05 12:35:13 +00:00
|
|
|
* lxcContainerChild:
|
|
|
|
* @data: pointer to container arguments
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* This function is run in the process clone()'d in lxcStartContainer.
|
|
|
|
* Perform a number of container setup tasks:
|
|
|
|
* Setup container file system
|
|
|
|
* mount container /proc
|
|
|
|
* Then exec's the container init
|
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
2012-10-17 09:23:12 +00:00
|
|
|
static int lxcContainerChild(void *data)
|
2008-04-10 07:30:52 +00:00
|
|
|
{
|
2008-08-13 10:14:47 +00:00
|
|
|
lxc_child_argv_t *argv = data;
|
2008-08-13 12:50:55 +00:00
|
|
|
virDomainDefPtr vmDef = argv->config;
|
2011-06-02 15:01:36 +00:00
|
|
|
int ttyfd = -1;
|
2011-05-06 14:50:00 +00:00
|
|
|
int ret = -1;
|
2011-06-02 15:01:36 +00:00
|
|
|
char *ttyPath = NULL;
|
2009-04-22 14:26:50 +00:00
|
|
|
virDomainFSDefPtr root;
|
2011-05-06 14:50:00 +00:00
|
|
|
virCommandPtr cmd = NULL;
|
2012-07-20 21:16:19 +00:00
|
|
|
int hasReboot;
|
2008-04-10 07:30:52 +00:00
|
|
|
|
|
|
|
if (NULL == vmDef) {
|
2012-07-13 12:59:51 +00:00
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
|
"%s", _("lxcChild() passed invalid vm definition"));
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
2015-08-20 13:46:17 +00:00
|
|
|
if (lxcAttachNS(argv->nsInheritFDs) < 0) {
|
|
|
|
virReportError(VIR_ERR_SYSTEM_ERROR, "%s",
|
|
|
|
_("failed to attach the namespace"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2013-06-07 07:12:21 +00:00
|
|
|
/* Wait for controller to finish setup tasks, including
|
|
|
|
* things like move of network interfaces, uid/gid mapping
|
|
|
|
*/
|
|
|
|
if (lxcContainerWaitForContinue(argv->monitor) < 0) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Failed to read the container continue message"));
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
VIR_DEBUG("Received container continue message");
|
|
|
|
|
2012-07-20 21:16:19 +00:00
|
|
|
if ((hasReboot = lxcContainerHasReboot()) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2013-12-13 16:50:28 +00:00
|
|
|
cmd = lxcContainerBuildInitCmd(vmDef,
|
|
|
|
argv->ttyPaths,
|
|
|
|
argv->nttyPaths);
|
2011-05-06 14:50:00 +00:00
|
|
|
virCommandWriteArgLog(cmd, 1);
|
|
|
|
|
2013-06-07 07:12:21 +00:00
|
|
|
if (lxcContainerSetID(vmDef) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2013-11-12 11:57:56 +00:00
|
|
|
root = virDomainGetFilesystemForTarget(vmDef, "/");
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2011-10-20 08:44:31 +00:00
|
|
|
if (argv->nttyPaths) {
|
2013-05-20 10:12:17 +00:00
|
|
|
const char *tty = argv->ttyPaths[0];
|
|
|
|
if (STRPREFIX(tty, "/dev/pts/"))
|
|
|
|
tty += strlen("/dev/pts/");
|
|
|
|
if (virAsprintf(&ttyPath, "%s/%s.devpts/%s",
|
2013-07-04 10:11:37 +00:00
|
|
|
LXC_STATE_DIR, vmDef->name, tty) < 0)
|
2013-05-20 10:12:17 +00:00
|
|
|
goto cleanup;
|
2014-10-31 08:51:23 +00:00
|
|
|
} else {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
|
|
|
_("At least one tty is required"));
|
|
|
|
goto cleanup;
|
2009-04-22 14:26:50 +00:00
|
|
|
}
|
2011-10-20 08:44:31 +00:00
|
|
|
|
2011-05-06 14:50:00 +00:00
|
|
|
VIR_DEBUG("Container TTY path: %s", ttyPath);
|
2009-04-22 14:26:50 +00:00
|
|
|
|
|
|
|
ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
|
2008-08-28 22:40:50 +00:00
|
|
|
if (ttyfd < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to open tty %s"),
|
2009-04-22 14:26:50 +00:00
|
|
|
ttyPath);
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2014-11-21 16:45:55 +00:00
|
|
|
if (lxcContainerResolveAllSymlinks(vmDef) < 0)
|
2013-04-03 15:19:24 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
2013-07-09 13:24:10 +00:00
|
|
|
VIR_DEBUG("Setting up pivot");
|
2013-04-03 15:19:24 +00:00
|
|
|
if (lxcContainerSetupPivotRoot(vmDef, root,
|
|
|
|
argv->ttyPaths, argv->nttyPaths,
|
|
|
|
argv->securityDriver) < 0)
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2009-04-22 14:26:50 +00:00
|
|
|
|
2011-06-02 19:25:21 +00:00
|
|
|
if (!virFileExists(vmDef->os.init)) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("cannot find init path '%s' relative to container root"),
|
|
|
|
vmDef->os.init);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2009-11-05 13:11:30 +00:00
|
|
|
/* rename and enable interfaces */
|
2014-06-27 08:41:22 +00:00
|
|
|
if (lxcContainerRenameAndEnableInterfaces(vmDef,
|
2012-01-18 11:38:49 +00:00
|
|
|
argv->nveths,
|
2011-06-02 15:01:36 +00:00
|
|
|
argv->veths) < 0) {
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2011-06-02 15:01:36 +00:00
|
|
|
}
|
2008-06-26 16:09:48 +00:00
|
|
|
|
2009-05-11 14:05:27 +00:00
|
|
|
/* drop a set of root capabilities */
|
2014-07-18 08:02:29 +00:00
|
|
|
if (lxcContainerDropCapabilities(vmDef, !!hasReboot) < 0)
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2009-05-11 14:05:27 +00:00
|
|
|
|
2011-06-02 15:52:32 +00:00
|
|
|
if (lxcContainerSendContinue(argv->handshakefd) < 0) {
|
|
|
|
virReportSystemError(errno, "%s",
|
2013-09-06 11:14:00 +00:00
|
|
|
_("Failed to send continue signal to controller"));
|
2011-06-02 15:52:32 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2012-01-25 14:12:53 +00:00
|
|
|
VIR_DEBUG("Setting up security labeling");
|
|
|
|
if (virSecurityManagerSetProcessLabel(argv->securityDriver, vmDef) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2015-01-16 17:20:48 +00:00
|
|
|
VIR_DEBUG("Setting up inherited FDs");
|
2013-07-09 17:15:45 +00:00
|
|
|
VIR_FORCE_CLOSE(argv->handshakefd);
|
|
|
|
VIR_FORCE_CLOSE(argv->monitor);
|
|
|
|
if (lxcContainerSetupFDs(&ttyfd,
|
|
|
|
argv->npassFDs, argv->passFDs) < 0)
|
2012-05-01 09:48:52 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
2011-06-02 15:01:36 +00:00
|
|
|
ret = 0;
|
2014-03-25 06:49:26 +00:00
|
|
|
cleanup:
|
2011-06-02 15:01:36 +00:00
|
|
|
VIR_FREE(ttyPath);
|
|
|
|
VIR_FORCE_CLOSE(ttyfd);
|
2011-06-02 15:18:14 +00:00
|
|
|
VIR_FORCE_CLOSE(argv->monitor);
|
2011-06-02 15:52:32 +00:00
|
|
|
VIR_FORCE_CLOSE(argv->handshakefd);
|
2011-06-02 15:01:36 +00:00
|
|
|
|
|
|
|
if (ret == 0) {
|
2015-01-16 17:20:48 +00:00
|
|
|
VIR_DEBUG("Executing init binary");
|
2011-10-10 20:02:06 +00:00
|
|
|
/* this function will only return if an error occurred */
|
2011-06-02 15:01:36 +00:00
|
|
|
ret = virCommandExec(cmd);
|
|
|
|
}
|
2013-10-14 12:04:50 +00:00
|
|
|
|
|
|
|
if (ret != 0) {
|
2015-01-16 17:20:48 +00:00
|
|
|
VIR_DEBUG("Tearing down container");
|
2013-10-14 12:04:50 +00:00
|
|
|
virErrorPtr err = virGetLastError();
|
|
|
|
if (err && err->message)
|
|
|
|
fprintf(stderr, "%s\n", err->message);
|
|
|
|
else
|
|
|
|
fprintf(stderr, "%s\n",
|
|
|
|
_("Unknown failure in libvirt_lxc startup"));
|
|
|
|
}
|
2011-06-02 15:01:36 +00:00
|
|
|
|
2011-05-06 14:50:00 +00:00
|
|
|
virCommandFree(cmd);
|
|
|
|
return ret;
|
2008-08-13 10:25:34 +00:00
|
|
|
}
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2009-04-20 12:27:12 +00:00
|
|
|
static int userns_supported(void)
|
|
|
|
{
|
|
|
|
return lxcContainerAvailable(LXC_CONTAINER_FEATURE_USER) == 0;
|
2013-06-07 07:12:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Reports whether @def asks for a user namespace, i.e. whether it
 * carries both a uid map and a gid map.
 *
 * Returns 1 when both maps are present, 0 otherwise.
 */
static int userns_required(virDomainDefPtr def)
{
    if (!def->idmap.uidmap)
        return 0;

    if (!def->idmap.gidmap)
        return 0;

    return 1;
}
|
|
|
|
|
2012-12-10 22:28:09 +00:00
|
|
|
/*
 * Maps a 64bit architecture to the 32bit architecture that it can
 * also run via a personality.
 *
 * Returns the matching 32bit architecture, or VIR_ARCH_NONE when
 * @arch has no entry in the table below.
 */
virArch lxcContainerGetAlt32bitArch(virArch arch)
{
    /* Any Linux 64bit arch which has a 32bit
     * personality available should be listed here */
    static const struct {
        virArch wide;
        virArch narrow;
    } alternates[] = {
        { VIR_ARCH_X86_64,   VIR_ARCH_I686 },
        { VIR_ARCH_S390X,    VIR_ARCH_S390 },
        { VIR_ARCH_PPC64,    VIR_ARCH_PPC },
        { VIR_ARCH_PARISC64, VIR_ARCH_PARISC },
        { VIR_ARCH_SPARC64,  VIR_ARCH_SPARC },
        { VIR_ARCH_MIPS64,   VIR_ARCH_MIPS },
        { VIR_ARCH_MIPS64EL, VIR_ARCH_MIPSEL },
    };
    size_t idx;

    for (idx = 0; idx < sizeof(alternates) / sizeof(alternates[0]); idx++) {
        if (alternates[idx].wide == arch)
            return alternates[idx].narrow;
    }

    return VIR_ARCH_NONE;
}
|
|
|
|
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
/**
|
|
|
|
* lxcContainerStart:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @def: pointer to virtual machine structure
|
|
|
|
* @nveths: number of interfaces
|
|
|
|
* @veths: interface names
|
|
|
|
* @control: control FD to the container
|
|
|
|
* @ttyPath: path of tty to set as the container console
|
2008-08-13 10:25:34 +00:00
|
|
|
*
|
|
|
|
* Starts a container process by calling clone() with the namespace flags
|
|
|
|
*
|
|
|
|
* Returns PID of container on success or -1 in case of error
|
|
|
|
*/
|
2008-08-13 12:50:55 +00:00
|
|
|
int lxcContainerStart(virDomainDefPtr def,
|
2012-01-25 14:12:53 +00:00
|
|
|
virSecurityManagerPtr securityDriver,
|
2012-07-03 11:06:38 +00:00
|
|
|
size_t nveths,
|
2008-08-13 10:52:15 +00:00
|
|
|
char **veths,
|
2013-07-09 17:15:45 +00:00
|
|
|
size_t npassFDs,
|
|
|
|
int *passFDs,
|
2008-08-13 10:25:34 +00:00
|
|
|
int control,
|
2011-06-02 15:52:32 +00:00
|
|
|
int handshakefd,
|
2015-08-20 13:46:17 +00:00
|
|
|
int *nsInheritFDs,
|
2013-07-09 17:15:45 +00:00
|
|
|
size_t nttyPaths,
|
|
|
|
char **ttyPaths)
|
2008-08-13 10:25:34 +00:00
|
|
|
{
|
|
|
|
pid_t pid;
|
2011-07-06 22:33:53 +00:00
|
|
|
int cflags;
|
2008-08-13 10:25:34 +00:00
|
|
|
int stacksize = getpagesize() * 4;
|
|
|
|
char *stack, *stacktop;
|
2013-07-09 17:15:45 +00:00
|
|
|
lxc_child_argv_t args = {
|
|
|
|
.config = def,
|
|
|
|
.securityDriver = securityDriver,
|
|
|
|
.nveths = nveths,
|
|
|
|
.veths = veths,
|
|
|
|
.npassFDs = npassFDs,
|
|
|
|
.passFDs = passFDs,
|
|
|
|
.monitor = control,
|
|
|
|
.nttyPaths = nttyPaths,
|
|
|
|
.ttyPaths = ttyPaths,
|
2015-08-20 13:46:17 +00:00
|
|
|
.handshakefd = handshakefd,
|
|
|
|
.nsInheritFDs = nsInheritFDs,
|
2013-07-09 17:15:45 +00:00
|
|
|
};
|
2008-08-13 10:25:34 +00:00
|
|
|
|
|
|
|
/* allocate a stack for the container */
|
2013-07-04 10:11:37 +00:00
|
|
|
if (VIR_ALLOC_N(stack, stacksize) < 0)
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
2013-11-25 07:06:29 +00:00
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
stacktop = stack + stacksize;
|
|
|
|
|
2015-08-20 13:46:17 +00:00
|
|
|
cflags = CLONE_NEWPID|CLONE_NEWNS|SIGCHLD;
|
2009-04-20 12:27:12 +00:00
|
|
|
|
2013-06-07 07:12:19 +00:00
|
|
|
if (userns_required(def)) {
|
|
|
|
if (userns_supported()) {
|
|
|
|
VIR_DEBUG("Enable user namespace");
|
|
|
|
cflags |= CLONE_NEWUSER;
|
|
|
|
} else {
|
2014-11-28 08:37:42 +00:00
|
|
|
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
|
|
|
_("Kernel doesn't support user namespace"));
|
2013-07-03 11:19:50 +00:00
|
|
|
VIR_FREE(stack);
|
2013-06-07 07:12:19 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2010-03-04 11:23:28 +00:00
|
|
|
}
|
2015-08-20 13:46:17 +00:00
|
|
|
if (!nsInheritFDs || nsInheritFDs[VIR_LXC_DOMAIN_NAMESPACE_SHARENET] == -1) {
|
|
|
|
if (lxcNeedNetworkNamespace(def)) {
|
|
|
|
VIR_DEBUG("Enable network namespaces");
|
|
|
|
cflags |= CLONE_NEWNET;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (lxcNeedNetworkNamespace(def)) {
|
|
|
|
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
|
|
|
_("Config askes for inherit net namespace "
|
|
|
|
"as well as private network interfaces"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
VIR_DEBUG("Inheriting a net namespace");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!nsInheritFDs || nsInheritFDs[VIR_LXC_DOMAIN_NAMESPACE_SHAREIPC] == -1) {
|
|
|
|
cflags |= CLONE_NEWIPC;
|
|
|
|
} else {
|
|
|
|
VIR_DEBUG("Inheriting an IPC namespace");
|
|
|
|
}
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2015-08-20 13:46:17 +00:00
|
|
|
if (!nsInheritFDs || nsInheritFDs[VIR_LXC_DOMAIN_NAMESPACE_SHAREUTS] == -1) {
|
|
|
|
cflags |= CLONE_NEWUTS;
|
|
|
|
} else {
|
|
|
|
VIR_DEBUG("Inheriting a UTS namespace");
|
2010-03-04 11:23:28 +00:00
|
|
|
}
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2015-01-16 17:20:48 +00:00
|
|
|
VIR_DEBUG("Cloning container init process");
|
2011-07-06 22:33:53 +00:00
|
|
|
pid = clone(lxcContainerChild, stacktop, cflags, &args);
|
2008-08-13 10:25:34 +00:00
|
|
|
VIR_FREE(stack);
|
2011-02-16 23:37:57 +00:00
|
|
|
VIR_DEBUG("clone() completed, new container PID is %d", pid);
|
2008-08-13 10:25:34 +00:00
|
|
|
|
|
|
|
if (pid < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to run clone container"));
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return pid;
|
|
|
|
}
|
|
|
|
|
2010-07-16 16:16:19 +00:00
|
|
|
ATTRIBUTE_NORETURN static int
|
|
|
|
lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED)
|
2008-08-13 10:25:34 +00:00
|
|
|
{
|
|
|
|
_exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int lxcContainerAvailable(int features)
|
|
|
|
{
|
2009-04-20 12:27:12 +00:00
|
|
|
int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|
|
2008-08-13 10:25:34 +00:00
|
|
|
CLONE_NEWIPC|SIGCHLD;
|
|
|
|
int cpid;
|
|
|
|
char *childStack;
|
|
|
|
char *stack;
|
2013-11-25 07:06:29 +00:00
|
|
|
int stacksize = getpagesize() * 4;
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2009-04-20 12:27:12 +00:00
|
|
|
if (features & LXC_CONTAINER_FEATURE_USER)
|
|
|
|
flags |= CLONE_NEWUSER;
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
if (features & LXC_CONTAINER_FEATURE_NET)
|
|
|
|
flags |= CLONE_NEWNET;
|
|
|
|
|
2013-11-25 07:06:29 +00:00
|
|
|
if (VIR_ALLOC_N(stack, stacksize) < 0)
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
|
|
|
|
2013-11-25 07:06:29 +00:00
|
|
|
childStack = stack + stacksize;
|
2008-08-13 10:25:34 +00:00
|
|
|
|
|
|
|
cpid = clone(lxcContainerDummyChild, childStack, flags, NULL);
|
|
|
|
VIR_FREE(stack);
|
|
|
|
if (cpid < 0) {
|
2011-09-26 16:51:47 +00:00
|
|
|
char ebuf[1024] ATTRIBUTE_UNUSED;
|
2011-02-16 23:37:57 +00:00
|
|
|
VIR_DEBUG("clone call returned %s, container support is not enabled",
|
2012-03-29 09:52:04 +00:00
|
|
|
virStrerror(errno, ebuf, sizeof(ebuf)));
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
2014-02-20 03:23:44 +00:00
|
|
|
} else if (virProcessWait(cpid, NULL, false) < 0) {
|
2011-10-21 17:09:23 +00:00
|
|
|
return -1;
|
2008-08-13 10:25:34 +00:00
|
|
|
}
|
|
|
|
|
2012-06-15 07:41:05 +00:00
|
|
|
VIR_DEBUG("container support is enabled");
|
2008-08-13 10:25:34 +00:00
|
|
|
return 0;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
2013-07-16 02:00:02 +00:00
|
|
|
|
|
|
|
/**
 * lxcContainerChown:
 * @def: domain definition whose id maps supply the new owner
 * @path: path of the file to change ownership of
 *
 * Changes the owner of @path to the target uid/gid of the first entry
 * of the uid and gid maps in @def. Nothing is done unless both maps
 * are present.
 *
 * Returns 0 on success or when there is nothing to do, -1 with an
 * error reported if chown() fails
 */
int lxcContainerChown(virDomainDefPtr def, const char *path)
{
    uid_t uid;
    gid_t gid;

    /* Both maps are dereferenced below, so both must be present;
     * this matches the condition used by userns_required() */
    if (!def->idmap.uidmap || !def->idmap.gidmap)
        return 0;

    uid = def->idmap.uidmap[0].target;
    gid = def->idmap.gidmap[0].target;

    if (chown(path, uid, gid) < 0) {
        virReportSystemError(errno,
                             _("Failed to change owner of %s to %u:%u"),
                             path, (unsigned int) uid, (unsigned int) gid);
        return -1;
    }

    return 0;
}
|