2008-04-10 07:30:52 +00:00
|
|
|
/*
|
2010-03-12 17:47:26 +00:00
|
|
|
* Copyright (C) 2008-2010 Red Hat, Inc.
|
|
|
|
* Copyright (C) 2008 IBM Corp.
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* lxc_container.c: file description
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* David L. Leskovec <dlesko at linux.vnet.ibm.com>
|
2008-08-28 22:40:50 +00:00
|
|
|
* Daniel P. Berrange <berrange@redhat.com>
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <stdlib.h>
|
2008-08-28 22:40:50 +00:00
|
|
|
#include <stdio.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <sys/mount.h>
|
2008-08-13 10:25:34 +00:00
|
|
|
#include <sys/wait.h>
|
2010-01-22 13:21:16 +00:00
|
|
|
#include <sys/stat.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
#include <unistd.h>
|
2008-08-28 22:40:50 +00:00
|
|
|
#include <mntent.h>
|
|
|
|
|
|
|
|
/* Yes, we want linux private one, for _syscall2() macro */
|
|
|
|
#include <linux/unistd.h>
|
|
|
|
|
|
|
|
/* For MS_MOVE */
|
|
|
|
#include <linux/fs.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2009-06-29 17:09:42 +00:00
|
|
|
#if HAVE_CAPNG
|
2010-03-09 18:22:22 +00:00
|
|
|
# include <cap-ng.h>
|
2009-06-29 17:09:42 +00:00
|
|
|
#endif
|
2009-05-11 14:05:27 +00:00
|
|
|
|
2008-11-04 22:30:33 +00:00
|
|
|
#include "virterror_internal.h"
|
2008-11-06 16:36:07 +00:00
|
|
|
#include "logging.h"
|
2008-04-10 07:30:52 +00:00
|
|
|
#include "lxc_container.h"
|
|
|
|
#include "util.h"
|
2008-06-06 11:09:57 +00:00
|
|
|
#include "memory.h"
|
2008-06-26 16:09:48 +00:00
|
|
|
#include "veth.h"
|
2010-11-09 20:48:48 +00:00
|
|
|
#include "files.h"
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2009-01-20 17:13:33 +00:00
|
|
|
#define VIR_FROM_THIS VIR_FROM_LXC
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
/*
 * glibc's headers can lag behind the kernel's, so supply the
 * namespace-related clone() flags ourselves whenever the system
 * headers have not already defined them.
 */
#ifndef CLONE_NEWPID
# define CLONE_NEWPID  0x20000000
#endif
#ifndef CLONE_NEWUTS
# define CLONE_NEWUTS  0x04000000
#endif
#ifndef CLONE_NEWUSER
# define CLONE_NEWUSER 0x10000000
#endif
#ifndef CLONE_NEWIPC
# define CLONE_NEWIPC  0x08000000
#endif
#ifndef CLONE_NEWNET
# define CLONE_NEWNET  0x40000000 /* New network namespace */
#endif
|
|
|
|
|
|
|
|
/* Single-byte messages exchanged between the parent and the container */
typedef char lxc_message_t;
#define LXC_CONTINUE_MSG 'c'
|
|
|
|
|
|
|
|
typedef struct __lxc_child_argv lxc_child_argv_t;
|
|
|
|
struct __lxc_child_argv {
|
2008-08-13 12:50:55 +00:00
|
|
|
virDomainDefPtr config;
|
2008-08-13 10:52:15 +00:00
|
|
|
unsigned int nveths;
|
|
|
|
char **veths;
|
2008-08-13 10:25:34 +00:00
|
|
|
int monitor;
|
|
|
|
char *ttyPath;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2008-04-10 07:30:52 +00:00
|
|
|
/**
|
2008-08-13 10:25:34 +00:00
|
|
|
* lxcContainerExecInit:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @vmDef: pointer to vm definition structure
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
2008-08-13 10:25:34 +00:00
|
|
|
* Exec the container init string. The container init will replace then
|
2008-04-10 07:30:52 +00:00
|
|
|
* be running in the current process
|
|
|
|
*
|
2008-08-13 10:25:34 +00:00
|
|
|
* Does not return
|
2008-04-10 07:30:52 +00:00
|
|
|
*/
|
2008-08-13 12:50:55 +00:00
|
|
|
static int lxcContainerExecInit(virDomainDefPtr vmDef)
|
2008-04-10 07:30:52 +00:00
|
|
|
{
|
2008-08-13 10:25:34 +00:00
|
|
|
const char *const argv[] = {
|
2008-08-13 12:50:55 +00:00
|
|
|
vmDef->os.init,
|
2008-08-13 10:25:34 +00:00
|
|
|
NULL,
|
|
|
|
};
|
2010-03-04 11:23:28 +00:00
|
|
|
const char *const envp[] = {
|
|
|
|
"PATH=/bin:/sbin",
|
|
|
|
"TERM=linux",
|
|
|
|
NULL,
|
|
|
|
};
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2010-03-04 11:23:28 +00:00
|
|
|
return execve(argv[0], (char **)argv,(char**)envp);
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * lxcContainerSetStdio:
 * @control: control FD from parent
 * @ttyfd: FD of tty to set as the container console
 *
 * Starts a new session, makes the given tty its controlling terminal,
 * closes every file descriptor other than @ttyfd and @control, and then
 * duplicates the tty onto stdin, stdout and stderr.
 *
 * Returns 0 on success or -1 in case of error
 */
static int lxcContainerSetStdio(int control, int ttyfd)
{
    int rc = -1;
    int open_max, i;

    if (setsid() < 0) {
        virReportSystemError(errno, "%s",
                             _("setsid failed"));
        goto cleanup;
    }

    if (ioctl(ttyfd, TIOCSCTTY, NULL) < 0) {
        virReportSystemError(errno, "%s",
                             _("ioctl(TIOCSCTTY) failed"));
        goto cleanup;
    }

    /* Just in case someone forgot to set FD_CLOEXEC, explicitly
     * close all FDs before executing the container */
    open_max = sysconf(_SC_OPEN_MAX);
    for (i = 0; i < open_max; i++) {
        if (i != ttyfd && i != control) {
            /* VIR_FORCE_CLOSE is handed a scratch copy, not the loop index */
            int tmpfd = i;
            VIR_FORCE_CLOSE(tmpfd);
        }
    }

    if (dup2(ttyfd, 0) < 0) {
        virReportSystemError(errno, "%s",
                             _("dup2(stdin) failed"));
        goto cleanup;
    }

    if (dup2(ttyfd, 1) < 0) {
        virReportSystemError(errno, "%s",
                             _("dup2(stdout) failed"));
        goto cleanup;
    }

    if (dup2(ttyfd, 2) < 0) {
        virReportSystemError(errno, "%s",
                             _("dup2(stderr) failed"));
        goto cleanup;
    }

    rc = 0;

cleanup:
    return rc;
}
|
|
|
|
|
|
|
|
/**
|
2008-08-13 10:25:34 +00:00
|
|
|
* lxcContainerSendContinue:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @control: control FD to child
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
2008-08-13 10:25:34 +00:00
|
|
|
* Sends the continue message via the socket pair stored in the vm
|
|
|
|
* structure.
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
2008-08-13 10:52:15 +00:00
|
|
|
int lxcContainerSendContinue(int control)
|
2008-04-10 07:30:52 +00:00
|
|
|
{
|
|
|
|
int rc = -1;
|
2008-08-13 10:25:34 +00:00
|
|
|
lxc_message_t msg = LXC_CONTINUE_MSG;
|
|
|
|
int writeCount = 0;
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
writeCount = safewrite(control, &msg, sizeof(msg));
|
|
|
|
if (writeCount != sizeof(msg)) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Unable to send container continue message"));
|
2008-08-13 10:25:34 +00:00
|
|
|
goto error_out;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
rc = 0;
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
error_out:
|
|
|
|
return rc;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
2008-06-26 16:09:48 +00:00
|
|
|
/**
|
2008-08-13 10:25:34 +00:00
|
|
|
* lxcContainerWaitForContinue:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @control: Control FD from parent
|
2008-06-26 16:09:48 +00:00
|
|
|
*
|
|
|
|
* This function will wait for the container continue message from the
|
|
|
|
* parent process. It will send this message on the socket pair stored in
|
|
|
|
* the vm structure once it has completed the post clone container setup.
|
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
2008-08-13 10:25:34 +00:00
|
|
|
static int lxcContainerWaitForContinue(int control)
|
2008-06-26 16:09:48 +00:00
|
|
|
{
|
|
|
|
lxc_message_t msg;
|
|
|
|
int readLen;
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
readLen = saferead(control, &msg, sizeof(msg));
|
2008-08-13 10:14:47 +00:00
|
|
|
if (readLen != sizeof(msg) ||
|
|
|
|
msg != LXC_CONTINUE_MSG) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-01-20 17:13:33 +00:00
|
|
|
_("Failed to read the container continue message"));
|
2008-08-13 10:14:47 +00:00
|
|
|
return -1;
|
2008-06-26 16:09:48 +00:00
|
|
|
}
|
2010-11-09 20:48:48 +00:00
|
|
|
VIR_FORCE_CLOSE(control);
|
2008-06-26 16:09:48 +00:00
|
|
|
|
|
|
|
DEBUG0("Received container continue message");
|
|
|
|
|
2008-08-13 10:14:47 +00:00
|
|
|
return 0;
|
2008-06-26 16:09:48 +00:00
|
|
|
}
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2008-06-26 16:09:48 +00:00
|
|
|
/**
 * lxcContainerRenameAndEnableInterfaces:
 * @nveths: number of interfaces
 * @veths: interface names
 *
 * This function will rename the interfaces to ethN
 * with id ascending order from zero and enable the
 * renamed interfaces for this container. If @veths is
 * non-NULL the "lo" device is brought up as well.
 *
 * Returns 0 on success or nonzero in case of error
 */
static int lxcContainerRenameAndEnableInterfaces(unsigned int nveths,
                                                 char **veths)
{
    int rc = 0;
    unsigned int i;
    char *newname = NULL;

    for (i = 0 ; i < nveths ; i++) {
        /* i is unsigned, so format it with %u */
        if (virAsprintf(&newname, "eth%u", i) < 0) {
            virReportOOMError();
            rc = -1;
            goto error_out;
        }

        DEBUG("Renaming %s to %s", veths[i], newname);
        rc = setInterfaceName(veths[i], newname);
        if (rc < 0)
            goto error_out;

        DEBUG("Enabling %s", newname);
        rc = vethInterfaceUpOrDown(newname, 1);
        if (rc < 0)
            goto error_out;

        VIR_FREE(newname);
    }

    /* enable lo device only if there were other net devices */
    if (veths)
        rc = vethInterfaceUpOrDown("lo", 1);

error_out:
    VIR_FREE(newname);
    return rc;
}
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
|
|
|
|
/* Prototype for pivot_root(2); declared here by hand, presumably because
 * none of the headers included by this file provides it. */
extern int pivot_root(const char *new_root, const char *put_old);
|
|
|
|
|
|
|
|
/*
 * qsort() comparator for an array of mount point path strings.
 *
 * @a, @b: pointers to the two array elements (each a char *)
 *
 * The arguments to strcmp() are deliberately reversed so the array
 * ends up in descending order: a path sorts before any of its own
 * prefixes, which means the deepest children get unmounted first.
 *
 * Returns <0, 0 or >0 following the usual comparator convention
 */
static int lxcContainerChildMountSort(const void *a, const void *b)
{
    const char *const *sa = a;
    const char *const *sb = b;

    return strcmp(*sb, *sa);
}
|
|
|
|
|
2009-04-14 17:51:12 +00:00
|
|
|
/* Fallback definitions of mount()/umount2() flags for when the
 * system headers do not define them */
#ifndef MS_REC
# define MS_REC 16384
#endif

#ifndef MNT_DETACH
# define MNT_DETACH 0x00000002
#endif

#ifndef MS_PRIVATE
# define MS_PRIVATE (1<<18)
#endif

#ifndef MS_SLAVE
# define MS_SLAVE (1<<19)
#endif
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
/*
 * lxcContainerPivotRoot:
 * @root: filesystem definition whose src directory is to become '/'
 *
 * Makes '/' recursively private, mounts a tmpfs on <root->src>/.oldroot,
 * bind-mounts root->src onto the 'new' directory inside that tmpfs and
 * finally pivot_root()s into the bind mount, leaving the previous
 * root visible at /.oldroot.
 *
 * Returns 0 on success or -1 in case of error (an error is reported)
 */
static int lxcContainerPivotRoot(virDomainFSDefPtr root)
{
    int rc, ret;
    char *oldroot = NULL, *newroot = NULL;

    ret = -1;

    /* root->parent must be private, so make / private. */
    if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to make root private"));
        goto err;
    }

    if (virAsprintf(&oldroot, "%s/.oldroot", root->src) < 0) {
        virReportOOMError();
        goto err;
    }

    /* virFileMakePath() hands back the error code itself */
    if ((rc = virFileMakePath(oldroot)) != 0) {
        virReportSystemError(rc,
                             _("Failed to create %s"),
                             oldroot);
        goto err;
    }

    /* Create a tmpfs root since old and new roots must be
     * on separate filesystems */
    if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount empty tmpfs at %s"),
                             oldroot);
        goto err;
    }

    /* Create a directory called 'new' in tmpfs */
    if (virAsprintf(&newroot, "%s/new", oldroot) < 0) {
        virReportOOMError();
        goto err;
    }

    if ((rc = virFileMakePath(newroot)) != 0) {
        virReportSystemError(rc,
                             _("Failed to create %s"),
                             newroot);
        goto err;
    }

    /* ... and mount our root onto it */
    if (mount(root->src, newroot, NULL, MS_BIND|MS_REC, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to bind new root %s into tmpfs"),
                             root->src);
        goto err;
    }

    /* Now we chdir into the tmpfs, then pivot into the
     * root->src bind-mounted onto '/new' */
    if (chdir(newroot) < 0) {
        virReportSystemError(errno,
                             _("Failed to chdir into %s"), newroot);
        goto err;
    }

    /* The old root directory will live at /.oldroot after
     * this and will soon be unmounted completely */
    if (pivot_root(".", ".oldroot") < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to pivot root"));
        goto err;
    }

    /* CWD is undefined after pivot_root, so go to / */
    if (chdir("/") < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to chdir into / after pivot root"));
        goto err;
    }

    ret = 0;

err:
    VIR_FREE(oldroot);
    VIR_FREE(newroot);

    return ret;
}
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
|
|
|
|
/*
 * lxcContainerMountBasicFS:
 * @root: filesystem definition of the container root; its src path is
 *        used to locate the devpts mount under /.oldroot
 *
 * Creates and mounts the filesystems listed in the table below
 * (/dev, /proc, /sys and, when built WITH_SELINUX, /selinux), then
 * moves the mount found at /.oldroot<root->src>/dev/pts onto /dev/pts.
 *
 * Returns 0 on success or -1 in case of error (an error is reported)
 */
static int lxcContainerMountBasicFS(virDomainFSDefPtr root)
{
    const struct {
        const char *src;
        const char *dst;
        const char *type;
    } mnts[] = {
        { "/dev", "/dev", "tmpfs" },
        { "/proc", "/proc", "proc" },
        { "/sys", "/sys", "sysfs" },
#if WITH_SELINUX
        { "none", "/selinux", "selinuxfs" },
#endif
    };
    size_t i;
    int err;
    int rc = -1;
    char *devpts = NULL;

    if (virAsprintf(&devpts, "/.oldroot%s/dev/pts", root->src) < 0) {
        virReportOOMError();
        return rc;
    }

    for (i = 0 ; i < ARRAY_CARDINALITY(mnts) ; i++) {
        /* virFileMakePath() hands back the error code itself, so
         * report that rather than whatever errno happens to hold */
        if ((err = virFileMakePath(mnts[i].dst)) != 0) {
            virReportSystemError(err,
                                 _("Failed to mkdir %s"),
                                 mnts[i].dst);
            goto cleanup;
        }
        if (mount(mnts[i].src, mnts[i].dst, mnts[i].type, 0, NULL) < 0) {
            virReportSystemError(errno,
                                 _("Failed to mount %s on %s"),
                                 mnts[i].type, mnts[i].dst);
            goto cleanup;
        }
    }

    if ((err = virFileMakePath("/dev/pts")) != 0) {
        virReportSystemError(err, "%s",
                             _("Cannot create /dev/pts"));
        goto cleanup;
    }

    VIR_DEBUG("Trying to move %s to %s", devpts, "/dev/pts");
    if (mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL) < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to mount /dev/pts in container"));
        goto cleanup;
    }

    rc = 0;

cleanup:
    VIR_FREE(devpts);

    return rc;
}
|
|
|
|
|
|
|
|
static int lxcContainerPopulateDevices(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
const struct {
|
|
|
|
int maj;
|
|
|
|
int min;
|
|
|
|
mode_t mode;
|
|
|
|
const char *path;
|
|
|
|
} devs[] = {
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL, 0666, "/dev/null" },
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO, 0666, "/dev/zero" },
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL, 0666, "/dev/full" },
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM, 0666, "/dev/random" },
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/dev/urandom" },
|
|
|
|
};
|
2008-08-28 22:40:50 +00:00
|
|
|
|
|
|
|
/* Populate /dev/ with a few important bits */
|
|
|
|
for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
|
|
|
|
dev_t dev = makedev(devs[i].maj, devs[i].min);
|
2009-05-08 10:22:46 +00:00
|
|
|
if (mknod(devs[i].path, S_IFCHR, dev) < 0 ||
|
2008-08-28 22:40:50 +00:00
|
|
|
chmod(devs[i].path, devs[i].mode)) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to make device %s"),
|
2009-01-20 17:13:33 +00:00
|
|
|
devs[i].path);
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
if (access("/dev/pts/ptmx", W_OK) == 0) {
|
|
|
|
if (symlink("/dev/pts/ptmx", "/dev/ptmx") < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to create symlink /dev/ptmx to /dev/pts/ptmx"));
|
2009-04-22 14:26:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX);
|
2009-05-08 10:22:46 +00:00
|
|
|
if (mknod("/dev/ptmx", S_IFCHR, dev) < 0 ||
|
2009-04-22 14:26:50 +00:00
|
|
|
chmod("/dev/ptmx", 0666)) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to make device /dev/ptmx"));
|
2009-04-22 14:26:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-03-04 11:23:28 +00:00
|
|
|
/* XXX we should allow multiple consoles per container
|
|
|
|
* for tty2, tty3, etc, but the domain XML does not
|
|
|
|
* handle this yet
|
|
|
|
*/
|
|
|
|
if (symlink("/dev/pts/0", "/dev/tty1") < 0) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Failed to symlink /dev/pts/0 to /dev/tty1"));
|
|
|
|
return -1;
|
|
|
|
}
|
2010-11-05 13:27:34 +00:00
|
|
|
if (symlink("/dev/pts/0", "/dev/console") < 0) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Failed to symlink /dev/pts/0 to /dev/console"));
|
|
|
|
return -1;
|
|
|
|
}
|
2009-04-22 14:26:50 +00:00
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * lxcContainerMountNewFS:
 * @vmDef: domain definition listing the container's filesystems
 *
 * For every filesystem of type VIR_DOMAIN_FS_TYPE_MOUNT whose
 * destination is not "/", creates the destination directory and
 * bind-mounts the source, looked up beneath /.oldroot, onto it.
 *
 * Returns 0 on success or -1 in case of error (an error is reported)
 */
static int lxcContainerMountNewFS(virDomainDefPtr vmDef)
{
    int i;

    /* Pull in rest of container's mounts */
    for (i = 0 ; i < vmDef->nfss ; i++) {
        char *src;
        int err;

        if (STREQ(vmDef->fss[i]->dst, "/"))
            continue;
        /* XXX fix: other filesystem types are silently skipped */
        if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
            continue;

        if (virAsprintf(&src, "/.oldroot/%s", vmDef->fss[i]->src) < 0) {
            virReportOOMError();
            return -1;
        }

        /* virFileMakePath() hands back the error code itself, so
         * report that rather than whatever errno happens to hold */
        if ((err = virFileMakePath(vmDef->fss[i]->dst)) != 0) {
            virReportSystemError(err,
                                 _("Failed to create %s"),
                                 vmDef->fss[i]->dst);
            VIR_FREE(src);
            return -1;
        }
        if (mount(src, vmDef->fss[i]->dst, NULL, MS_BIND, NULL) < 0) {
            virReportSystemError(errno,
                                 _("Failed to mount %s at %s"),
                                 src, vmDef->fss[i]->dst);
            VIR_FREE(src);
            return -1;
        }
        VIR_FREE(src);
    }

    return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
static int lxcContainerUnmountOldFS(void)
|
|
|
|
{
|
2009-01-22 19:41:48 +00:00
|
|
|
struct mntent mntent;
|
2008-08-28 22:40:50 +00:00
|
|
|
char **mounts = NULL;
|
|
|
|
int nmounts = 0;
|
|
|
|
FILE *procmnt;
|
|
|
|
int i;
|
2009-01-22 19:41:48 +00:00
|
|
|
char mntbuf[1024];
|
2008-08-28 22:40:50 +00:00
|
|
|
|
|
|
|
if (!(procmnt = setmntent("/proc/mounts", "r"))) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to read /proc/mounts"));
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2009-01-22 19:41:48 +00:00
|
|
|
while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
|
2009-04-22 14:26:50 +00:00
|
|
|
VIR_DEBUG("Got %s", mntent.mnt_dir);
|
2009-01-22 19:41:48 +00:00
|
|
|
if (!STRPREFIX(mntent.mnt_dir, "/.oldroot"))
|
2008-08-28 22:40:50 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (VIR_REALLOC_N(mounts, nmounts+1) < 0) {
|
|
|
|
endmntent(procmnt);
|
2010-02-04 18:19:08 +00:00
|
|
|
virReportOOMError();
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2009-01-22 19:41:48 +00:00
|
|
|
if (!(mounts[nmounts++] = strdup(mntent.mnt_dir))) {
|
2008-08-28 22:40:50 +00:00
|
|
|
endmntent(procmnt);
|
2010-02-04 18:19:08 +00:00
|
|
|
virReportOOMError();
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
endmntent(procmnt);
|
|
|
|
|
2009-09-02 07:58:50 +00:00
|
|
|
if (mounts)
|
|
|
|
qsort(mounts, nmounts, sizeof(mounts[0]),
|
|
|
|
lxcContainerChildMountSort);
|
2008-08-28 22:40:50 +00:00
|
|
|
|
|
|
|
for (i = 0 ; i < nmounts ; i++) {
|
2009-04-22 14:26:50 +00:00
|
|
|
VIR_DEBUG("Umount %s", mounts[i]);
|
2008-08-28 22:40:50 +00:00
|
|
|
if (umount(mounts[i]) < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to unmount '%s'"),
|
2009-01-20 17:13:33 +00:00
|
|
|
mounts[i]);
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
VIR_FREE(mounts[i]);
|
|
|
|
}
|
|
|
|
VIR_FREE(mounts);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Got a FS mapped to /, we're going the pivot_root
|
|
|
|
* approach to do a better-chroot-than-chroot
|
|
|
|
* this is based on this thread http://lkml.org/lkml/2008/3/5/29
|
|
|
|
*/
|
|
|
|
static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
|
|
|
|
virDomainFSDefPtr root)
|
|
|
|
{
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Gives us a private root, leaving all parent OS mounts on /.oldroot */
|
2008-08-28 22:40:50 +00:00
|
|
|
if (lxcContainerPivotRoot(root) < 0)
|
|
|
|
return -1;
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Mounts the core /proc, /sys, /dev, /dev/pts filesystems */
|
|
|
|
if (lxcContainerMountBasicFS(root) < 0)
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Populates device nodes in /dev/ */
|
2008-08-28 22:40:50 +00:00
|
|
|
if (lxcContainerPopulateDevices() < 0)
|
|
|
|
return -1;
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Sets up any non-root mounts from guest config */
|
2008-08-28 22:40:50 +00:00
|
|
|
if (lxcContainerMountNewFS(vmDef) < 0)
|
|
|
|
return -1;
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Gets rid of all remaining mounts from host OS, including /.oldroot itself */
|
2008-08-28 22:40:50 +00:00
|
|
|
if (lxcContainerUnmountOldFS() < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Nothing mapped to /, we're using the main root,
|
|
|
|
but with extra stuff mapped in */
|
|
|
|
static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef)
|
|
|
|
{
|
2008-10-10 16:08:01 +00:00
|
|
|
int i;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2009-04-14 17:51:12 +00:00
|
|
|
if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to make / slave"));
|
2009-04-14 17:51:12 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2008-10-10 16:08:01 +00:00
|
|
|
for (i = 0 ; i < vmDef->nfss ; i++) {
|
2008-08-28 22:40:50 +00:00
|
|
|
// XXX fix to support other mount types
|
2008-10-10 16:08:01 +00:00
|
|
|
if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
|
2008-08-28 22:40:50 +00:00
|
|
|
continue;
|
|
|
|
|
2008-10-10 16:08:01 +00:00
|
|
|
if (mount(vmDef->fss[i]->src,
|
|
|
|
vmDef->fss[i]->dst,
|
2008-08-28 22:40:50 +00:00
|
|
|
NULL,
|
|
|
|
MS_BIND,
|
|
|
|
NULL) < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to mount %s at %s"),
|
2009-01-20 17:13:33 +00:00
|
|
|
vmDef->fss[i]->src,
|
|
|
|
vmDef->fss[i]->dst);
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mount /proc */
|
|
|
|
if (mount("lxcproc", "/proc", "proc", 0, NULL) < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to mount /proc"));
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
static int lxcContainerSetupMounts(virDomainDefPtr vmDef,
|
|
|
|
virDomainFSDefPtr root)
|
2008-08-28 22:40:50 +00:00
|
|
|
{
|
|
|
|
if (root)
|
|
|
|
return lxcContainerSetupPivotRoot(vmDef, root);
|
|
|
|
else
|
|
|
|
return lxcContainerSetupExtraMounts(vmDef);
|
|
|
|
}
|
|
|
|
|
2009-06-29 17:09:42 +00:00
|
|
|
|
|
|
|
/*
 * This is running as the 'init' process inside the container.
 * It removes some capabilities that could be dangerous to
 * host system, since they are not currently "containerized"
 *
 * When built without libcap-ng support this only logs a warning.
 *
 * Returns 0 on success (or when libcap-ng is unavailable), -1 if the
 * capabilities could not be updated or applied
 */
static int lxcContainerDropCapabilities(void)
{
#if HAVE_CAPNG
    int ret;

    capng_get_caps_process();

    if ((ret = capng_updatev(CAPNG_DROP,
                             CAPNG_EFFECTIVE | CAPNG_PERMITTED |
                             CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
                             CAP_SYS_BOOT, /* No use of reboot */
                             CAP_SYS_MODULE, /* No kernel module loading */
                             CAP_SYS_TIME, /* No changing the clock */
                             CAP_AUDIT_CONTROL, /* No messing with auditing status */
                             CAP_MAC_ADMIN, /* No messing with LSM config */
                             -1 /* sentinel */)) < 0) {
        lxcError(VIR_ERR_INTERNAL_ERROR,
                 _("Failed to remove capabilities: %d"), ret);
        return -1;
    }

    if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
        lxcError(VIR_ERR_INTERNAL_ERROR,
                 _("Failed to apply capabilities: %d"), ret);
        return -1;
    }

    /* We do not need to call capng_lock() in this case. The bounding
     * set restriction will prevent them reacquiring sys_boot/module/time,
     * etc which is all that matters for the container. Once inside the
     * container it is fine for SECURE_NOROOT / SECURE_NO_SETUID_FIXUP to
     * be unmasked - they can never escape the bounding set. */

#else
    VIR_WARN0("libcap-ng support not compiled in, unable to clear capabilities");
#endif
    return 0;
}
|
|
|
|
|
|
|
|
|
2008-04-10 07:30:52 +00:00
|
|
|
/**
|
2009-11-05 12:35:13 +00:00
|
|
|
* lxcContainerChild:
|
|
|
|
* @data: pointer to container arguments
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* This function is run in the process clone()'d in lxcStartContainer.
|
|
|
|
* Perform a number of container setup tasks:
|
|
|
|
* Setup container file system
|
|
|
|
* mount container /proca
|
|
|
|
* Then exec's the container init
|
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
2008-08-13 10:25:34 +00:00
|
|
|
static int lxcContainerChild( void *data )
|
2008-04-10 07:30:52 +00:00
|
|
|
{
|
2008-08-13 10:14:47 +00:00
|
|
|
lxc_child_argv_t *argv = data;
|
2008-08-13 12:50:55 +00:00
|
|
|
virDomainDefPtr vmDef = argv->config;
|
2008-08-28 22:40:50 +00:00
|
|
|
int ttyfd;
|
2009-04-22 14:26:50 +00:00
|
|
|
char *ttyPath;
|
|
|
|
virDomainFSDefPtr root;
|
2008-04-10 07:30:52 +00:00
|
|
|
|
|
|
|
if (NULL == vmDef) {
|
2010-02-09 18:22:56 +00:00
|
|
|
lxcError(VIR_ERR_INTERNAL_ERROR,
|
avoid many format string warnings
Building with --disable-nls exposed many new warnings like these:
virsh.c:4952: warning: format not a string literal and no format ...
util.c:163: warning: format not a string literal and no format arguments
All but one of the following changes add a "%s" argument before
the offending _(...) argument.
This was the only manual change:
* src/lxc_driver.c (lxcVersion): Use %s and strerror(errno)
rather than %m, to avoid a warning from gcc -Wformat-security.
Add "%s" before each warned about format-string-with-no-%-directive:
* src/domain_conf.c (virDomainHostdevSubsysUsbDefParseXML)
(virDomainDefParseString, virDomainDefParseFile):
* src/hash.c (virGetConnect, __virGetDomain, virReleaseDomain)
(__virGetNetwork, virReleaseNetwork, __virGetStoragePool)
(virReleaseStoragePool, __virGetStorageVol, virReleaseStorageVol):
* src/lxc_container.c (lxcContainerChild):
* src/lxc_driver.c (lxcDomainDefine, lxcDomainUndefine)
(lxcDomainGetInfo, lxcGetOSType, lxcDomainDumpXML)
(lxcSetupInterfaces, lxcDomainStart, lxcDomainCreateAndStart)
(lxcVersion, lxcGetSchedulerParameters):
* src/network_conf.c (virNetworkDefParseString)
(virNetworkDefParseFile):
* src/openvz_conf.c (openvzReadNetworkConf, openvzLoadDomains):
* src/openvz_driver.c (openvzDomainDefineCmd)
(openvzDomainGetInfo, openvzDomainDumpXML, openvzDomainShutdown)
(openvzDomainReboot, ADD_ARG_LIT, openvzDomainDefineXML)
(openvzDomainCreateXML, openvzDomainCreate, openvzDomainUndefine)
(openvzDomainSetAutostart, openvzDomainGetAutostart)
(openvzDomainSetVcpus):
* src/qemu_driver.c (qemudDomainBlockPeek, qemudDomainMemoryPeek):
* src/remote_internal.c (remoteDomainBlockPeek)
(remoteDomainMemoryPeek, remoteAuthPolkit):
* src/sexpr.c (sexpr_new, _string2sexpr):
* src/storage_backend_disk.c (virStorageBackendDiskMakeDataVol)
(virStorageBackendDiskCreateVol):
* src/storage_backend_fs.c
(virStorageBackendFileSystemNetFindPoolSources):
* src/storage_backend_logical.c (virStorageBackendLogicalFindLVs)
(virStorageBackendLogicalFindPoolSources):
* src/test.c (testOpenDefault, testOpenFromFile, testOpen)
(testGetDomainInfo, testDomainRestore)
(testNodeGetCellsFreeMemory):
* src/util.c (virExec):
* src/virsh.c (cmdAttachDevice, cmdDetachDevice)
(cmdAttachInterface, cmdDetachInterface, cmdAttachDisk)
(cmdDetachDisk, cmdEdit):
* src/xend_internal.c (do_connect, wr_sync, xend_op_ext)
(urlencode, xenDaemonDomainCreateXML)
(xenDaemonDomainLookupByName_ids, xenDaemonDomainLookupByID)
(xenDaemonParseSxprOS, xend_parse_sexp_desc_char)
(xenDaemonParseSxprChar, xenDaemonParseSxprDisks)
(xenDaemonParseSxpr, sexpr_to_xend_topology, sexpr_to_domain)
(xenDaemonDomainFetch, xenDaemonDomainGetAutostart)
(xenDaemonDomainSetAutostart, xenDaemonDomainMigratePerform)
(xenDaemonDomainDefineXML, xenDaemonGetSchedulerType)
(xenDaemonGetSchedulerParameters)
(xenDaemonSetSchedulerParameters, xenDaemonDomainBlockPeek)
(xenDaemonFormatSxprChr, virDomainXMLDevID):
* src/xm_internal.c (xenXMConfigCacheRefresh, xenXMDomainPinVcpu)
(xenXMDomainCreate, xenXMDomainDefineXML)
(xenXMDomainAttachDevice, xenXMDomainDetachDevice):
* src/xml.c (virXPathString, virXPathNumber, virXPathLong)
(virXPathULong, virXPathBoolean, virXPathNode, virXPathNodeSet):
* src/xs_internal.c (xenStoreOpen):
2008-10-13 16:46:28 +00:00
|
|
|
"%s", _("lxcChild() passed invalid vm definition"));
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
root = virDomainGetRootFilesystem(vmDef);
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
if (root) {
|
|
|
|
if (virAsprintf(&ttyPath, "%s%s", root->src, argv->ttyPath) < 0) {
|
2010-02-04 18:19:08 +00:00
|
|
|
virReportOOMError();
|
2009-04-22 14:26:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (!(ttyPath = strdup(argv->ttyPath))) {
|
2010-02-04 18:19:08 +00:00
|
|
|
virReportOOMError();
|
2009-04-22 14:26:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
|
2008-08-28 22:40:50 +00:00
|
|
|
if (ttyfd < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to open tty %s"),
|
2009-04-22 14:26:50 +00:00
|
|
|
ttyPath);
|
2009-11-10 11:56:11 +00:00
|
|
|
VIR_FREE(ttyPath);
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
2009-04-22 14:26:50 +00:00
|
|
|
VIR_FREE(ttyPath);
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) {
|
2010-11-09 20:48:48 +00:00
|
|
|
VIR_FORCE_CLOSE(ttyfd);
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
2008-08-28 22:40:50 +00:00
|
|
|
}
|
2010-11-09 20:48:48 +00:00
|
|
|
VIR_FORCE_CLOSE(ttyfd);
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
if (lxcContainerSetupMounts(vmDef, root) < 0)
|
|
|
|
return -1;
|
|
|
|
|
2008-06-26 16:09:48 +00:00
|
|
|
/* Wait for interface devices to show up */
|
2008-08-13 10:25:34 +00:00
|
|
|
if (lxcContainerWaitForContinue(argv->monitor) < 0)
|
|
|
|
return -1;
|
2008-06-26 16:09:48 +00:00
|
|
|
|
2009-11-05 13:11:30 +00:00
|
|
|
/* rename and enable interfaces */
|
|
|
|
if (lxcContainerRenameAndEnableInterfaces(argv->nveths,
|
|
|
|
argv->veths) < 0)
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
2008-06-26 16:09:48 +00:00
|
|
|
|
2009-05-11 14:05:27 +00:00
|
|
|
/* drop a set of root capabilities */
|
2009-06-29 17:09:42 +00:00
|
|
|
if (lxcContainerDropCapabilities() < 0)
|
2009-05-11 14:05:27 +00:00
|
|
|
return -1;
|
|
|
|
|
2008-04-10 07:30:52 +00:00
|
|
|
/* this function will only return if an error occured */
|
2008-08-13 10:25:34 +00:00
|
|
|
return lxcContainerExecInit(vmDef);
|
|
|
|
}
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2009-04-20 12:27:12 +00:00
|
|
|
static int userns_supported(void)
|
|
|
|
{
|
|
|
|
return lxcContainerAvailable(LXC_CONTAINER_FEATURE_USER) == 0;
|
|
|
|
}
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
/**
|
|
|
|
* lxcContainerStart:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @def: pointer to virtual machine structure
|
|
|
|
* @nveths: number of interfaces
|
|
|
|
* @veths: interface names
|
|
|
|
* @control: control FD to the container
|
|
|
|
* @ttyPath: path of tty to set as the container console
|
2008-08-13 10:25:34 +00:00
|
|
|
*
|
|
|
|
* Starts a container process by calling clone() with the namespace flags
|
|
|
|
*
|
|
|
|
* Returns PID of container on success or -1 in case of error
|
|
|
|
*/
|
2008-08-13 12:50:55 +00:00
|
|
|
int lxcContainerStart(virDomainDefPtr def,
|
2008-08-13 10:52:15 +00:00
|
|
|
unsigned int nveths,
|
|
|
|
char **veths,
|
2008-08-13 10:25:34 +00:00
|
|
|
int control,
|
|
|
|
char *ttyPath)
|
|
|
|
{
|
|
|
|
pid_t pid;
|
|
|
|
int flags;
|
|
|
|
int stacksize = getpagesize() * 4;
|
|
|
|
char *stack, *stacktop;
|
2008-08-13 10:52:15 +00:00
|
|
|
lxc_child_argv_t args = { def, nveths, veths, control, ttyPath };
|
2008-08-13 10:25:34 +00:00
|
|
|
|
|
|
|
/* allocate a stack for the container */
|
|
|
|
if (VIR_ALLOC_N(stack, stacksize) < 0) {
|
2010-02-04 18:19:08 +00:00
|
|
|
virReportOOMError();
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
stacktop = stack + stacksize;
|
|
|
|
|
2009-04-20 12:27:12 +00:00
|
|
|
flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|SIGCHLD;
|
|
|
|
|
2010-03-04 11:23:28 +00:00
|
|
|
if (userns_supported()) {
|
|
|
|
DEBUG0("Enable user namespaces");
|
2009-04-20 12:27:12 +00:00
|
|
|
flags |= CLONE_NEWUSER;
|
2010-03-04 11:23:28 +00:00
|
|
|
}
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2010-03-04 11:23:28 +00:00
|
|
|
if (def->nets != NULL) {
|
|
|
|
DEBUG0("Enable network namespaces");
|
2008-08-13 10:25:34 +00:00
|
|
|
flags |= CLONE_NEWNET;
|
2010-03-04 11:23:28 +00:00
|
|
|
}
|
2008-08-13 10:25:34 +00:00
|
|
|
|
|
|
|
pid = clone(lxcContainerChild, stacktop, flags, &args);
|
|
|
|
VIR_FREE(stack);
|
2010-03-04 11:23:28 +00:00
|
|
|
DEBUG("clone() completed, new container PID is %d", pid);
|
2008-08-13 10:25:34 +00:00
|
|
|
|
|
|
|
if (pid < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to run clone container"));
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return pid;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Child body used by the lxcContainerAvailable() probe: it does no
 * work and terminates at once with a success status, so the only
 * thing being tested is whether clone() itself succeeds. */
static int lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED)
{
    _exit(EXIT_SUCCESS);
}
|
|
|
|
|
|
|
|
int lxcContainerAvailable(int features)
|
|
|
|
{
|
2009-04-20 12:27:12 +00:00
|
|
|
int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|
|
2008-08-13 10:25:34 +00:00
|
|
|
CLONE_NEWIPC|SIGCHLD;
|
|
|
|
int cpid;
|
|
|
|
char *childStack;
|
|
|
|
char *stack;
|
|
|
|
int childStatus;
|
|
|
|
|
2009-04-20 12:27:12 +00:00
|
|
|
if (features & LXC_CONTAINER_FEATURE_USER)
|
|
|
|
flags |= CLONE_NEWUSER;
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
if (features & LXC_CONTAINER_FEATURE_NET)
|
|
|
|
flags |= CLONE_NEWNET;
|
|
|
|
|
|
|
|
if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) {
|
|
|
|
DEBUG0("Unable to allocate stack");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
childStack = stack + (getpagesize() * 4);
|
|
|
|
|
|
|
|
cpid = clone(lxcContainerDummyChild, childStack, flags, NULL);
|
|
|
|
VIR_FREE(stack);
|
|
|
|
if (cpid < 0) {
|
2009-02-05 16:28:30 +00:00
|
|
|
char ebuf[1024];
|
2008-08-13 10:25:34 +00:00
|
|
|
DEBUG("clone call returned %s, container support is not enabled",
|
2009-02-05 16:28:30 +00:00
|
|
|
virStrerror(errno, ebuf, sizeof ebuf));
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
|
|
|
} else {
|
|
|
|
waitpid(cpid, &childStatus, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|