2008-04-10 07:30:52 +00:00
|
|
|
/*
|
2012-01-24 18:51:01 +00:00
|
|
|
* Copyright (C) 2008-2012 Red Hat, Inc.
|
2010-03-12 17:47:26 +00:00
|
|
|
* Copyright (C) 2008 IBM Corp.
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* lxc_container.c: file description
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* David L. Leskovec <dlesko at linux.vnet.ibm.com>
|
2008-08-28 22:40:50 +00:00
|
|
|
* Daniel P. Berrange <berrange@redhat.com>
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <stdlib.h>
|
2008-08-28 22:40:50 +00:00
|
|
|
#include <stdio.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <sys/mount.h>
|
2008-08-13 10:25:34 +00:00
|
|
|
#include <sys/wait.h>
|
2010-01-22 13:21:16 +00:00
|
|
|
#include <sys/stat.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
#include <unistd.h>
|
2008-08-28 22:40:50 +00:00
|
|
|
#include <mntent.h>
|
2012-05-11 16:26:48 +00:00
|
|
|
#include <dirent.h>
|
2008-08-28 22:40:50 +00:00
|
|
|
|
|
|
|
/* Yes, we want linux private one, for _syscall2() macro */
|
|
|
|
#include <linux/unistd.h>
|
|
|
|
|
|
|
|
/* For MS_MOVE */
|
|
|
|
#include <linux/fs.h>
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2009-06-29 17:09:42 +00:00
|
|
|
#if HAVE_CAPNG
|
2010-03-09 18:22:22 +00:00
|
|
|
# include <cap-ng.h>
|
2009-06-29 17:09:42 +00:00
|
|
|
#endif
|
2009-05-11 14:05:27 +00:00
|
|
|
|
2011-11-01 14:59:51 +00:00
|
|
|
#if HAVE_LIBBLKID
|
|
|
|
# include <blkid/blkid.h>
|
|
|
|
#endif
|
|
|
|
|
2008-11-04 22:30:33 +00:00
|
|
|
#include "virterror_internal.h"
|
2008-11-06 16:36:07 +00:00
|
|
|
#include "logging.h"
|
2008-04-10 07:30:52 +00:00
|
|
|
#include "lxc_container.h"
|
|
|
|
#include "util.h"
|
2008-06-06 11:09:57 +00:00
|
|
|
#include "memory.h"
|
2011-11-02 16:03:09 +00:00
|
|
|
#include "virnetdevveth.h"
|
2011-02-22 13:09:19 +00:00
|
|
|
#include "uuid.h"
|
2011-07-19 18:32:58 +00:00
|
|
|
#include "virfile.h"
|
2011-05-05 21:38:09 +00:00
|
|
|
#include "command.h"
|
2011-11-02 15:53:39 +00:00
|
|
|
#include "virnetdev.h"
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2009-01-20 17:13:33 +00:00
|
|
|
#define VIR_FROM_THIS VIR_FROM_LXC
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
/*
|
|
|
|
* GLibc headers are behind the kernel, so we define these
|
|
|
|
* constants if they're not present already.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef CLONE_NEWPID
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWPID 0x20000000
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
#ifndef CLONE_NEWUTS
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWUTS 0x04000000
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
#ifndef CLONE_NEWUSER
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWUSER 0x10000000
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
#ifndef CLONE_NEWIPC
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWIPC 0x08000000
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
#ifndef CLONE_NEWNET
|
2010-03-09 18:22:22 +00:00
|
|
|
# define CLONE_NEWNET 0x40000000 /* New network namespace */
|
2008-08-13 10:25:34 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* messages between parent and container */
|
|
|
|
typedef char lxc_message_t;
|
|
|
|
#define LXC_CONTINUE_MSG 'c'
|
|
|
|
|
|
|
|
typedef struct __lxc_child_argv lxc_child_argv_t;
|
|
|
|
struct __lxc_child_argv {
|
2008-08-13 12:50:55 +00:00
|
|
|
virDomainDefPtr config;
|
2012-01-25 14:12:53 +00:00
|
|
|
virSecurityManagerPtr securityDriver;
|
2008-08-13 10:52:15 +00:00
|
|
|
unsigned int nveths;
|
|
|
|
char **veths;
|
2008-08-13 10:25:34 +00:00
|
|
|
int monitor;
|
2011-10-20 08:44:31 +00:00
|
|
|
char **ttyPaths;
|
|
|
|
size_t nttyPaths;
|
2011-06-02 15:52:32 +00:00
|
|
|
int handshakefd;
|
2008-08-13 10:25:34 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2008-04-10 07:30:52 +00:00
|
|
|
/**
|
2011-05-06 14:50:00 +00:00
|
|
|
* lxcContainerBuildInitCmd:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @vmDef: pointer to vm definition structure
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
2011-05-06 14:50:00 +00:00
|
|
|
* Build a virCommandPtr for launching the container 'init' process
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
2011-05-06 14:50:00 +00:00
|
|
|
* Returns a virCommandPtr
|
2008-04-10 07:30:52 +00:00
|
|
|
*/
|
2011-05-06 14:50:00 +00:00
|
|
|
static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef)
|
2008-04-10 07:30:52 +00:00
|
|
|
{
|
2011-02-22 13:09:19 +00:00
|
|
|
char uuidstr[VIR_UUID_STRING_BUFLEN];
|
2011-05-05 21:38:09 +00:00
|
|
|
virCommandPtr cmd;
|
2011-02-22 13:09:19 +00:00
|
|
|
|
|
|
|
virUUIDFormat(vmDef->uuid, uuidstr);
|
|
|
|
|
2011-05-05 21:38:09 +00:00
|
|
|
cmd = virCommandNew(vmDef->os.init);
|
|
|
|
|
2012-03-26 17:09:31 +00:00
|
|
|
if (vmDef->os.initargv && vmDef->os.initargv[0])
|
|
|
|
virCommandAddArgSet(cmd, (const char **)vmDef->os.initargv);
|
|
|
|
|
2011-05-05 21:38:09 +00:00
|
|
|
virCommandAddEnvString(cmd, "PATH=/bin:/sbin");
|
|
|
|
virCommandAddEnvString(cmd, "TERM=linux");
|
2012-01-24 18:51:01 +00:00
|
|
|
virCommandAddEnvString(cmd, "container=lxc-libvirt");
|
2012-03-14 12:52:58 +00:00
|
|
|
virCommandAddEnvPair(cmd, "container_uuid", uuidstr);
|
2011-05-05 21:38:09 +00:00
|
|
|
virCommandAddEnvPair(cmd, "LIBVIRT_LXC_UUID", uuidstr);
|
|
|
|
virCommandAddEnvPair(cmd, "LIBVIRT_LXC_NAME", vmDef->name);
|
2011-10-03 17:37:47 +00:00
|
|
|
if (vmDef->os.cmdline)
|
|
|
|
virCommandAddEnvPair(cmd, "LIBVIRT_LXC_CMDLINE", vmDef->os.cmdline);
|
2011-05-05 21:38:09 +00:00
|
|
|
|
2011-05-06 14:50:00 +00:00
|
|
|
return cmd;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * lxcContainerSetStdio:
 * @control: control FD from parent
 * @ttyfd: FD of tty to set as the container console
 * @handshakefd: handshake FD; left open together with @control and @ttyfd
 *
 * Starts a new session, sets the given tty as the controlling terminal,
 * closes every other file descriptor and then makes the tty the
 * stdin, stdout and stderr of the calling process.
 *
 * Returns 0 on success or -1 in case of error
 */
static int lxcContainerSetStdio(int control, int ttyfd, int handshakefd)
{
    int rc = -1;
    int open_max, i;

    if (setsid() < 0) {
        virReportSystemError(errno, "%s",
                             _("setsid failed"));
        goto cleanup;
    }

    if (ioctl(ttyfd, TIOCSCTTY, NULL) < 0) {
        virReportSystemError(errno, "%s",
                             _("ioctl(TIOCSCTTY) failed"));
        goto cleanup;
    }

    /* Just in case someone forgot to set FD_CLOEXEC, explicitly
     * close all FDs before executing the container. Only the three
     * FDs passed in by the caller survive. */
    open_max = sysconf(_SC_OPEN_MAX);
    for (i = 0; i < open_max; i++) {
        if (i != ttyfd && i != control && i != handshakefd) {
            /* VIR_FORCE_CLOSE takes an lvalue, so use a scratch copy
             * rather than the loop counter itself */
            int tmpfd = i;
            VIR_FORCE_CLOSE(tmpfd);
        }
    }

    if (dup2(ttyfd, 0) < 0) {
        virReportSystemError(errno, "%s",
                             _("dup2(stdin) failed"));
        goto cleanup;
    }

    if (dup2(ttyfd, 1) < 0) {
        virReportSystemError(errno, "%s",
                             _("dup2(stdout) failed"));
        goto cleanup;
    }

    if (dup2(ttyfd, 2) < 0) {
        virReportSystemError(errno, "%s",
                             _("dup2(stderr) failed"));
        goto cleanup;
    }

    rc = 0;

cleanup:
    VIR_DEBUG("rc=%d", rc);
    return rc;
}
|
|
|
|
|
|
|
|
/**
|
2008-08-13 10:25:34 +00:00
|
|
|
* lxcContainerSendContinue:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @control: control FD to child
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
2008-08-13 10:25:34 +00:00
|
|
|
* Sends the continue message via the socket pair stored in the vm
|
|
|
|
* structure.
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
2008-08-13 10:52:15 +00:00
|
|
|
int lxcContainerSendContinue(int control)
|
2008-04-10 07:30:52 +00:00
|
|
|
{
|
|
|
|
int rc = -1;
|
2008-08-13 10:25:34 +00:00
|
|
|
lxc_message_t msg = LXC_CONTINUE_MSG;
|
|
|
|
int writeCount = 0;
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
writeCount = safewrite(control, &msg, sizeof(msg));
|
|
|
|
if (writeCount != sizeof(msg)) {
|
|
|
|
goto error_out;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
rc = 0;
|
|
|
|
error_out:
|
|
|
|
return rc;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
2008-06-26 16:09:48 +00:00
|
|
|
/**
|
2008-08-13 10:25:34 +00:00
|
|
|
* lxcContainerWaitForContinue:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @control: Control FD from parent
|
2008-06-26 16:09:48 +00:00
|
|
|
*
|
|
|
|
* This function will wait for the container continue message from the
|
|
|
|
* parent process. It will send this message on the socket pair stored in
|
|
|
|
* the vm structure once it has completed the post clone container setup.
|
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
2011-06-01 22:17:00 +00:00
|
|
|
int lxcContainerWaitForContinue(int control)
|
2008-06-26 16:09:48 +00:00
|
|
|
{
|
|
|
|
lxc_message_t msg;
|
|
|
|
int readLen;
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
readLen = saferead(control, &msg, sizeof(msg));
|
2011-11-01 12:28:26 +00:00
|
|
|
if (readLen != sizeof(msg)) {
|
|
|
|
if (readLen >= 0)
|
|
|
|
errno = EIO;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (msg != LXC_CONTINUE_MSG) {
|
|
|
|
errno = EINVAL;
|
2008-08-13 10:14:47 +00:00
|
|
|
return -1;
|
2008-06-26 16:09:48 +00:00
|
|
|
}
|
|
|
|
|
2008-08-13 10:14:47 +00:00
|
|
|
return 0;
|
2008-06-26 16:09:48 +00:00
|
|
|
}
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2008-06-26 16:09:48 +00:00
|
|
|
/**
 * lxcContainerRenameAndEnableInterfaces:
 * @privNet: when true, the "lo" device is brought online even if
 *           @veths is NULL
 * @nveths: number of interfaces
 * @veths: interface names
 *
 * This function will rename the interfaces to ethN
 * with id ascending order from zero and enable the
 * renamed interfaces for this container. Afterwards the
 * "lo" device is enabled if @veths is non-NULL or @privNet is set.
 *
 * Returns 0 on success or nonzero in case of error
 */
static int lxcContainerRenameAndEnableInterfaces(bool privNet,
                                                 unsigned int nveths,
                                                 char **veths)
{
    int rc = 0;
    unsigned int i;
    char *newname = NULL;

    for (i = 0 ; i < nveths ; i++) {
        /* i is unsigned, so it has to be formatted with %u */
        if (virAsprintf(&newname, "eth%u", i) < 0) {
            virReportOOMError();
            rc = -1;
            goto error_out;
        }

        VIR_DEBUG("Renaming %s to %s", veths[i], newname);
        rc = virNetDevSetName(veths[i], newname);
        if (rc < 0)
            goto error_out;

        VIR_DEBUG("Enabling %s", newname);
        rc = virNetDevSetOnline(newname, true);
        if (rc < 0)
            goto error_out;

        VIR_FREE(newname);
    }

    /* enable lo device if there were other net devices, or if a
     * private network was requested */
    if (veths || privNet)
        rc = virNetDevSetOnline("lo", true);

error_out:
    VIR_FREE(newname);
    return rc;
}
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2011-01-28 21:38:06 +00:00
|
|
|
/*_syscall2(int, pivot_root, char *, newroot, const char *, oldroot)*/
|
2008-08-28 22:40:50 +00:00
|
|
|
extern int pivot_root(const char * new_root,const char * put_old);
|
|
|
|
|
|
|
|
/*
 * Comparison callback with the qsort()-style signature: @a and @b each
 * point to a string pointer. The result is the reverse of strcmp()
 * order, so a path sorts before any path that is a prefix of it.
 */
static int lxcContainerChildMountSort(const void *a, const void *b)
{
    const char *const *first = a;
    const char *const *second = b;

    /* Operands are swapped on purpose - the deepest children must
     * be unmounted first */
    return strcmp(*second, *first);
}
|
|
|
|
|
2009-04-14 17:51:12 +00:00
|
|
|
#ifndef MS_REC
|
2010-03-09 18:22:22 +00:00
|
|
|
# define MS_REC 16384
|
2009-04-14 17:51:12 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef MNT_DETACH
|
2010-03-09 18:22:22 +00:00
|
|
|
# define MNT_DETACH 0x00000002
|
2009-04-14 17:51:12 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef MS_PRIVATE
|
2010-03-09 18:22:22 +00:00
|
|
|
# define MS_PRIVATE (1<<18)
|
2009-04-14 17:51:12 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef MS_SLAVE
|
2010-03-09 18:22:22 +00:00
|
|
|
# define MS_SLAVE (1<<19)
|
2009-04-14 17:51:12 +00:00
|
|
|
#endif
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
/*
 * lxcContainerPivotRoot:
 * @root: filesystem definition whose ->src becomes the new root
 *
 * Makes "/" recursively private, mounts a tmpfs on <root->src>/.oldroot,
 * bind mounts root->src onto a "new" directory inside that tmpfs
 * (remounting it read-only when root->readonly is set), changes into
 * that directory and calls pivot_root() so that the previous root ends
 * up at /.oldroot.
 *
 * Returns 0 on success or -1 in case of error (an error is reported)
 */
static int lxcContainerPivotRoot(virDomainFSDefPtr root)
{
    int ret;
    char *oldroot = NULL, *newroot = NULL;

    ret = -1;

    VIR_DEBUG("Pivot via %s", root->src);

    /* root->parent must be private, so make / private. */
    if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to make root private"));
        goto err;
    }

    if (virAsprintf(&oldroot, "%s/.oldroot", root->src) < 0) {
        virReportOOMError();
        goto err;
    }

    if (virFileMakePath(oldroot) < 0) {
        virReportSystemError(errno,
                             _("Failed to create %s"),
                             oldroot);
        goto err;
    }

    /* Create a tmpfs root since old and new roots must be
     * on separate filesystems */
    if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount empty tmpfs at %s"),
                             oldroot);
        goto err;
    }

    /* Create a directory called 'new' in tmpfs */
    if (virAsprintf(&newroot, "%s/new", oldroot) < 0) {
        virReportOOMError();
        goto err;
    }

    if (virFileMakePath(newroot) < 0) {
        virReportSystemError(errno,
                             _("Failed to create %s"),
                             newroot);
        goto err;
    }

    /* ... and mount our root onto it */
    if (mount(root->src, newroot, NULL, MS_BIND|MS_REC, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to bind new root %s into tmpfs"),
                             root->src);
        goto err;
    }

    if (root->readonly) {
        /* The bind mount is made read-only by a second, remount call */
        if (mount(root->src, newroot, NULL, MS_BIND|MS_REC|MS_RDONLY|MS_REMOUNT, NULL) < 0) {
            virReportSystemError(errno,
                                 _("Failed to make new root %s readonly"),
                                 root->src);
            goto err;
        }
    }

    /* Now we chdir into the tmpfs, then pivot into the
     * root->src bind-mounted onto '/new' */
    if (chdir(newroot) < 0) {
        virReportSystemError(errno,
                             _("Failed to chdir into %s"), newroot);
        goto err;
    }

    /* The old root directory will live at /.oldroot after
     * this and will soon be unmounted completely */
    if (pivot_root(".", ".oldroot") < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to pivot root"));
        goto err;
    }

    /* CWD is undefined after pivot_root, so go to / */
    if (chdir("/") < 0) {
        /* Report the failure so the caller does not get a bare -1 */
        virReportSystemError(errno, "%s",
                             _("Failed to chdir into /"));
        goto err;
    }

    ret = 0;

err:
    VIR_FREE(oldroot);
    VIR_FREE(newroot);

    return ret;
}
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
|
2012-05-11 10:02:50 +00:00
|
|
|
static int lxcContainerMountBasicFS(virDomainDefPtr def,
|
|
|
|
bool pivotRoot,
|
|
|
|
virSecurityManagerPtr securityDriver)
|
2008-08-28 22:40:50 +00:00
|
|
|
{
|
|
|
|
const struct {
|
2009-04-22 14:26:50 +00:00
|
|
|
const char *src;
|
|
|
|
const char *dst;
|
|
|
|
const char *type;
|
2011-07-22 12:02:05 +00:00
|
|
|
const char *opts;
|
|
|
|
int mflags;
|
2009-04-22 14:26:50 +00:00
|
|
|
} mnts[] = {
|
2011-07-22 12:08:20 +00:00
|
|
|
/* When we want to make a bind mount readonly, for unknown reasons,
|
2012-02-03 18:20:22 +00:00
|
|
|
* it is currently necessary to bind it once, and then remount the
|
2011-07-22 12:08:20 +00:00
|
|
|
* bind with the readonly flag. If this is not done, then the original
|
2011-08-04 16:16:56 +00:00
|
|
|
* mount point in the main OS becomes readonly too which is not what
|
2011-07-22 12:08:20 +00:00
|
|
|
* we want. Hence some things have two entries here.
|
|
|
|
*/
|
2012-05-10 16:16:11 +00:00
|
|
|
{ "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV },
|
|
|
|
{ "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND },
|
|
|
|
{ "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY },
|
|
|
|
{ "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV },
|
|
|
|
{ "sysfs", "/sys", "sysfs", NULL, MS_BIND|MS_REMOUNT|MS_RDONLY },
|
2012-03-26 15:39:30 +00:00
|
|
|
#if HAVE_SELINUX
|
2012-05-10 16:16:11 +00:00
|
|
|
{ SELINUX_MOUNT, SELINUX_MOUNT, "selinuxfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV },
|
|
|
|
{ SELINUX_MOUNT, SELINUX_MOUNT, NULL, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY },
|
2012-03-26 15:39:30 +00:00
|
|
|
#endif
|
2008-08-28 22:40:50 +00:00
|
|
|
};
|
2009-09-04 14:12:35 +00:00
|
|
|
int i, rc = -1;
|
2012-01-25 14:12:54 +00:00
|
|
|
char *opts = NULL;
|
2009-04-22 14:26:50 +00:00
|
|
|
|
2012-05-10 16:16:11 +00:00
|
|
|
VIR_DEBUG("Mounting basic filesystems pivotRoot=%d", pivotRoot);
|
2011-08-04 16:16:56 +00:00
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
for (i = 0 ; i < ARRAY_CARDINALITY(mnts) ; i++) {
|
2011-07-22 12:02:05 +00:00
|
|
|
const char *srcpath = NULL;
|
2011-08-04 16:16:56 +00:00
|
|
|
|
2012-01-25 14:12:54 +00:00
|
|
|
VIR_DEBUG("Processing %s -> %s",
|
|
|
|
mnts[i].src, mnts[i].dst);
|
2011-08-04 16:16:56 +00:00
|
|
|
|
2011-07-05 21:02:53 +00:00
|
|
|
if (virFileMakePath(mnts[i].dst) < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to mkdir %s"),
|
2009-04-22 14:26:50 +00:00
|
|
|
mnts[i].src);
|
2009-09-04 14:12:35 +00:00
|
|
|
goto cleanup;
|
2009-04-22 14:26:50 +00:00
|
|
|
}
|
2011-07-22 12:02:05 +00:00
|
|
|
|
2012-05-10 16:16:11 +00:00
|
|
|
srcpath = mnts[i].src;
|
2011-07-22 12:02:05 +00:00
|
|
|
|
|
|
|
/* Skip if mount doesn't exist in source */
|
|
|
|
if ((srcpath[0] == '/') &&
|
2012-06-11 03:50:53 +00:00
|
|
|
(access(srcpath, R_OK) < 0))
|
2011-07-22 12:02:05 +00:00
|
|
|
continue;
|
|
|
|
|
2011-08-04 16:16:56 +00:00
|
|
|
VIR_DEBUG("Mount %s on %s type=%s flags=%x, opts=%s",
|
|
|
|
srcpath, mnts[i].dst, mnts[i].type, mnts[i].mflags, mnts[i].opts);
|
2011-07-22 12:02:05 +00:00
|
|
|
if (mount(srcpath, mnts[i].dst, mnts[i].type, mnts[i].mflags, mnts[i].opts) < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2011-07-22 12:02:05 +00:00
|
|
|
_("Failed to mount %s on %s type %s"),
|
|
|
|
mnts[i].src, mnts[i].dst, NULLSTR(mnts[i].type));
|
2009-09-04 14:12:35 +00:00
|
|
|
goto cleanup;
|
2009-04-22 14:26:50 +00:00
|
|
|
}
|
2011-07-22 12:02:05 +00:00
|
|
|
}
|
|
|
|
|
2012-01-25 14:12:54 +00:00
|
|
|
if (pivotRoot) {
|
|
|
|
/*
|
|
|
|
* tmpfs is limited to 64kb, since we only have device nodes in there
|
|
|
|
* and don't want to DOS the entire OS RAM usage
|
|
|
|
*/
|
2012-02-10 15:22:50 +00:00
|
|
|
|
2012-05-11 10:02:50 +00:00
|
|
|
char *mount_options = virSecurityManagerGetMountOptions(securityDriver, def);
|
|
|
|
ignore_value(virAsprintf(&opts,
|
|
|
|
"mode=755,size=65536%s",(mount_options ? mount_options : "")));
|
|
|
|
VIR_FREE(mount_options);
|
2012-02-10 15:22:50 +00:00
|
|
|
if (!opts) {
|
2012-01-25 14:12:54 +00:00
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
VIR_DEBUG("Mount devfs on /dev type=tmpfs flags=%x, opts=%s",
|
|
|
|
MS_NOSUID, opts);
|
|
|
|
if (mount("devfs", "/dev", "tmpfs", MS_NOSUID, opts) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to mount %s on %s type %s"),
|
|
|
|
"devfs", "/dev", "tmpfs");
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-07-22 12:02:05 +00:00
|
|
|
rc = 0;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
VIR_DEBUG("rc=%d", rc);
|
2012-01-25 14:12:54 +00:00
|
|
|
VIR_FREE(opts);
|
2011-07-22 12:02:05 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-01-25 14:12:54 +00:00
|
|
|
/*
 * lxcContainerMountFSDevPTS:
 * @root: root filesystem definition; root->src is used to locate the
 *        existing devpts mount below /.oldroot
 *
 * Creates /dev/pts and moves the mount found at
 * /.oldroot<root->src>/dev/pts onto it with MS_MOVE.
 *
 * Returns 0 on success or -1 in case of error (an error is reported)
 */
static int lxcContainerMountFSDevPTS(virDomainFSDefPtr root)
{
    int ret = -1;
    char *oldPts = NULL;

    if (virAsprintf(&oldPts, "/.oldroot%s/dev/pts", root->src) < 0) {
        virReportOOMError();
        goto cleanup;
    }

    if (virFileMakePath("/dev/pts") < 0) {
        virReportSystemError(errno, "%s",
                             _("Cannot create /dev/pts"));
        goto cleanup;
    }

    VIR_DEBUG("Trying to move %s to %s", oldPts, "/dev/pts");
    ret = mount(oldPts, "/dev/pts", NULL, MS_MOVE, NULL);
    if (ret < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to mount /dev/pts in container"));
        goto cleanup;
    }

    ret = 0;

cleanup:
    VIR_FREE(oldPts);

    return ret;
}
|
|
|
|
|
2011-10-20 08:44:31 +00:00
|
|
|
static int lxcContainerPopulateDevices(char **ttyPaths, size_t nttyPaths)
|
2009-04-22 14:26:50 +00:00
|
|
|
{
|
2011-10-20 08:44:31 +00:00
|
|
|
size_t i;
|
2009-04-22 14:26:50 +00:00
|
|
|
const struct {
|
|
|
|
int maj;
|
|
|
|
int min;
|
|
|
|
mode_t mode;
|
|
|
|
const char *path;
|
|
|
|
} devs[] = {
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL, 0666, "/dev/null" },
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO, 0666, "/dev/zero" },
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL, 0666, "/dev/full" },
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM, 0666, "/dev/random" },
|
|
|
|
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/dev/urandom" },
|
|
|
|
};
|
2012-02-08 14:21:28 +00:00
|
|
|
const struct {
|
|
|
|
const char *src;
|
|
|
|
const char *dst;
|
|
|
|
} links[] = {
|
|
|
|
{ "/proc/self/fd/0", "/dev/stdin" },
|
|
|
|
{ "/proc/self/fd/1", "/dev/stdout" },
|
|
|
|
{ "/proc/self/fd/2", "/dev/stderr" },
|
|
|
|
{ "/proc/self/fd", "/dev/fd" },
|
|
|
|
};
|
2008-08-28 22:40:50 +00:00
|
|
|
|
|
|
|
/* Populate /dev/ with a few important bits */
|
|
|
|
for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
|
|
|
|
dev_t dev = makedev(devs[i].maj, devs[i].min);
|
2009-05-08 10:22:46 +00:00
|
|
|
if (mknod(devs[i].path, S_IFCHR, dev) < 0 ||
|
2008-08-28 22:40:50 +00:00
|
|
|
chmod(devs[i].path, devs[i].mode)) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to make device %s"),
|
2009-01-20 17:13:33 +00:00
|
|
|
devs[i].path);
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-02-08 14:21:28 +00:00
|
|
|
for (i = 0 ; i < ARRAY_CARDINALITY(links) ; i++) {
|
|
|
|
if (symlink(links[i].src, links[i].dst) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to symlink device %s to %s"),
|
|
|
|
links[i].dst, links[i].src);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
if (access("/dev/pts/ptmx", W_OK) == 0) {
|
2012-01-11 09:59:37 +00:00
|
|
|
/* We have private devpts capability, so bind that */
|
|
|
|
if (virFileTouch("/dev/ptmx", 0666) < 0)
|
|
|
|
return -1;
|
|
|
|
|
2011-08-31 18:18:33 +00:00
|
|
|
if (mount("/dev/pts/ptmx", "/dev/ptmx", "ptmx", MS_BIND, NULL) < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2012-01-11 09:59:37 +00:00
|
|
|
_("Failed to bind /dev/pts/ptmx on to /dev/ptmx"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Legacy devpts, so we need to just use shared one */
|
|
|
|
dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX);
|
|
|
|
if (mknod("/dev/ptmx", S_IFCHR, dev) < 0 ||
|
|
|
|
chmod("/dev/ptmx", 0666)) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Failed to make device /dev/ptmx"));
|
2009-04-22 14:26:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-10-20 08:44:31 +00:00
|
|
|
for (i = 0 ; i < nttyPaths ; i++) {
|
|
|
|
char *tty;
|
|
|
|
if (virAsprintf(&tty, "/dev/tty%zu", i+1) < 0) {
|
|
|
|
virReportOOMError();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (symlink(ttyPaths[i], tty) < 0) {
|
|
|
|
VIR_FREE(tty);
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to symlink %s to %s"),
|
|
|
|
ttyPaths[i], tty);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
VIR_FREE(tty);
|
|
|
|
if (i == 0 &&
|
|
|
|
symlink(ttyPaths[i], "/dev/console") < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to symlink %s to /dev/console"),
|
|
|
|
ttyPaths[i]);
|
|
|
|
return -1;
|
|
|
|
}
|
2010-11-05 13:27:34 +00:00
|
|
|
}
|
2008-08-28 22:40:50 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-07-22 11:11:12 +00:00
|
|
|
/*
 * lxcContainerMountFSBind:
 * @fs: filesystem definition; fs->src is the source directory,
 *      fs->dst the mount point, fs->readonly requests a read-only mount
 * @srcprefix: prefix prepended to fs->src to form the source path
 *
 * Creates fs->dst and bind mounts <srcprefix><fs->src> onto it. When
 * fs->readonly is set the bind mount is remounted read-only; a failure
 * of that remount is an error, so a filesystem requested as read-only
 * is never left writable behind a successful return.
 *
 * Returns 0 on success or -1 in case of error (an error is reported)
 */
static int lxcContainerMountFSBind(virDomainFSDefPtr fs,
                                   const char *srcprefix)
{
    char *src = NULL;
    int ret = -1;

    if (virAsprintf(&src, "%s%s", srcprefix, fs->src) < 0) {
        virReportOOMError();
        goto cleanup;
    }

    if (virFileMakePath(fs->dst) < 0) {
        virReportSystemError(errno,
                             _("Failed to create %s"),
                             fs->dst);
        goto cleanup;
    }

    if (mount(src, fs->dst, NULL, MS_BIND, NULL) < 0) {
        virReportSystemError(errno,
                             _("Failed to bind mount directory %s to %s"),
                             src, fs->dst);
        goto cleanup;
    }

    if (fs->readonly) {
        VIR_DEBUG("Binding %s readonly", fs->dst);
        if (mount(src, fs->dst, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
            virReportSystemError(errno,
                                 _("Failed to make directory %s readonly"),
                                 fs->dst);
            /* Do not report success with the mount still writable */
            goto cleanup;
        }
    }

    ret = 0;

cleanup:
    VIR_FREE(src);
    return ret;
}
|
|
|
|
|
|
|
|
|
2011-11-01 14:59:51 +00:00
|
|
|
#ifdef HAVE_LIBBLKID
|
|
|
|
static int
|
|
|
|
lxcContainerMountDetectFilesystem(const char *src, char **type)
|
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
int ret = -1;
|
|
|
|
int rc;
|
|
|
|
const char *data = NULL;
|
|
|
|
blkid_probe blkid = NULL;
|
|
|
|
|
|
|
|
*type = NULL;
|
|
|
|
|
|
|
|
if ((fd = open(src, O_RDONLY)) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to open filesystem %s"), src);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(blkid = blkid_new_probe())) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Unable to create blkid library handle"));
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
if (blkid_probe_set_device(blkid, fd, 0, 0) < 0) {
|
|
|
|
virReportSystemError(EINVAL,
|
|
|
|
_("Unable to associate device %s with blkid library"),
|
|
|
|
src);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
blkid_probe_enable_superblocks(blkid, 1);
|
|
|
|
|
|
|
|
blkid_probe_set_superblocks_flags(blkid, BLKID_SUBLKS_TYPE);
|
|
|
|
|
|
|
|
rc = blkid_do_safeprobe(blkid);
|
|
|
|
if (rc != 0) {
|
|
|
|
if (rc == 1) /* Nothing found, return success with *type == NULL */
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
if (rc == -2) {
|
|
|
|
virReportSystemError(EINVAL,
|
|
|
|
_("Too many filesystems detected for %s"),
|
|
|
|
src);
|
|
|
|
} else {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to detect filesystem for %s"),
|
|
|
|
src);
|
|
|
|
}
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (blkid_probe_lookup_value(blkid, "TYPE", &data, NULL) < 0) {
|
|
|
|
virReportSystemError(ENOENT,
|
|
|
|
_("Unable to find filesystem type for %s"),
|
|
|
|
src);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(*type = strdup(data))) {
|
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
ret = 0;
|
|
|
|
cleanup:
|
|
|
|
VIR_FORCE_CLOSE(fd);
|
|
|
|
if (blkid)
|
|
|
|
blkid_free_probe(blkid);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
#else /* ! HAVE_LIBBLKID */
|
|
|
|
static int
|
|
|
|
lxcContainerMountDetectFilesystem(const char *src ATTRIBUTE_UNUSED,
|
|
|
|
char **type)
|
|
|
|
{
|
|
|
|
/* No libblkid, so just return success with no detected type */
|
|
|
|
*type = NULL;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif /* ! HAVE_LIBBLKID */
|
2011-07-22 12:02:51 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This functions attempts to do automatic detection of filesystem
|
|
|
|
* type following the same rules as the util-linux 'mount' binary.
|
|
|
|
*
|
|
|
|
* The main difference is that we don't (currently) try to use
|
|
|
|
* libblkid to detect the format first. We go straight to using
|
|
|
|
* /etc/filesystems, and then /proc/filesystems
|
|
|
|
*/
|
|
|
|
static int lxcContainerMountFSBlockAuto(virDomainFSDefPtr fs,
|
|
|
|
int fsflags,
|
|
|
|
const char *src,
|
|
|
|
const char *srcprefix)
|
|
|
|
{
|
|
|
|
FILE *fp = NULL;
|
|
|
|
int ret = -1;
|
|
|
|
bool tryProc = false;
|
|
|
|
bool gotStar = false;
|
|
|
|
char *fslist = NULL;
|
|
|
|
char *line = NULL;
|
|
|
|
const char *type;
|
|
|
|
|
|
|
|
VIR_DEBUG("src=%s srcprefix=%s dst=%s", src, srcprefix, fs->dst);
|
|
|
|
|
|
|
|
/* First time around we use /etc/filesystems */
|
|
|
|
retry:
|
|
|
|
if (virAsprintf(&fslist, "%s%s",
|
|
|
|
srcprefix, tryProc ? "/proc/filesystems" : "/etc/filesystems") < 0) {
|
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
VIR_DEBUG("Open fslist %s", fslist);
|
|
|
|
if (!(fp = fopen(fslist, "r"))) {
|
|
|
|
/* If /etc/filesystems does not exist, then we need to retry
|
|
|
|
* with /proc/filesystems next
|
|
|
|
*/
|
|
|
|
if (errno == ENOENT &&
|
|
|
|
!tryProc) {
|
|
|
|
tryProc = true;
|
|
|
|
VIR_FREE(fslist);
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to read %s"),
|
|
|
|
fslist);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (!feof(fp)) {
|
|
|
|
size_t n;
|
|
|
|
VIR_FREE(line);
|
|
|
|
if (getline(&line, &n, fp) <= 0) {
|
|
|
|
if (feof(fp))
|
|
|
|
break;
|
|
|
|
|
2011-07-22 12:08:20 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2011-07-22 12:02:51 +00:00
|
|
|
if (strstr(line, "nodev"))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
type = strchr(line, '\n');
|
|
|
|
if (type)
|
|
|
|
line[type-line] = '\0';
|
|
|
|
|
|
|
|
type = line;
|
|
|
|
virSkipSpaces(&type);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* /etc/filesystems is only allowed to contain '*' on the last line
|
|
|
|
*/
|
2011-11-01 12:29:25 +00:00
|
|
|
if (gotStar && !tryProc) {
|
2011-07-22 12:02:51 +00:00
|
|
|
lxcError(VIR_ERR_INTERNAL_ERROR,
|
|
|
|
_("%s has unexpected '*' before last line"),
|
|
|
|
fslist);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* An '*' on the last line in /etc/filesystems
|
|
|
|
* means try /proc/filesystems next. We don't
|
|
|
|
* jump immediately though, since we need to see
|
|
|
|
* if any more lines follow
|
|
|
|
*/
|
|
|
|
if (!tryProc &&
|
|
|
|
STREQ(type, "*"))
|
|
|
|
gotStar = true;
|
|
|
|
|
|
|
|
VIR_DEBUG("Trying mount %s with %s", src, type);
|
|
|
|
if (mount(src, fs->dst, type, fsflags, NULL) < 0) {
|
|
|
|
/* These errnos indicate a bogus filesystem type for
|
|
|
|
* the image we have, so skip to the next type
|
|
|
|
*/
|
|
|
|
if (errno == EINVAL || errno == ENODEV)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
virReportSystemError(errno,
|
2011-11-01 14:34:02 +00:00
|
|
|
_("Failed to mount device %s to %s"),
|
2011-07-22 12:02:51 +00:00
|
|
|
src, fs->dst);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
break;
|
2011-07-22 12:08:20 +00:00
|
|
|
}
|
|
|
|
|
2011-07-22 12:02:51 +00:00
|
|
|
/* We've got to the end of /etc/filesystems and saw
|
|
|
|
* a '*', so we must try /proc/filesystems next
|
|
|
|
*/
|
|
|
|
if (ret != 0 &&
|
|
|
|
!tryProc &&
|
|
|
|
gotStar) {
|
|
|
|
tryProc = true;
|
|
|
|
VIR_FREE(fslist);
|
|
|
|
VIR_FORCE_FCLOSE(fp);
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
2011-11-01 14:34:02 +00:00
|
|
|
if (ret != 0) {
|
|
|
|
virReportSystemError(ENODEV,
|
|
|
|
_("Failed to mount device %s to %s, unable to detect filesystem"),
|
|
|
|
src, fs->dst);
|
|
|
|
}
|
|
|
|
|
2011-07-22 12:02:51 +00:00
|
|
|
VIR_DEBUG("Done mounting filesystem ret=%d tryProc=%d", ret, tryProc);
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
VIR_FREE(line);
|
2012-06-11 05:52:37 +00:00
|
|
|
VIR_FREE(fslist);
|
2011-07-22 12:02:51 +00:00
|
|
|
VIR_FORCE_FCLOSE(fp);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Mount a block device 'src' on fs->dst, automatically
|
|
|
|
* probing for filesystem type
|
|
|
|
*/
|
|
|
|
static int lxcContainerMountFSBlockHelper(virDomainFSDefPtr fs,
|
|
|
|
const char *src,
|
|
|
|
const char *srcprefix)
|
|
|
|
{
|
|
|
|
int fsflags = 0;
|
|
|
|
int ret = -1;
|
2011-11-01 14:59:51 +00:00
|
|
|
char *format = NULL;
|
|
|
|
|
2011-07-22 12:02:51 +00:00
|
|
|
if (fs->readonly)
|
|
|
|
fsflags |= MS_RDONLY;
|
|
|
|
|
|
|
|
if (virFileMakePath(fs->dst) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to create %s"),
|
|
|
|
fs->dst);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2011-11-01 14:59:51 +00:00
|
|
|
if (lxcContainerMountDetectFilesystem(src, &format) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
if (format) {
|
|
|
|
VIR_DEBUG("Mount %s with detected format %s", src, format);
|
|
|
|
if (mount(src, fs->dst, format, fsflags, NULL) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to mount device %s to %s as %s"),
|
|
|
|
src, fs->dst, format);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
ret = 0;
|
|
|
|
} else {
|
|
|
|
ret = lxcContainerMountFSBlockAuto(fs, fsflags, src, srcprefix);
|
|
|
|
}
|
2011-07-22 12:02:51 +00:00
|
|
|
|
|
|
|
cleanup:
|
2012-06-12 05:55:48 +00:00
|
|
|
VIR_FREE(format);
|
2011-07-22 12:02:51 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Mount a block-backed filesystem: build the device path as
 * @srcprefix followed by fs->src and hand it to
 * lxcContainerMountFSBlockHelper().
 *
 * Returns the helper's result, or -1 if the path cannot be built.
 */
static int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
                                    const char *srcprefix)
{
    char *devpath = NULL;
    int rc = -1;

    if (virAsprintf(&devpath, "%s%s", srcprefix, fs->src) >= 0) {
        rc = lxcContainerMountFSBlockHelper(fs, devpath, srcprefix);

        VIR_DEBUG("Done mounting filesystem ret=%d", rc);
    } else {
        virReportOOMError();
    }

    VIR_FREE(devpath);
    return rc;
}
|
|
|
|
|
|
|
|
|
2012-05-08 16:50:48 +00:00
|
|
|
/*
 * Mount a fresh tmpfs on fs->dst, creating the directory first.
 * The size limit is taken from fs->usage and passed to the kernel
 * as a "size=<N>k" mount option. When fs->readonly is set the
 * mount is additionally remounted read-only.
 *
 * Returns 0 on success, -1 with an error reported on failure.
 */
static int lxcContainerMountFSTmpfs(virDomainFSDefPtr fs)
{
    int ret = -1;
    char *data = NULL;

    if (virAsprintf(&data, "size=%lldk", fs->usage) < 0) {
        virReportOOMError();
        goto cleanup;
    }

    if (virFileMakePath(fs->dst) < 0) {
        virReportSystemError(errno,
                             _("Failed to create %s"),
                             fs->dst);
        goto cleanup;
    }

    if (mount("tmpfs", fs->dst, "tmpfs", MS_NOSUID|MS_NODEV, data) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount directory %s as tmpfs"),
                             fs->dst);
        goto cleanup;
    }

    if (fs->readonly) {
        VIR_DEBUG("Binding %s readonly", fs->dst);
        if (mount(fs->dst, fs->dst, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
            virReportSystemError(errno,
                                 _("Failed to make directory %s readonly"),
                                 fs->dst);
            /* Do not report success for a filesystem that was
             * requested read-only but is still writable */
            goto cleanup;
        }
    }

    ret = 0;

cleanup:
    VIR_FREE(data);
    return ret;
}
|
|
|
|
|
|
|
|
|
2011-07-22 11:11:12 +00:00
|
|
|
/*
 * Mount a single configured filesystem, dispatching on fs->type:
 * bind mount, block device or tmpfs.
 *
 * @fs: filesystem definition to mount
 * @srcprefix: prefix for source paths, forwarded to the bind and
 *             block mount helpers
 *
 * Returns 0 on success, -1 if the mount fails or the filesystem
 * type cannot be mounted here (an error is reported in both cases).
 */
static int lxcContainerMountFS(virDomainFSDefPtr fs,
                               const char *srcprefix)
{
    switch (fs->type) {
    case VIR_DOMAIN_FS_TYPE_MOUNT:
        if (lxcContainerMountFSBind(fs, srcprefix) < 0)
            return -1;
        break;
    case VIR_DOMAIN_FS_TYPE_BLOCK:
        if (lxcContainerMountFSBlock(fs, srcprefix) < 0)
            return -1;
        break;
    case VIR_DOMAIN_FS_TYPE_RAM:
        if (lxcContainerMountFSTmpfs(fs) < 0)
            return -1;
        break;
    case VIR_DOMAIN_FS_TYPE_FILE:
        /* An error was raised, so the call must not look successful */
        lxcError(VIR_ERR_INTERNAL_ERROR,
                 _("Unexpected filesystem type %s"),
                 virDomainFSTypeToString(fs->type));
        return -1;
    default:
        lxcError(VIR_ERR_CONFIG_UNSUPPORTED,
                 _("Cannot mount filesystem type %s"),
                 virDomainFSTypeToString(fs->type));
        return -1;
    }
    return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Mount every filesystem listed in the domain definition.
 *
 * @vmDef: domain definition holding the fss[] array
 * @dstprefix: passed to lxcContainerMountFS() as its source prefix
 * @skipRoot: when true, the entry whose destination is "/" is
 *            left out
 *
 * Returns 0 when all mounts succeed, -1 on the first failure.
 */
static int lxcContainerMountAllFS(virDomainDefPtr vmDef,
                                  const char *dstprefix,
                                  bool skipRoot)
{
    size_t idx;

    VIR_DEBUG("Mounting %s %d", dstprefix, skipRoot);

    /* Pull in rest of container's mounts */
    for (idx = 0; idx < vmDef->nfss; idx++) {
        virDomainFSDefPtr fs = vmDef->fss[idx];
        bool isRoot = skipRoot && STREQ(fs->dst, "/");

        if (!isRoot &&
            lxcContainerMountFS(fs, dstprefix) < 0)
            return -1;
    }

    VIR_DEBUG("Mounted all filesystems");
    return 0;
}
|
|
|
|
|
|
|
|
|
2012-05-11 10:35:28 +00:00
|
|
|
static int lxcContainerGetSubtree(const char *prefix,
|
|
|
|
char ***mountsret,
|
|
|
|
size_t *nmountsret)
|
2008-08-28 22:40:50 +00:00
|
|
|
{
|
|
|
|
FILE *procmnt;
|
2012-05-11 10:35:28 +00:00
|
|
|
struct mntent mntent;
|
2009-01-22 19:41:48 +00:00
|
|
|
char mntbuf[1024];
|
2011-11-01 12:56:53 +00:00
|
|
|
int ret = -1;
|
2012-05-11 10:35:28 +00:00
|
|
|
char **mounts = NULL;
|
|
|
|
size_t nmounts = 0;
|
|
|
|
|
|
|
|
*mountsret = NULL;
|
|
|
|
*nmountsret = 0;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
|
|
|
if (!(procmnt = setmntent("/proc/mounts", "r"))) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to read /proc/mounts"));
|
2008-08-28 22:40:50 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2012-05-11 10:35:28 +00:00
|
|
|
|
2009-01-22 19:41:48 +00:00
|
|
|
while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
|
2009-04-22 14:26:50 +00:00
|
|
|
VIR_DEBUG("Got %s", mntent.mnt_dir);
|
2012-05-11 10:35:28 +00:00
|
|
|
if (!STRPREFIX(mntent.mnt_dir, prefix))
|
2008-08-28 22:40:50 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (VIR_REALLOC_N(mounts, nmounts+1) < 0) {
|
2010-02-04 18:19:08 +00:00
|
|
|
virReportOOMError();
|
2011-11-01 12:56:53 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
}
|
2012-06-12 06:31:15 +00:00
|
|
|
if (!(mounts[nmounts] = strdup(mntent.mnt_dir))) {
|
2010-02-04 18:19:08 +00:00
|
|
|
virReportOOMError();
|
2011-11-01 12:56:53 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
}
|
2012-06-18 16:13:32 +00:00
|
|
|
nmounts++;
|
2011-11-01 12:56:53 +00:00
|
|
|
VIR_DEBUG("Grabbed %s", mntent.mnt_dir);
|
2008-08-28 22:40:50 +00:00
|
|
|
}
|
|
|
|
|
2009-09-02 07:58:50 +00:00
|
|
|
if (mounts)
|
|
|
|
qsort(mounts, nmounts, sizeof(mounts[0]),
|
|
|
|
lxcContainerChildMountSort);
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2012-05-11 10:35:28 +00:00
|
|
|
ret = 0;
|
|
|
|
cleanup:
|
2012-06-12 06:31:15 +00:00
|
|
|
*mountsret = mounts;
|
|
|
|
*nmountsret = nmounts;
|
2012-05-11 10:35:28 +00:00
|
|
|
endmntent(procmnt);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Unmount every mount point found under @prefix.
 *
 * The mounts are unmounted one by one in the order returned by
 * lxcContainerGetSubtree(). If any single umount() fails, the
 * last entry of the list is lazily detached with MNT_DETACH
 * instead. When @isOldRootFS is set, that same entry is then
 * unmounted once more after the detach.
 *
 * Returns 0 on success, -1 on failure.
 */
static int lxcContainerUnmountSubtree(const char *prefix,
                                      bool isOldRootFS)
{
    char **mounts = NULL;
    size_t nmounts = 0;
    size_t i;
    int saveErrno;
    const char *failedUmount = NULL;
    int ret = -1;

    VIR_DEBUG("Unmount subtreee from %s", prefix);

    if (lxcContainerGetSubtree(prefix, &mounts, &nmounts) < 0)
        goto cleanup;

    for (i = 0; i < nmounts && !failedUmount; i++) {
        VIR_DEBUG("Umount %s", mounts[i]);
        if (umount(mounts[i]) < 0) {
            char ebuf[1024];

            /* Remember the first failure and its errno */
            saveErrno = errno;
            failedUmount = mounts[i];
            VIR_WARN("Failed to unmount '%s', trying to detach subtree '%s': %s",
                     failedUmount, mounts[nmounts-1],
                     virStrerror(saveErrno, ebuf, sizeof(ebuf)));
        }
    }

    if (failedUmount) {
        const char *subtree = mounts[nmounts-1];

        /* This detaches the subtree */
        if (umount2(subtree, MNT_DETACH) < 0) {
            virReportSystemError(saveErrno,
                                 _("Failed to unmount '%s' and could not detach subtree '%s'"),
                                 failedUmount, subtree);
            goto cleanup;
        }
        /* This unmounts the tmpfs on which the old root filesystem was hosted */
        if (isOldRootFS &&
            umount(subtree) < 0) {
            virReportSystemError(saveErrno,
                                 _("Failed to unmount '%s' and could not unmount old root '%s'"),
                                 failedUmount, subtree);
            goto cleanup;
        }
    }

    ret = 0;

cleanup:
    for (i = 0; i < nmounts; i++)
        VIR_FREE(mounts[i]);
    VIR_FREE(mounts);

    return ret;
}
|
|
|
|
|
|
|
|
|
2012-05-11 16:26:48 +00:00
|
|
|
/*
 * One entry recorded by lxcContainerIdentifyCGroups(): either a
 * cgroup mount point, or a symlink found in the cgroup directory.
 * Both strings are heap allocated and released by
 * lxcContainerCGroupFree().
 */
struct lxcContainerCGroup {
    const char *dir;        /* path of the mount point or of the symlink */
    const char *linkDest;   /* symlink target; NULL for a real mount */
};
|
|
|
|
|
|
|
|
|
|
|
|
static void lxcContainerCGroupFree(struct lxcContainerCGroup *mounts,
|
|
|
|
size_t nmounts)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
if (!mounts)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0 ; i < nmounts ; i++) {
|
|
|
|
VIR_FREE(mounts[i].dir);
|
|
|
|
VIR_FREE(mounts[i].linkDest);
|
|
|
|
}
|
|
|
|
VIR_FREE(mounts);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int lxcContainerIdentifyCGroups(struct lxcContainerCGroup **mountsret,
|
|
|
|
size_t *nmountsret)
|
|
|
|
{
|
|
|
|
FILE *procmnt = NULL;
|
|
|
|
struct mntent mntent;
|
|
|
|
struct dirent *dent;
|
|
|
|
char mntbuf[1024];
|
|
|
|
int ret = -1;
|
|
|
|
struct lxcContainerCGroup *mounts = NULL;
|
|
|
|
size_t nmounts = 0;
|
|
|
|
DIR *dh = NULL;
|
|
|
|
char *path = NULL;
|
|
|
|
|
|
|
|
*mountsret = NULL;
|
|
|
|
*nmountsret = 0;
|
|
|
|
|
|
|
|
VIR_DEBUG("Finding cgroups mount points under %s", VIR_CGROUP_SYSFS_MOUNT);
|
|
|
|
|
|
|
|
if (!(procmnt = setmntent("/proc/mounts", "r"))) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Failed to read /proc/mounts"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
|
|
|
|
VIR_DEBUG("Got %s", mntent.mnt_dir);
|
|
|
|
if (STRNEQ(mntent.mnt_type, "cgroup") ||
|
|
|
|
!STRPREFIX(mntent.mnt_dir, VIR_CGROUP_SYSFS_MOUNT))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Skip named mounts with no controller since they're
|
|
|
|
* for application use only ie systemd */
|
|
|
|
if (strstr(mntent.mnt_opts, "name="))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (VIR_EXPAND_N(mounts, nmounts, 1) < 0) {
|
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
if (!(mounts[nmounts-1].dir = strdup(mntent.mnt_dir))) {
|
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
VIR_DEBUG("Grabbed %s", mntent.mnt_dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
VIR_DEBUG("Checking for symlinks in %s", VIR_CGROUP_SYSFS_MOUNT);
|
|
|
|
if (!(dh = opendir(VIR_CGROUP_SYSFS_MOUNT))) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to read directory %s"),
|
|
|
|
VIR_CGROUP_SYSFS_MOUNT);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
while ((dent = readdir(dh)) != NULL) {
|
|
|
|
ssize_t rv;
|
|
|
|
/* The cgroups links are just relative to the local
|
|
|
|
* dir so we don't need a large buf */
|
|
|
|
char linkbuf[100];
|
|
|
|
|
|
|
|
if (dent->d_name[0] == '.')
|
|
|
|
continue;
|
|
|
|
|
|
|
|
VIR_DEBUG("Checking entry %s", dent->d_name);
|
|
|
|
if (virAsprintf(&path, "%s/%s", VIR_CGROUP_SYSFS_MOUNT, dent->d_name) < 0) {
|
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((rv = readlink(path, linkbuf, sizeof(linkbuf)-1)) < 0) {
|
|
|
|
if (errno != EINVAL) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to resolve link %s"),
|
|
|
|
path);
|
|
|
|
VIR_FREE(path);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
/* Ok not a link */
|
|
|
|
VIR_FREE(path);
|
|
|
|
} else {
|
|
|
|
linkbuf[rv] = '\0';
|
|
|
|
VIR_DEBUG("Got a link %s to %s", path, linkbuf);
|
|
|
|
if (VIR_EXPAND_N(mounts, nmounts, 1) < 0) {
|
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
if (!(mounts[nmounts-1].linkDest = strdup(linkbuf))) {
|
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
mounts[nmounts-1].dir = path;
|
|
|
|
path = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*mountsret = mounts;
|
|
|
|
*nmountsret = nmounts;
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
closedir(dh);
|
|
|
|
endmntent(procmnt);
|
|
|
|
VIR_FREE(path);
|
|
|
|
|
|
|
|
if (ret < 0)
|
|
|
|
lxcContainerCGroupFree(mounts, nmounts);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int lxcContainerMountCGroups(struct lxcContainerCGroup *mounts,
|
|
|
|
size_t nmounts)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
VIR_DEBUG("Mounting cgroups at '%s'", VIR_CGROUP_SYSFS_MOUNT);
|
|
|
|
|
|
|
|
if (virFileMakePath(VIR_CGROUP_SYSFS_MOUNT) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to create directory %s"),
|
|
|
|
VIR_CGROUP_SYSFS_MOUNT);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mount("tmpfs", VIR_CGROUP_SYSFS_MOUNT, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC, "mode=755") < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to mount %s on %s type %s"),
|
|
|
|
"tmpfs", VIR_CGROUP_SYSFS_MOUNT, "tmpfs");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0 ; i < nmounts ; i++) {
|
|
|
|
if (mounts[i].linkDest) {
|
|
|
|
VIR_DEBUG("Link mount point '%s' to '%s'",
|
|
|
|
mounts[i].dir, mounts[i].linkDest);
|
|
|
|
if (symlink(mounts[i].linkDest, mounts[i].dir) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to symlink directory %s to %s"),
|
|
|
|
mounts[i].dir, mounts[i].linkDest);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
VIR_DEBUG("Create mount point '%s'", mounts[i].dir);
|
|
|
|
if (virFileMakePath(mounts[i].dir) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to create directory %s"),
|
|
|
|
mounts[i].dir);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mount("cgroup", mounts[i].dir, "cgroup",
|
|
|
|
0, mounts[i].dir + strlen(VIR_CGROUP_SYSFS_MOUNT) + 1) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Failed to mount %s on %s"),
|
|
|
|
"cgroup", mounts[i].dir);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
/* Got a FS mapped to /, we're going the pivot_root
|
|
|
|
* approach to do a better-chroot-than-chroot
|
|
|
|
* this is based on this thread http://lkml.org/lkml/2008/3/5/29
|
|
|
|
*/
|
|
|
|
static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
|
2011-10-20 08:44:31 +00:00
|
|
|
virDomainFSDefPtr root,
|
|
|
|
char **ttyPaths,
|
2012-05-11 10:02:50 +00:00
|
|
|
size_t nttyPaths,
|
|
|
|
virSecurityManagerPtr securityDriver)
|
2008-08-28 22:40:50 +00:00
|
|
|
{
|
2012-05-11 16:26:48 +00:00
|
|
|
struct lxcContainerCGroup *mounts = NULL;
|
|
|
|
size_t nmounts = 0;
|
|
|
|
int ret = -1;
|
|
|
|
|
|
|
|
/* Before pivoting we need to identify any
|
|
|
|
* cgroups controllers that are mounted */
|
|
|
|
if (lxcContainerIdentifyCGroups(&mounts, &nmounts) < 0)
|
2012-06-19 01:33:20 +00:00
|
|
|
return -1;
|
2012-05-11 16:26:48 +00:00
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Gives us a private root, leaving all parent OS mounts on /.oldroot */
|
2008-08-28 22:40:50 +00:00
|
|
|
if (lxcContainerPivotRoot(root) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2012-06-12 20:26:37 +00:00
|
|
|
/* Gets rid of any existing stuff under /proc, since we need new
|
|
|
|
* namespace aware versions of those. We must do /proc second
|
|
|
|
* otherwise we won't find /proc/mounts :-) */
|
|
|
|
if (lxcContainerUnmountSubtree("/sys", false) < 0 ||
|
|
|
|
lxcContainerUnmountSubtree("/proc", false) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2011-07-22 12:02:05 +00:00
|
|
|
/* Mounts the core /proc, /sys, etc filesystems */
|
2012-05-10 16:16:11 +00:00
|
|
|
if (lxcContainerMountBasicFS(vmDef, true, securityDriver) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
/* Now we can re-mount the cgroups controllers in the
|
|
|
|
* same configuration as before */
|
|
|
|
if (lxcContainerMountCGroups(mounts, nmounts) < 0)
|
|
|
|
goto cleanup;
|
2011-07-22 12:02:05 +00:00
|
|
|
|
2012-01-25 14:12:54 +00:00
|
|
|
/* Mounts /dev/pts */
|
|
|
|
if (lxcContainerMountFSDevPTS(root) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Populates device nodes in /dev/ */
|
2011-10-20 08:44:31 +00:00
|
|
|
if (lxcContainerPopulateDevices(ttyPaths, nttyPaths) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Sets up any non-root mounts from guest config */
|
2011-07-22 11:11:12 +00:00
|
|
|
if (lxcContainerMountAllFS(vmDef, "/.oldroot", true) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
/* Gets rid of all remaining mounts from host OS, including /.oldroot itself */
|
2012-05-11 10:35:28 +00:00
|
|
|
if (lxcContainerUnmountSubtree("/.oldroot", true) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2012-05-11 16:26:48 +00:00
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
lxcContainerCGroupFree(mounts, nmounts);
|
|
|
|
return ret;
|
2008-08-28 22:40:50 +00:00
|
|
|
}
|
|
|
|
|
2011-07-22 11:11:12 +00:00
|
|
|
|
2008-08-28 22:40:50 +00:00
|
|
|
/* Nothing mapped to /, we're using the main root,
|
|
|
|
but with extra stuff mapped in */
|
2012-05-11 10:02:50 +00:00
|
|
|
static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef,
|
2012-05-11 14:09:27 +00:00
|
|
|
virDomainFSDefPtr root,
|
2012-05-11 10:02:50 +00:00
|
|
|
virSecurityManagerPtr securityDriver)
|
2008-08-28 22:40:50 +00:00
|
|
|
{
|
2012-05-11 16:26:48 +00:00
|
|
|
int ret = -1;
|
|
|
|
struct lxcContainerCGroup *mounts = NULL;
|
|
|
|
size_t nmounts = 0;
|
|
|
|
|
2011-07-22 11:11:12 +00:00
|
|
|
VIR_DEBUG("def=%p", vmDef);
|
|
|
|
/*
|
|
|
|
* This makes sure that any new filesystems in the
|
|
|
|
* host OS propagate to the container, but any
|
|
|
|
* changes in the container are private
|
|
|
|
*/
|
2009-04-14 17:51:12 +00:00
|
|
|
if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to make / slave"));
|
2009-04-14 17:51:12 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2012-05-11 14:09:27 +00:00
|
|
|
if (root && root->readonly) {
|
|
|
|
if (mount("", "/", NULL, MS_BIND|MS_REC|MS_RDONLY|MS_REMOUNT, NULL) < 0) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Failed to make root readonly"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-07-22 12:02:05 +00:00
|
|
|
VIR_DEBUG("Mounting config FS");
|
2011-07-22 11:11:12 +00:00
|
|
|
if (lxcContainerMountAllFS(vmDef, "", false) < 0)
|
|
|
|
return -1;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2012-05-11 16:26:48 +00:00
|
|
|
/* Before replacing /sys we need to identify any
|
|
|
|
* cgroups controllers that are mounted */
|
|
|
|
if (lxcContainerIdentifyCGroups(&mounts, &nmounts) < 0)
|
2012-06-19 01:33:20 +00:00
|
|
|
return -1;
|
2012-05-11 16:26:48 +00:00
|
|
|
|
2012-05-11 10:35:28 +00:00
|
|
|
/* Gets rid of any existing stuff under /proc, since we need new
|
|
|
|
* namespace aware versions of those. We must do /proc second
|
|
|
|
* otherwise we won't find /proc/mounts :-) */
|
|
|
|
if (lxcContainerUnmountSubtree("/sys", false) < 0 ||
|
|
|
|
lxcContainerUnmountSubtree("/proc", false) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
2012-05-11 10:35:28 +00:00
|
|
|
|
2011-07-22 12:02:05 +00:00
|
|
|
/* Mounts the core /proc, /sys, etc filesystems */
|
2012-05-10 16:16:11 +00:00
|
|
|
if (lxcContainerMountBasicFS(vmDef, false, securityDriver) < 0)
|
2012-05-11 16:26:48 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
/* Now we can re-mount the cgroups controllers in the
|
|
|
|
* same configuration as before */
|
|
|
|
if (lxcContainerMountCGroups(mounts, nmounts) < 0)
|
|
|
|
goto cleanup;
|
2008-08-28 22:40:50 +00:00
|
|
|
|
2011-07-22 12:02:05 +00:00
|
|
|
VIR_DEBUG("Mounting completed");
|
2012-05-11 16:26:48 +00:00
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
lxcContainerCGroupFree(mounts, nmounts);
|
|
|
|
return ret;
|
2008-08-28 22:40:50 +00:00
|
|
|
}
|
|
|
|
|
2012-01-17 21:33:02 +00:00
|
|
|
|
|
|
|
/*
 * Replace the source path of every configured filesystem with the
 * path returned by virFileResolveAllLinks(). Entries without a
 * source are left untouched.
 *
 * Returns 0 on success, -1 as soon as one path fails to resolve.
 */
static int lxcContainerResolveSymlinks(virDomainDefPtr vmDef)
{
    size_t idx;

    for (idx = 0; idx < vmDef->nfss; idx++) {
        virDomainFSDefPtr fs = vmDef->fss[idx];
        char *resolved;

        if (fs->src) {
            if (virFileResolveAllLinks(fs->src, &resolved) < 0)
                return -1;

            VIR_DEBUG("Resolved '%s' to %s", fs->src, resolved);

            /* Swap in the resolved path, dropping the old string */
            VIR_FREE(fs->src);
            fs->src = resolved;
        }
    }

    return 0;
}
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
static int lxcContainerSetupMounts(virDomainDefPtr vmDef,
|
2011-10-20 08:44:31 +00:00
|
|
|
virDomainFSDefPtr root,
|
|
|
|
char **ttyPaths,
|
2012-05-11 10:02:50 +00:00
|
|
|
size_t nttyPaths,
|
|
|
|
virSecurityManagerPtr securityDriver)
|
2008-08-28 22:40:50 +00:00
|
|
|
{
|
2012-01-17 21:33:02 +00:00
|
|
|
if (lxcContainerResolveSymlinks(vmDef) < 0)
|
|
|
|
return -1;
|
|
|
|
|
2012-06-12 20:26:37 +00:00
|
|
|
if (root && root->src)
|
2012-05-11 10:02:50 +00:00
|
|
|
return lxcContainerSetupPivotRoot(vmDef, root, ttyPaths, nttyPaths, securityDriver);
|
2008-08-28 22:40:50 +00:00
|
|
|
else
|
2012-05-11 14:09:27 +00:00
|
|
|
return lxcContainerSetupExtraMounts(vmDef, root, securityDriver);
|
2008-08-28 22:40:50 +00:00
|
|
|
}
|
|
|
|
|
2009-06-29 17:09:42 +00:00
|
|
|
|
|
|
|
/*
 * This is running as the 'init' process inside the container.
 * It removes some capabilities that could be dangerous to
 * host system, since they are not currently "containerized".
 *
 * Without libcap-ng support only a warning is logged.
 *
 * Returns 0 on success (or when libcap-ng is unavailable),
 * -1 if the capabilities could not be updated or applied.
 */
static int lxcContainerDropCapabilities(void)
{
#if HAVE_CAPNG
    int rc;

    capng_get_caps_process();

    rc = capng_updatev(CAPNG_DROP,
                       CAPNG_EFFECTIVE | CAPNG_PERMITTED |
                       CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
                       CAP_SYS_BOOT, /* No use of reboot */
                       CAP_SYS_MODULE, /* No kernel module loading */
                       CAP_SYS_TIME, /* No changing the clock */
                       CAP_AUDIT_CONTROL, /* No messing with auditing status */
                       CAP_MAC_ADMIN, /* No messing with LSM config */
                       -1 /* sentinel */);
    if (rc < 0) {
        lxcError(VIR_ERR_INTERNAL_ERROR,
                 _("Failed to remove capabilities: %d"), rc);
        return -1;
    }

    rc = capng_apply(CAPNG_SELECT_BOTH);
    if (rc < 0) {
        lxcError(VIR_ERR_INTERNAL_ERROR,
                 _("Failed to apply capabilities: %d"), rc);
        return -1;
    }

    /* We do not need to call capng_lock() in this case. The bounding
     * set restriction will prevent them reacquiring sys_boot/module/time,
     * etc which is all that matters for the container. Once inside the
     * container it is fine for SECURE_NOROOT / SECURE_NO_SETUID_FIXUP to
     * be unmasked - they can never escape the bounding set. */

#else
    VIR_WARN("libcap-ng support not compiled in, unable to clear capabilities");
#endif
    return 0;
}
|
|
|
|
|
|
|
|
|
2008-04-10 07:30:52 +00:00
|
|
|
/**
|
2009-11-05 12:35:13 +00:00
|
|
|
* lxcContainerChild:
|
|
|
|
* @data: pointer to container arguments
|
2008-04-10 07:30:52 +00:00
|
|
|
*
|
|
|
|
* This function is run in the process clone()'d in lxcStartContainer.
|
|
|
|
* Perform a number of container setup tasks:
|
|
|
|
* Setup container file system
|
|
|
|
* mount container /proca
|
|
|
|
* Then exec's the container init
|
|
|
|
*
|
|
|
|
* Returns 0 on success or -1 in case of error
|
|
|
|
*/
|
2008-08-13 10:25:34 +00:00
|
|
|
static int lxcContainerChild( void *data )
|
2008-04-10 07:30:52 +00:00
|
|
|
{
|
2008-08-13 10:14:47 +00:00
|
|
|
lxc_child_argv_t *argv = data;
|
2008-08-13 12:50:55 +00:00
|
|
|
virDomainDefPtr vmDef = argv->config;
|
2011-06-02 15:01:36 +00:00
|
|
|
int ttyfd = -1;
|
2011-05-06 14:50:00 +00:00
|
|
|
int ret = -1;
|
2011-06-02 15:01:36 +00:00
|
|
|
char *ttyPath = NULL;
|
2009-04-22 14:26:50 +00:00
|
|
|
virDomainFSDefPtr root;
|
2011-05-06 14:50:00 +00:00
|
|
|
virCommandPtr cmd = NULL;
|
2008-04-10 07:30:52 +00:00
|
|
|
|
|
|
|
if (NULL == vmDef) {
|
2010-02-09 18:22:56 +00:00
|
|
|
lxcError(VIR_ERR_INTERNAL_ERROR,
|
avoid many format string warnings
Building with --disable-nls exposed many new warnings like these:
virsh.c:4952: warning: format not a string literal and no format ...
util.c:163: warning: format not a string literal and no format arguments
All but one of the following changes add a "%s" argument before
the offending _(...) argument.
This was the only manual change:
* src/lxc_driver.c (lxcVersion): Use %s and strerror(errno)
rather than %m, to avoid a warning from gcc -Wformat-security.
Add "%s" before each warned about format-string-with-no-%-directive:
* src/domain_conf.c (virDomainHostdevSubsysUsbDefParseXML)
(virDomainDefParseString, virDomainDefParseFile):
* src/hash.c (virGetConnect, __virGetDomain, virReleaseDomain)
(__virGetNetwork, virReleaseNetwork, __virGetStoragePool)
(virReleaseStoragePool, __virGetStorageVol, virReleaseStorageVol):
* src/lxc_container.c (lxcContainerChild):
* src/lxc_driver.c (lxcDomainDefine, lxcDomainUndefine)
(lxcDomainGetInfo, lxcGetOSType, lxcDomainDumpXML)
(lxcSetupInterfaces, lxcDomainStart, lxcDomainCreateAndStart)
(lxcVersion, lxcGetSchedulerParameters):
* src/network_conf.c (virNetworkDefParseString)
(virNetworkDefParseFile):
* src/openvz_conf.c (openvzReadNetworkConf, openvzLoadDomains):
* src/openvz_driver.c (openvzDomainDefineCmd)
(openvzDomainGetInfo, openvzDomainDumpXML, openvzDomainShutdown)
(openvzDomainReboot, ADD_ARG_LIT, openvzDomainDefineXML)
(openvzDomainCreateXML, openvzDomainCreate, openvzDomainUndefine)
(openvzDomainSetAutostart, openvzDomainGetAutostart)
(openvzDomainSetVcpus):
* src/qemu_driver.c (qemudDomainBlockPeek, qemudDomainMemoryPeek):
* src/remote_internal.c (remoteDomainBlockPeek)
(remoteDomainMemoryPeek, remoteAuthPolkit):
* src/sexpr.c (sexpr_new, _string2sexpr):
* src/storage_backend_disk.c (virStorageBackendDiskMakeDataVol)
(virStorageBackendDiskCreateVol):
* src/storage_backend_fs.c
(virStorageBackendFileSystemNetFindPoolSources):
* src/storage_backend_logical.c (virStorageBackendLogicalFindLVs)
(virStorageBackendLogicalFindPoolSources):
* src/test.c (testOpenDefault, testOpenFromFile, testOpen)
(testGetDomainInfo, testDomainRestore)
(testNodeGetCellsFreeMemory):
* src/util.c (virExec):
* src/virsh.c (cmdAttachDevice, cmdDetachDevice)
(cmdAttachInterface, cmdDetachInterface, cmdAttachDisk)
(cmdDetachDisk, cmdEdit):
* src/xend_internal.c (do_connect, wr_sync, xend_op_ext)
(urlencode, xenDaemonDomainCreateXML)
(xenDaemonDomainLookupByName_ids, xenDaemonDomainLookupByID)
(xenDaemonParseSxprOS, xend_parse_sexp_desc_char)
(xenDaemonParseSxprChar, xenDaemonParseSxprDisks)
(xenDaemonParseSxpr, sexpr_to_xend_topology, sexpr_to_domain)
(xenDaemonDomainFetch, xenDaemonDomainGetAutostart)
(xenDaemonDomainSetAutostart, xenDaemonDomainMigratePerform)
(xenDaemonDomainDefineXML, xenDaemonGetSchedulerType)
(xenDaemonGetSchedulerParameters)
(xenDaemonSetSchedulerParameters, xenDaemonDomainBlockPeek)
(xenDaemonFormatSxprChr, virDomainXMLDevID):
* src/xm_internal.c (xenXMConfigCacheRefresh, xenXMDomainPinVcpu)
(xenXMDomainCreate, xenXMDomainDefineXML)
(xenXMDomainAttachDevice, xenXMDomainDetachDevice):
* src/xml.c (virXPathString, virXPathNumber, virXPathLong)
(virXPathULong, virXPathBoolean, virXPathNode, virXPathNodeSet):
* src/xs_internal.c (xenStoreOpen):
2008-10-13 16:46:28 +00:00
|
|
|
"%s", _("lxcChild() passed invalid vm definition"));
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
|
|
|
|
2011-05-06 14:50:00 +00:00
|
|
|
cmd = lxcContainerBuildInitCmd(vmDef);
|
|
|
|
virCommandWriteArgLog(cmd, 1);
|
|
|
|
|
2009-04-22 14:26:50 +00:00
|
|
|
root = virDomainGetRootFilesystem(vmDef);
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2011-10-20 08:44:31 +00:00
|
|
|
if (argv->nttyPaths) {
|
|
|
|
if (root) {
|
|
|
|
if (virAsprintf(&ttyPath, "%s%s", root->src, argv->ttyPaths[0]) < 0) {
|
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (!(ttyPath = strdup(argv->ttyPaths[0]))) {
|
|
|
|
virReportOOMError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2009-04-22 14:26:50 +00:00
|
|
|
}
|
|
|
|
} else {
|
2011-10-20 08:44:31 +00:00
|
|
|
if (!(ttyPath = strdup("/dev/null"))) {
|
2010-02-04 18:19:08 +00:00
|
|
|
virReportOOMError();
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2009-04-22 14:26:50 +00:00
|
|
|
}
|
|
|
|
}
|
2011-10-20 08:44:31 +00:00
|
|
|
|
2011-05-06 14:50:00 +00:00
|
|
|
VIR_DEBUG("Container TTY path: %s", ttyPath);
|
2009-04-22 14:26:50 +00:00
|
|
|
|
|
|
|
ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
|
2008-08-28 22:40:50 +00:00
|
|
|
if (ttyfd < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno,
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to open tty %s"),
|
2009-04-22 14:26:50 +00:00
|
|
|
ttyPath);
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2012-05-11 10:02:50 +00:00
|
|
|
if (lxcContainerSetupMounts(vmDef, root,
|
|
|
|
argv->ttyPaths, argv->nttyPaths,
|
|
|
|
argv->securityDriver) < 0)
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2009-04-22 14:26:50 +00:00
|
|
|
|
2011-06-02 19:25:21 +00:00
|
|
|
if (!virFileExists(vmDef->os.init)) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("cannot find init path '%s' relative to container root"),
|
|
|
|
vmDef->os.init);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2008-06-26 16:09:48 +00:00
|
|
|
/* Wait for interface devices to show up */
|
2011-06-02 15:18:14 +00:00
|
|
|
if (lxcContainerWaitForContinue(argv->monitor) < 0) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("Failed to read the container continue message"));
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2011-06-02 15:18:14 +00:00
|
|
|
}
|
|
|
|
VIR_DEBUG("Received container continue message");
|
2008-06-26 16:09:48 +00:00
|
|
|
|
2009-11-05 13:11:30 +00:00
|
|
|
/* rename and enable interfaces */
|
2012-01-18 11:38:49 +00:00
|
|
|
if (lxcContainerRenameAndEnableInterfaces(!!(vmDef->features &
|
|
|
|
(1 << VIR_DOMAIN_FEATURE_PRIVNET)),
|
|
|
|
argv->nveths,
|
2011-06-02 15:01:36 +00:00
|
|
|
argv->veths) < 0) {
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2011-06-02 15:01:36 +00:00
|
|
|
}
|
2008-06-26 16:09:48 +00:00
|
|
|
|
2009-05-11 14:05:27 +00:00
|
|
|
/* drop a set of root capabilities */
|
2009-06-29 17:09:42 +00:00
|
|
|
if (lxcContainerDropCapabilities() < 0)
|
2011-05-06 14:50:00 +00:00
|
|
|
goto cleanup;
|
2009-05-11 14:05:27 +00:00
|
|
|
|
2011-06-02 15:52:32 +00:00
|
|
|
if (lxcContainerSendContinue(argv->handshakefd) < 0) {
|
|
|
|
virReportSystemError(errno, "%s",
|
|
|
|
_("failed to send continue signal to controller"));
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2012-01-25 14:12:53 +00:00
|
|
|
VIR_DEBUG("Setting up security labeling");
|
|
|
|
if (virSecurityManagerSetProcessLabel(argv->securityDriver, vmDef) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2012-05-01 09:48:52 +00:00
|
|
|
if (lxcContainerSetStdio(argv->monitor, ttyfd, argv->handshakefd) < 0) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2011-06-02 15:01:36 +00:00
|
|
|
ret = 0;
|
2011-05-06 14:50:00 +00:00
|
|
|
cleanup:
|
2011-06-02 15:01:36 +00:00
|
|
|
VIR_FREE(ttyPath);
|
|
|
|
VIR_FORCE_CLOSE(ttyfd);
|
2011-06-02 15:18:14 +00:00
|
|
|
VIR_FORCE_CLOSE(argv->monitor);
|
2011-06-02 15:52:32 +00:00
|
|
|
VIR_FORCE_CLOSE(argv->handshakefd);
|
2011-06-02 15:01:36 +00:00
|
|
|
|
|
|
|
if (ret == 0) {
|
2011-10-10 20:02:06 +00:00
|
|
|
/* this function will only return if an error occurred */
|
2011-06-02 15:01:36 +00:00
|
|
|
ret = virCommandExec(cmd);
|
|
|
|
}
|
|
|
|
|
2011-05-06 14:50:00 +00:00
|
|
|
virCommandFree(cmd);
|
|
|
|
return ret;
|
2008-08-13 10:25:34 +00:00
|
|
|
}
|
2008-04-10 07:30:52 +00:00
|
|
|
|
2009-04-20 12:27:12 +00:00
|
|
|
/*
 * userns_supported:
 *
 * Tells lxcContainerStart whether to add CLONE_NEWUSER to the clone flags.
 * User namespaces are deliberately kept off until uid mapping is
 * implemented, so this always returns 0 for now; the runtime probe is
 * retained in the inactive preprocessor branch.
 */
static int userns_supported(void)
{
#if 0
    return lxcContainerAvailable(LXC_CONTAINER_FEATURE_USER) == 0;
#else
    /*
     * put off using userns until uid mapping is implemented
     */
    return 0;
#endif
}
|
|
|
|
|
2011-02-23 17:17:53 +00:00
|
|
|
/*
 * lxcContainerGetAlt32bitArch:
 * @arch: name of a 64bit architecture
 *
 * Looks up the 32bit personality that goes with a 64bit Linux
 * architecture name.
 *
 * Returns the 32bit architecture name (a string literal, not to be
 * freed), or NULL when @arch has no entry in the table.
 */
const char *lxcContainerGetAlt32bitArch(const char *arch)
{
    /* Any Linux 64bit arch which has a 32bit
     * personality available should be listed here */
    static const struct {
        const char *arch64;
        const char *arch32;
    } altArchs[] = {
        { "x86_64",   "i686"   },
        { "s390x",    "s390"   },
        { "ppc64",    "ppc"    },
        { "parisc64", "parisc" },
        { "sparc64",  "sparc"  },
        { "mips64",   "mips"   },
    };
    size_t i;

    for (i = 0; i < sizeof(altArchs) / sizeof(altArchs[0]); i++) {
        if (STREQ(arch, altArchs[i].arch64))
            return altArchs[i].arch32;
    }

    return NULL;
}
|
|
|
|
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
/**
|
|
|
|
* lxcContainerStart:
|
2009-11-05 12:35:13 +00:00
|
|
|
* @def: pointer to virtual machine structure
|
|
|
|
* @nveths: number of interfaces
|
|
|
|
* @veths: interface names
|
|
|
|
* @control: control FD to the container
|
|
|
|
* @ttyPath: path of tty to set as the container console
|
2008-08-13 10:25:34 +00:00
|
|
|
*
|
|
|
|
* Starts a container process by calling clone() with the namespace flags
|
|
|
|
*
|
|
|
|
* Returns PID of container on success or -1 in case of error
|
|
|
|
*/
|
2008-08-13 12:50:55 +00:00
|
|
|
int lxcContainerStart(virDomainDefPtr def,
|
2012-01-25 14:12:53 +00:00
|
|
|
virSecurityManagerPtr securityDriver,
|
2008-08-13 10:52:15 +00:00
|
|
|
unsigned int nveths,
|
|
|
|
char **veths,
|
2008-08-13 10:25:34 +00:00
|
|
|
int control,
|
2011-06-02 15:52:32 +00:00
|
|
|
int handshakefd,
|
2011-10-20 08:44:31 +00:00
|
|
|
char **ttyPaths,
|
|
|
|
size_t nttyPaths)
|
2008-08-13 10:25:34 +00:00
|
|
|
{
|
|
|
|
pid_t pid;
|
2011-07-06 22:33:53 +00:00
|
|
|
int cflags;
|
2008-08-13 10:25:34 +00:00
|
|
|
int stacksize = getpagesize() * 4;
|
|
|
|
char *stack, *stacktop;
|
2012-01-25 14:12:53 +00:00
|
|
|
lxc_child_argv_t args = { def, securityDriver,
|
|
|
|
nveths, veths, control,
|
2011-10-20 08:44:31 +00:00
|
|
|
ttyPaths, nttyPaths, handshakefd};
|
2008-08-13 10:25:34 +00:00
|
|
|
|
|
|
|
/* allocate a stack for the container */
|
|
|
|
if (VIR_ALLOC_N(stack, stacksize) < 0) {
|
2010-02-04 18:19:08 +00:00
|
|
|
virReportOOMError();
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
stacktop = stack + stacksize;
|
|
|
|
|
2011-07-06 22:33:53 +00:00
|
|
|
cflags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|SIGCHLD;
|
2009-04-20 12:27:12 +00:00
|
|
|
|
2010-03-04 11:23:28 +00:00
|
|
|
if (userns_supported()) {
|
2011-05-09 09:24:09 +00:00
|
|
|
VIR_DEBUG("Enable user namespaces");
|
2011-07-06 22:33:53 +00:00
|
|
|
cflags |= CLONE_NEWUSER;
|
2010-03-04 11:23:28 +00:00
|
|
|
}
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2012-01-18 11:38:49 +00:00
|
|
|
if (def->nets != NULL ||
|
|
|
|
(def->features & (1 << VIR_DOMAIN_FEATURE_PRIVNET))) {
|
2011-05-09 09:24:09 +00:00
|
|
|
VIR_DEBUG("Enable network namespaces");
|
2011-07-06 22:33:53 +00:00
|
|
|
cflags |= CLONE_NEWNET;
|
2010-03-04 11:23:28 +00:00
|
|
|
}
|
2008-08-13 10:25:34 +00:00
|
|
|
|
2011-07-06 22:33:53 +00:00
|
|
|
pid = clone(lxcContainerChild, stacktop, cflags, &args);
|
2008-08-13 10:25:34 +00:00
|
|
|
VIR_FREE(stack);
|
2011-02-16 23:37:57 +00:00
|
|
|
VIR_DEBUG("clone() completed, new container PID is %d", pid);
|
2008-08-13 10:25:34 +00:00
|
|
|
|
|
|
|
if (pid < 0) {
|
2010-02-04 20:02:58 +00:00
|
|
|
virReportSystemError(errno, "%s",
|
2009-11-05 12:39:09 +00:00
|
|
|
_("Failed to run clone container"));
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return pid;
|
|
|
|
}
|
|
|
|
|
2010-07-16 16:16:19 +00:00
|
|
|
ATTRIBUTE_NORETURN static int
|
|
|
|
lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED)
|
2008-08-13 10:25:34 +00:00
|
|
|
{
|
|
|
|
_exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int lxcContainerAvailable(int features)
|
|
|
|
{
|
2009-04-20 12:27:12 +00:00
|
|
|
int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|
|
2008-08-13 10:25:34 +00:00
|
|
|
CLONE_NEWIPC|SIGCHLD;
|
|
|
|
int cpid;
|
|
|
|
char *childStack;
|
|
|
|
char *stack;
|
|
|
|
|
2009-04-20 12:27:12 +00:00
|
|
|
if (features & LXC_CONTAINER_FEATURE_USER)
|
|
|
|
flags |= CLONE_NEWUSER;
|
|
|
|
|
2008-08-13 10:25:34 +00:00
|
|
|
if (features & LXC_CONTAINER_FEATURE_NET)
|
|
|
|
flags |= CLONE_NEWNET;
|
|
|
|
|
|
|
|
if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) {
|
2011-05-09 09:24:09 +00:00
|
|
|
VIR_DEBUG("Unable to allocate stack");
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
childStack = stack + (getpagesize() * 4);
|
|
|
|
|
|
|
|
cpid = clone(lxcContainerDummyChild, childStack, flags, NULL);
|
|
|
|
VIR_FREE(stack);
|
|
|
|
if (cpid < 0) {
|
2011-09-26 16:51:47 +00:00
|
|
|
char ebuf[1024] ATTRIBUTE_UNUSED;
|
2011-02-16 23:37:57 +00:00
|
|
|
VIR_DEBUG("clone call returned %s, container support is not enabled",
|
2012-03-29 09:52:04 +00:00
|
|
|
virStrerror(errno, ebuf, sizeof(ebuf)));
|
2008-08-13 10:25:34 +00:00
|
|
|
return -1;
|
2011-10-21 17:09:23 +00:00
|
|
|
} else if (virPidWait(cpid, NULL) < 0) {
|
|
|
|
return -1;
|
2008-08-13 10:25:34 +00:00
|
|
|
}
|
|
|
|
|
2012-06-15 07:41:05 +00:00
|
|
|
VIR_DEBUG("container support is enabled");
|
2008-08-13 10:25:34 +00:00
|
|
|
return 0;
|
2008-04-10 07:30:52 +00:00
|
|
|
}
|