diff --git a/ChangeLog b/ChangeLog index 13645f7914..2dfcadeef3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +Wed Apr 22 15:27:03 BST 2009 Daniel P. Berrange + + Use private /dev/pts instance for containers (needs 'newinstance' + mount flag for devpts since 2.6.29 kernels), fallback to shared + instance if not supported + * src/domain_conf.h, src/domain_conf.c: Add a convenient + routine virDomainGetRootFilesystem() + * src/libvirt_private.syms: export virDomainGetRootFilesystem + to drivers + * src/util.c, src/util.h: Add virFileOpenTtyAt() to allow + alternate path to /dev/ptmx to be given + * src/lxc_controller.c: Attempt to set up a private /dev/pts + instance for the container's stdio I/O + * src/lxc_container.h, src/lxc_container.c: Pull in the + private /dev/pts instance set up by controller, and create + a symlink for /dev/ptmx. + Tue Apr 21 20:14:03 BST 2009 Daniel P. Berrange * src/qemu_driver.c: Remove pidfile when domain shuts down and diff --git a/src/domain_conf.c b/src/domain_conf.c index 648d9e9622..dde4020642 100644 --- a/src/domain_conf.c +++ b/src/domain_conf.c @@ -3066,7 +3066,8 @@ static int virDomainChrDefFormat(virConnectPtr conn, virBufferPtr buf, virDomainChrDefPtr def, - const char *name) + const char *name, + int flags) { const char *type = virDomainChrTypeToString(def->type); @@ -3081,6 +3082,7 @@ virDomainChrDefFormat(virConnectPtr conn, name, type); if (STREQ(name, "console") && def->type == VIR_DOMAIN_CHR_TYPE_PTY && + !(flags & VIR_DOMAIN_XML_INACTIVE) && def->data.file.path) { virBufferEscapeString(buf, " tty='%s'>\n", def->data.file.path); @@ -3100,7 +3102,7 @@ virDomainChrDefFormat(virConnectPtr conn, case VIR_DOMAIN_CHR_TYPE_FILE: case VIR_DOMAIN_CHR_TYPE_PIPE: if (def->type != VIR_DOMAIN_CHR_TYPE_PTY || - def->data.file.path) { + (def->data.file.path && !(flags & VIR_DOMAIN_XML_INACTIVE))) { virBufferEscapeString(buf, " \n", def->data.file.path); } @@ -3481,21 +3483,21 @@ char *virDomainDefFormat(virConnectPtr conn, goto 
cleanup; for (n = 0 ; n < def->nserials ; n++) - if (virDomainChrDefFormat(conn, &buf, def->serials[n], "serial") < 0) + if (virDomainChrDefFormat(conn, &buf, def->serials[n], "serial", flags) < 0) goto cleanup; for (n = 0 ; n < def->nparallels ; n++) - if (virDomainChrDefFormat(conn, &buf, def->parallels[n], "parallel") < 0) + if (virDomainChrDefFormat(conn, &buf, def->parallels[n], "parallel", flags) < 0) goto cleanup; /* If there's a PV console that's preferred.. */ if (def->console) { - if (virDomainChrDefFormat(conn, &buf, def->console, "console") < 0) + if (virDomainChrDefFormat(conn, &buf, def->console, "console", flags) < 0) goto cleanup; } else if (def->nserials != 0) { /* ..else for legacy compat duplicate the first serial device as a * console */ - if (virDomainChrDefFormat(conn, &buf, def->serials[0], "console") < 0) + if (virDomainChrDefFormat(conn, &buf, def->serials[0], "console", flags) < 0) goto cleanup; } @@ -3856,6 +3858,21 @@ const char *virDomainDefDefaultEmulator(virConnectPtr conn, return emulator; } +virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def) +{ + int i; + /* Scan def->fss[] in order and return the first entry of type VIR_DOMAIN_FS_TYPE_MOUNT whose dst is "/"; NULL when no entry matches. */ + for (i = 0 ; i < def->nfss ; i++) { + if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT) + continue; + /* only an exact "/" destination is treated as the root filesystem */ + if (STREQ(def->fss[i]->dst, "/")) + return def->fss[i]; + } + + return NULL; +} + void virDomainObjLock(virDomainObjPtr obj) { diff --git a/src/domain_conf.h b/src/domain_conf.h index f4eea6bf56..d4e7442d4e 100644 --- a/src/domain_conf.h +++ b/src/domain_conf.h @@ -636,6 +636,8 @@ const char *virDomainDefDefaultEmulator(virConnectPtr conn, virDomainDefPtr def, virCapsPtr caps); +virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def); /* entry of def mounted at "/", or NULL if there is none */ + void virDomainObjLock(virDomainObjPtr obj); void virDomainObjUnlock(virDomainObjPtr obj); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index b6ac8e00c4..d3694e0ffe 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -79,6 +79,7 @@ virDomainDiskQSort; virDomainFindByID; 
virDomainFindByName; virDomainFindByUUID; +virDomainGetRootFilesystem; virDomainGraphicsTypeFromString; virDomainGraphicsDefFree; virDomainInputDefFree; diff --git a/src/lxc_container.c b/src/lxc_container.c index 26cd619398..3946b84f06 100644 --- a/src/lxc_container.c +++ b/src/lxc_container.c @@ -308,7 +308,7 @@ static int lxcContainerPivotRoot(virDomainFSDefPtr root) /* Create a tmpfs root since old and new roots must be * on separate filesystems */ - if (mount("", oldroot, "tmpfs", 0, NULL) < 0) { + if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) { virReportSystemError(NULL, errno, _("failed to mount empty tmpfs at %s"), oldroot); @@ -338,15 +338,9 @@ static int lxcContainerPivotRoot(virDomainFSDefPtr root) /* Now we chroot into the tmpfs, then pivot into the * root->src bind-mounted onto '/new' */ - if (chroot(oldroot) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to chroot into tmpfs")); - goto err; - } - - if (chdir("/new") < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to chdir into /new on tmpfs")); + if (chdir(newroot) < 0) { + virReportSystemError(NULL, errno, + _("failed to chdir into %s"), newroot); goto err; } @@ -362,12 +356,6 @@ static int lxcContainerPivotRoot(virDomainFSDefPtr root) if (chdir("/") < 0) goto err; - if (umount2(".oldroot", MNT_DETACH) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to lazily unmount old root")); - goto err; - } - ret = 0; err: @@ -377,10 +365,64 @@ err: return ret; } +/* Mounts the entries of mnts[] (/dev as tmpfs, /proc, /sys, and /selinux when WITH_SELINUX), then moves the devpts mount found at /.oldroot<root->src>/dev/pts onto /dev/pts. Returns 0 on success, -1 after reporting an error. */ +static int lxcContainerMountBasicFS(virDomainFSDefPtr root) +{ + const struct { + const char *src; + const char *dst; + const char *type; + } mnts[] = { + { "/dev", "/dev", "tmpfs" }, + { "/proc", "/proc", "proc" }, + { "/sys", "/sys", "sysfs" }, +#if WITH_SELINUX + { "none", "/selinux", "selinuxfs" }, +#endif + }; + int i, rc = -1; + char *devpts; + /* devpts is heap-allocated below; every later exit goes through cleanup so it is always freed */ + if (virAsprintf(&devpts, "/.oldroot%s/dev/pts", root->src) < 0) { + virReportOOMError(NULL); + return -1; + } + + for (i = 0 ; i < ARRAY_CARDINALITY(mnts) ; 
i++) { + if (virFileMakePath(mnts[i].dst) < 0) { + virReportSystemError(NULL, errno, + _("failed to mkdir %s"), + mnts[i].dst); /* name the directory that could not be created */ + goto cleanup; + } + if (mount(mnts[i].src, mnts[i].dst, mnts[i].type, 0, NULL) < 0) { + virReportSystemError(NULL, errno, + _("failed to mount %s on %s"), + mnts[i].src, mnts[i].dst); /* source on target, as the format string reads */ + goto cleanup; + } + } + + if (virFileMakePath("/dev/pts") < 0) { /* NOTE(review): same check and errno reporting as the mnts[] loop above; virFileMakePath's error convention is not visible here - confirm */ + virReportSystemError(NULL, errno, "%s", + _("cannot create /dev/pts")); + goto cleanup; + } + VIR_DEBUG("Trying to move %s to %s", devpts, "/dev/pts"); + if (mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL) < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to mount /dev/pts in container")); + goto cleanup; + } + rc = 0; +cleanup: + VIR_FREE(devpts); + return rc; +} + static int lxcContainerPopulateDevices(void) { int i; - int rc; const struct { int maj; int min; @@ -395,33 +437,6 @@ static int lxcContainerPopulateDevices(void) { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/dev/urandom" }, }; - if ((rc = virFileMakePath("/dev")) < 0) { - virReportSystemError(NULL, rc, "%s", - _("cannot create /dev/")); - return -1; - } - if (mount("none", "/dev", "tmpfs", 0, NULL) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to mount /dev tmpfs")); - return -1; - } - /* Move old devpts into container, since we have to - connect to the master ptmx which was opened in - the parent. 
- XXX This sucks, we need to figure out how to get our - own private devpts for isolation - */ - if ((rc = virFileMakePath("/dev/pts") < 0)) { - virReportSystemError(NULL, rc, "%s", - _("cannot create /dev/pts")); - return -1; - } - if (mount("devpts", "/dev/pts", "devpts", 0, NULL) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to mount /dev/pts in container")); - return -1; - } - /* Populate /dev/ with a few important bits */ for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) { dev_t dev = makedev(devs[i].maj, devs[i].min); @@ -434,6 +449,23 @@ static int lxcContainerPopulateDevices(void) } } + if (access("/dev/pts/ptmx", W_OK) == 0) { /* a writable /dev/pts/ptmx exists: point /dev/ptmx at it */ + if (symlink("/dev/pts/ptmx", "/dev/ptmx") < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to create symlink /dev/ptmx to /dev/pts/ptmx")); + return -1; + } + } else { /* otherwise create /dev/ptmx as a character device node */ + dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX); + if (mknod("/dev/ptmx", S_IFCHR, dev) < 0 || /* a zero file type would create a regular file (mknod(2)); permissions are set by the chmod below */ + chmod("/dev/ptmx", 0666)) { + virReportSystemError(NULL, errno, "%s", + _("failed to make device /dev/ptmx")); + return -1; + } + } + + return 0; } @@ -493,6 +525,7 @@ static int lxcContainerUnmountOldFS(void) return -1; } while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) { + VIR_DEBUG("Got %s", mntent.mnt_dir); if (!STRPREFIX(mntent.mnt_dir, "/.oldroot")) continue; @@ -513,6 +546,7 @@ static int lxcContainerUnmountOldFS(void) lxcContainerChildMountSort); for (i = 0 ; i < nmounts ; i++) { + VIR_DEBUG("Umount %s", mounts[i]); if (umount(mounts[i]) < 0) { virReportSystemError(NULL, errno, _("failed to unmount '%s'"), @@ -534,22 +568,23 @@ static int lxcContainerUnmountOldFS(void) static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef, virDomainFSDefPtr root) { + /* Gives us a private root, leaving all parent OS mounts on /.oldroot */ if (lxcContainerPivotRoot(root) < 0) return -1; - if (virFileMakePath("/proc") < 0 || - mount("none", "/proc", "proc", 0, NULL) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to 
mount /proc")); + /* Mounts the core /proc, /sys, /dev, /dev/pts filesystems */ + if (lxcContainerMountBasicFS(root) < 0) return -1; - } + /* Populates device nodes in /dev/ */ if (lxcContainerPopulateDevices() < 0) return -1; + /* Sets up any non-root mounts from guest config */ if (lxcContainerMountNewFS(vmDef) < 0) return -1; + /* Gets rid of all remaining mounts from host OS, including /.oldroot itself */ if (lxcContainerUnmountOldFS() < 0) return -1; @@ -595,18 +630,9 @@ static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef) return 0; } -static int lxcContainerSetupMounts(virDomainDefPtr vmDef) +static int lxcContainerSetupMounts(virDomainDefPtr vmDef, + virDomainFSDefPtr root) { - int i; - virDomainFSDefPtr root = NULL; - - for (i = 0 ; i < vmDef->nfss ; i++) { - if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT) - continue; - if (STREQ(vmDef->fss[i]->dst, "/")) - root = vmDef->fss[i]; - } - if (root) return lxcContainerSetupPivotRoot(vmDef, root); else @@ -630,6 +656,8 @@ static int lxcContainerChild( void *data ) lxc_child_argv_t *argv = data; virDomainDefPtr vmDef = argv->config; int ttyfd; + char *ttyPath; + virDomainFSDefPtr root; if (NULL == vmDef) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -637,16 +665,28 @@ static int lxcContainerChild( void *data ) return -1; } - if (lxcContainerSetupMounts(vmDef) < 0) - return -1; + root = virDomainGetRootFilesystem(vmDef); - ttyfd = open(argv->ttyPath, O_RDWR|O_NOCTTY); + if (root) { /* mounts are not set up yet, so the tty path is prefixed with root->src */ + if (virAsprintf(&ttyPath, "%s%s", root->src, argv->ttyPath) < 0) { + virReportOOMError(NULL); + return -1; + } + } else { + if (!(ttyPath = strdup(argv->ttyPath))) { + virReportOOMError(NULL); + return -1; + } + } + + ttyfd = open(ttyPath, O_RDWR|O_NOCTTY); if (ttyfd < 0) { virReportSystemError(NULL, errno, - _("failed to open %s"), - argv->ttyPath); + _("failed to open tty %s"), ttyPath); + VIR_FREE(ttyPath); /* free the path on the failure exit too, after it has been used in the message */ return -1; } + VIR_FREE(ttyPath); if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) { close(ttyfd); @@ -654,6 +694,9 @@ 
static int lxcContainerChild( void *data ) } close(ttyfd); + if (lxcContainerSetupMounts(vmDef, root) < 0) + return -1; + /* Wait for interface devices to show up */ if (lxcContainerWaitForContinue(argv->monitor) < 0) return -1; diff --git a/src/lxc_container.h b/src/lxc_container.h index b99e83e536..a1dd5a16f9 100644 --- a/src/lxc_container.h +++ b/src/lxc_container.h @@ -40,6 +40,7 @@ enum { #define LXC_DEV_MAJ_TTY 5 #define LXC_DEV_MIN_CONSOLE 1 +#define LXC_DEV_MIN_PTMX 2 /* minor number paired with LXC_DEV_MAJ_TTY when /dev/ptmx is created */ #define LXC_DEV_MAJ_PTY 136 diff --git a/src/lxc_controller.c b/src/lxc_controller.c index 58dfe023d8..3f9add2cc1 100644 --- a/src/lxc_controller.c +++ b/src/lxc_controller.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "virterror_internal.h" #include "logging.h" @@ -426,6 +427,13 @@ static int lxcControllerCleanupInterfaces(unsigned int nveths, return 0; } +#ifndef MS_REC /* fallback definition, used only when the included headers do not provide MS_REC */ +#define MS_REC 16384 +#endif + +#ifndef MS_SLAVE /* fallback definition, used only when the included headers do not provide MS_SLAVE */ +#define MS_SLAVE (1<<19) +#endif static int lxcControllerRun(virDomainDefPtr def, @@ -440,6 +448,9 @@ lxcControllerRun(virDomainDefPtr def, int containerPty; char *containerPtyPath; pid_t container = -1; + virDomainFSDefPtr root; + char *devpts = NULL; + char *devptmx = NULL; if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) { virReportSystemError(NULL, errno, "%s", @@ -447,14 +458,91 @@ lxcControllerRun(virDomainDefPtr def, goto cleanup; } - if (virFileOpenTty(&containerPty, - &containerPtyPath, - 0) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to allocate tty")); - goto cleanup; + root = virDomainGetRootFilesystem(def); + + /* + * If doing a chroot style setup, we need to prepare + * a private /dev/pts for the child now, which they + * will later move into position. + * + * This is complex because 'virsh console' needs to + * use /dev/pts from the host OS, and the guest OS + * needs to use /dev/pts from the guest. + * + * This means that we (libvirt_lxc) need to see and + * use both /dev/pts instances. 
We're running in the + * host OS context though and don't want to expose + * the guest OS /dev/pts there. + * + * Thus we call unshare(CLONE_NEWNS) so that we can see + * the guest's new /dev/pts, without it becoming + * visible to the host OS. We also put the root FS + * into slave mode, just in case it was currently + * marked as shared + */ + if (root) { + VIR_DEBUG0("Setting up private /dev/pts"); + if (unshare(CLONE_NEWNS) < 0) { + virReportSystemError(NULL, errno, "%s", + _("cannot unshare mount namespace")); + goto cleanup; + } + + if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to switch root mount into slave mode")); + goto cleanup; + } + /* devpts is the mount point under the guest root; devptmx is the ptmx node expected inside it */ + if (virAsprintf(&devpts, "%s/dev/pts", root->src) < 0 || + virAsprintf(&devptmx, "%s/dev/pts/ptmx", root->src) < 0) { + virReportOOMError(NULL); + goto cleanup; + } + + if (virFileMakePath(devpts) < 0) { + virReportSystemError(NULL, errno, + _("failed to make path %s"), + devpts); + goto cleanup; + } + + VIR_DEBUG("Mouting 'devpts' on %s", devpts); + if (mount("devpts", devpts, "devpts", 0, "newinstance,ptmxmode=0666") < 0) { + virReportSystemError(NULL, errno, + _("failed to mount devpts on %s"), + devpts); + goto cleanup; + } + /* a missing/unreadable ptmx node in the new mount is taken to mean no private instance was created */ + if (access(devptmx, R_OK) < 0) { + VIR_WARN0("kernel does not support private devpts, using shared devpts"); + VIR_FREE(devptmx); /* presumably leaves devptmx NULL so the shared /dev/ptmx branch below is taken - confirm VIR_FREE semantics */ + } } + if (devptmx) { + VIR_DEBUG("Opening tty on private %s", devptmx); + if (virFileOpenTtyAt(devptmx, + &containerPty, + &containerPtyPath, + 0) < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to allocate tty")); + goto cleanup; + } + } else { + VIR_DEBUG0("Opening tty on shared /dev/ptmx"); + if (virFileOpenTty(&containerPty, + &containerPtyPath, + 0) < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to allocate tty")); + goto cleanup; + } + } + + if (lxcSetContainerResources(def) < 0) goto cleanup; @@ -476,6 +564,8 @@ lxcControllerRun(virDomainDefPtr def, rc = 
lxcControllerMain(monitor, client, appPty, containerPty); cleanup: + VIR_FREE(devptmx); + VIR_FREE(devpts); if (control[0] != -1) close(control[0]); if (control[1] != -1) diff --git a/src/util.c b/src/util.c index 5abdbbcec4..b7e0362406 100644 --- a/src/util.c +++ b/src/util.c @@ -1050,14 +1050,25 @@ int virFileBuildPath(const char *dir, } -#ifdef __linux__ int virFileOpenTty(int *ttymaster, char **ttyName, int rawmode) +{ + return virFileOpenTtyAt("/dev/ptmx", /* fixed default master device; all other arguments are passed through unchanged */ + ttymaster, + ttyName, + rawmode); +} +/* virFileOpenTtyAt: like virFileOpenTty, but the pty master is opened from the caller-supplied ptmx path; the non-Linux variant below always returns -1. */ +#ifdef __linux__ +int virFileOpenTtyAt(const char *ptmx, + int *ttymaster, + char **ttyName, + int rawmode) { int rc = -1; - if ((*ttymaster = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0) + if ((*ttymaster = open(ptmx, O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0) goto cleanup; if (unlockpt(*ttymaster) < 0) @@ -1100,9 +1111,10 @@ cleanup: } #else -int virFileOpenTty(int *ttymaster ATTRIBUTE_UNUSED, - char **ttyName ATTRIBUTE_UNUSED, - int rawmode ATTRIBUTE_UNUSED) +int virFileOpenTtyAt(const char *ptmx ATTRIBUTE_UNUSED, + int *ttymaster ATTRIBUTE_UNUSED, + char **ttyName ATTRIBUTE_UNUSED, + int rawmode ATTRIBUTE_UNUSED) { return -1; } diff --git a/src/util.h b/src/util.h index 6fe03b634f..4bed0778d8 100644 --- a/src/util.h +++ b/src/util.h @@ -103,6 +103,10 @@ int virFileBuildPath(const char *dir, int virFileOpenTty(int *ttymaster, char **ttyName, int rawmode); +int virFileOpenTtyAt(const char *ptmx, + int *ttymaster, + char **ttyName, + int rawmode); /* ptmx: path of the pty master device to open */ char* virFilePid(const char *dir, const char *name);