From 518c2144e512a8f5813e45e8594eba2e39f78b69 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 13 Aug 2008 10:25:34 +0000 Subject: [PATCH] Re-arrange code between LXC driver files --- src/Makefile.am | 1 + src/lxc_container.c | 220 ++++++++++++++++++++------- src/lxc_container.h | 19 ++- src/lxc_controller.c | 205 +++++++++++++++++++++++++ src/lxc_controller.h | 33 +++++ src/lxc_driver.c | 345 +++---------------------------------------- 6 files changed, 434 insertions(+), 389 deletions(-) create mode 100644 src/lxc_controller.c create mode 100644 src/lxc_controller.h diff --git a/src/Makefile.am b/src/Makefile.am index 5d238cd98c..532b940a2a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -64,6 +64,7 @@ CLIENT_SOURCES = \ openvz_conf.c openvz_conf.h \ openvz_driver.c openvz_driver.h \ lxc_driver.c lxc_driver.h \ + lxc_controller.c lxc_controller.h \ lxc_conf.c lxc_conf.h \ lxc_container.c lxc_container.h \ veth.c veth.h \ diff --git a/src/lxc_container.c b/src/lxc_container.c index ff976d9e4c..510a113054 100644 --- a/src/lxc_container.c +++ b/src/lxc_container.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "lxc_container.h" @@ -40,49 +41,69 @@ #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__) #define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg) +/* + * GLibc headers are behind the kernel, so we define these + * constants if they're not present already. 
+ */ + +#ifndef CLONE_NEWPID +#define CLONE_NEWPID 0x20000000 +#endif +#ifndef CLONE_NEWUTS +#define CLONE_NEWUTS 0x04000000 +#endif +#ifndef CLONE_NEWUSER +#define CLONE_NEWUSER 0x10000000 +#endif +#ifndef CLONE_NEWIPC +#define CLONE_NEWIPC 0x08000000 +#endif +#ifndef CLONE_NEWNET +#define CLONE_NEWNET 0x40000000 /* New network namespace */ +#endif + +/* messages between parent and container */ +typedef char lxc_message_t; +#define LXC_CONTINUE_MSG 'c' + +typedef struct __lxc_child_argv lxc_child_argv_t; +struct __lxc_child_argv { + lxc_vm_def_t *config; + int monitor; + char *ttyPath; +}; + + /** - * lxcExecContainerInit: + * lxcContainerExecInit: * @vmDef: Ptr to vm definition structure * - * Exec the container init string. The container init will replace then + * Exec the container init string. The container init will replace then * be running in the current process * - * Returns 0 on success or -1 in case of error + * Does not return */ -static int lxcExecContainerInit(const lxc_vm_def_t *vmDef) +static int lxcContainerExecInit(const lxc_vm_def_t *vmDef) { - int rc = -1; - char* execString; - size_t execStringLen = strlen(vmDef->init) + 1 + 5; + const char *const argv[] = { + vmDef->init, + NULL, + }; - if (VIR_ALLOC_N(execString, execStringLen) < 0) { - lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, - _("failed to calloc memory for init string: %s"), - strerror(errno)); - goto error_out; - } - - strcpy(execString, "exec "); - strcat(execString, vmDef->init); - - execl("/bin/sh", "sh", "-c", execString, (char*)NULL); - lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, - _("execl failed to exec init: %s"), strerror(errno)); - -error_out: - exit(rc); + return execve(argv[0], (char **)argv, NULL); } /** - * lxcSetContainerStdio: - * @ttyName: Name of tty to set as the container console + * lxcContainerSetStdio: + * @control: the control FD + * @ttyPath: Name of tty to set as the container console * * Sets the given tty as the primary conosole for the container as well as * stdout, 
stdin and stderr. * * Returns 0 on success or -1 in case of error */ -static int lxcSetContainerStdio(const char *ttyPath) +static int lxcContainerSetStdio(int control, const char *ttyPath) { int rc = -1; int ttyfd; @@ -111,7 +132,7 @@ static int lxcSetContainerStdio(const char *ttyPath) * close all FDs before executing the container */ open_max = sysconf (_SC_OPEN_MAX); for (i = 0; i < open_max; i++) - if (i != ttyfd) + if (i != ttyfd && i != control) close(i); if (dup2(ttyfd, 0) < 0) { @@ -142,30 +163,38 @@ error_out: } /** - * lxcExecWithTty: - * @vm: Ptr to vm structure + * lxcContainerSendContinue: + * @control: control FD to child * - * Sets container console and stdio and then execs container init + * Sends the continue message to the container via the given + * control FD. * * Returns 0 on success or -1 in case of error */ -static int lxcExecWithTty(lxc_vm_def_t *vmDef, char *ttyPath) +int lxcContainerSendContinue(virConnectPtr conn, + int control) { int rc = -1; + lxc_message_t msg = LXC_CONTINUE_MSG; + int writeCount = 0; - if(lxcSetContainerStdio(ttyPath) < 0) { - goto exit_with_error; + writeCount = safewrite(control, &msg, sizeof(msg)); + if (writeCount != sizeof(msg)) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("unable to send container continue message: %s"), + strerror(errno)); + goto error_out; } - lxcExecContainerInit(vmDef); + rc = 0; -exit_with_error: - exit(rc); +error_out: + return rc; } /** - * lxcWaitForContinue: - * @monitor: monitor FD from parent + * lxcContainerWaitForContinue: + * @control: control FD from parent * * This function will wait for the container continue message from the * parent process. 
It will send this message on the socket pair stored in @@ -173,12 +202,12 @@ exit_with_error: * * Returns 0 on success or -1 in case of error */ -static int lxcWaitForContinue(int monitor) +static int lxcContainerWaitForContinue(int control) { lxc_message_t msg; int readLen; - readLen = saferead(monitor, &msg, sizeof(msg)); + readLen = saferead(control, &msg, sizeof(msg)); if (readLen != sizeof(msg) || msg != LXC_CONTINUE_MSG) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -186,6 +215,7 @@ static int lxcWaitForContinue(int monitor) strerror(errno)); return -1; } + close(control); DEBUG0("Received container continue message"); @@ -200,7 +230,7 @@ static int lxcWaitForContinue(int monitor) * * Returns 0 on success or nonzero in case of error */ -static int lxcEnableInterfaces(const lxc_vm_def_t *def) +static int lxcContainerEnableInterfaces(const lxc_vm_def_t *def) { int rc = 0; const lxc_net_def_t *net; @@ -233,7 +263,7 @@ error_out: * * Returns 0 on success or -1 in case of error */ -int lxcChild( void *data ) +static int lxcContainerChild( void *data ) { int rc = -1; lxc_child_argv_t *argv = data; @@ -244,7 +274,7 @@ int lxcChild( void *data ) if (NULL == vmDef) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("lxcChild() passed invalid vm definition")); - goto cleanup; + return -1; } /* handle the bind mounts first before doing anything else that may */ @@ -260,7 +290,7 @@ int lxcChild( void *data ) lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to mount %s at %s for container: %s"), curMount->source, curMount->target, strerror(errno)); - goto cleanup; + return -1; } } @@ -270,24 +300,106 @@ int lxcChild( void *data ) lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to mount /proc for container: %s"), strerror(errno)); - goto cleanup; + return -1; } + if (lxcContainerSetStdio(argv->monitor, argv->ttyPath) < 0) + return -1; + /* Wait for interface devices to show up */ - if (0 != (rc = lxcWaitForContinue(argv->monitor))) { - goto cleanup; - } + if 
(lxcContainerWaitForContinue(argv->monitor) < 0) + return -1; /* enable interfaces */ - if (0 != (rc = lxcEnableInterfaces(vmDef))) { - goto cleanup; + if (lxcContainerEnableInterfaces(vmDef) < 0) + return -1; + + /* this function will only return if an error occurred */ + return lxcContainerExecInit(vmDef); +} + +/** + * lxcContainerStart: + * @conn: pointer to connection + * @def: pointer to virtual machine definition + * @control: control FD passed to the container + * + * Starts a container process by calling clone() with the namespace flags + * + * Returns PID of container on success or -1 in case of error + */ +int lxcContainerStart(virConnectPtr conn, + lxc_vm_def_t *def, + int control, + char *ttyPath) +{ + pid_t pid; + int flags; + int stacksize = getpagesize() * 4; + char *stack, *stacktop; + lxc_child_argv_t args = { def, control, ttyPath }; + + /* allocate a stack for the container */ + if (VIR_ALLOC_N(stack, stacksize) < 0) { + lxcError(conn, NULL, VIR_ERR_NO_MEMORY, + _("unable to allocate container stack")); + return -1; + } + stacktop = stack + stacksize; + + flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD; + + if (def->nets != NULL) + flags |= CLONE_NEWNET; + + pid = clone(lxcContainerChild, stacktop, flags, &args); + VIR_FREE(stack); + DEBUG("clone() returned, %d", pid); + + if (pid < 0) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("clone() failed, %s"), strerror(errno)); + return -1; } - rc = lxcExecWithTty(vmDef, argv->ttyPath); - /* this function will only return if an error occured */ + return pid; +} -cleanup: - return rc; +static int lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED) +{ + _exit(0); +} + +int lxcContainerAvailable(int features) +{ + int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER| + CLONE_NEWIPC|SIGCHLD; + int cpid; + char *childStack; + char *stack; + int childStatus; + + if (features & LXC_CONTAINER_FEATURE_NET) + flags |= CLONE_NEWNET; + + if (VIR_ALLOC_N(stack, getpagesize() 
* 4) < 0) { + DEBUG0("Unable to allocate stack"); + return -1; + } + + childStack = stack + (getpagesize() * 4); + + cpid = clone(lxcContainerDummyChild, childStack, flags, NULL); + VIR_FREE(stack); + if (cpid < 0) { + DEBUG("clone call returned %s, container support is not enabled", + strerror(errno)); + return -1; + } else { + waitpid(cpid, &childStatus, 0); + } + + return 0; } #endif /* WITH_LXC */ diff --git a/src/lxc_container.h b/src/lxc_container.h index b16138cd15..715753ec38 100644 --- a/src/lxc_container.h +++ b/src/lxc_container.h @@ -28,20 +28,19 @@ #ifdef WITH_LXC -typedef struct __lxc_child_argv lxc_child_argv_t; -struct __lxc_child_argv { - lxc_vm_def_t *config; - int monitor; - char *ttyPath; +enum { + LXC_CONTAINER_FEATURE_NET = (1 << 0), }; -/* messages between parent and container */ -typedef char lxc_message_t; -#define LXC_CONTINUE_MSG 'c' +int lxcContainerSendContinue(virConnectPtr conn, + int control); +int lxcContainerStart(virConnectPtr conn, + lxc_vm_def_t *def, + int control, + char *ttyPath); -/* Function declarations */ -int lxcChild( void *argv ); +int lxcContainerAvailable(int features); #endif /* LXC_DRIVER_H */ diff --git a/src/lxc_controller.c b/src/lxc_controller.c new file mode 100644 index 0000000000..09af605791 --- /dev/null +++ b/src/lxc_controller.c @@ -0,0 +1,205 @@ +/* + * Copyright IBM Corp. 2008 + * + * lxc_controller.c: linux container process controller + * + * Authors: + * David L. Leskovec + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +#ifdef WITH_LXC + +#include +#include + +#include "internal.h" +#include "util.h" + +#include "lxc_conf.h" +#include "lxc_controller.h" + + +#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__) + +/** + * lxcFdForward: + * @readFd: file descriptor to read + * @writeFd: file descriptor to write + * + * Reads 1 byte of data from readFd and writes to writeFd. + * + * Returns 0 on success, EAGAIN if returned on read, or -1 in case of error + */ +static int lxcFdForward(int readFd, int writeFd) +{ + int rc = -1; + char buf[2]; + + if (1 != (saferead(readFd, buf, 1))) { + if (EAGAIN == errno) { + rc = EAGAIN; + goto cleanup; + } + + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("read of fd %d failed: %s"), readFd, strerror(errno)); + goto cleanup; + } + + if (1 != (safewrite(writeFd, buf, 1))) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("write to fd %d failed: %s"), writeFd, strerror(errno)); + goto cleanup; + } + + rc = 0; + +cleanup: + return rc; +} + +typedef struct _lxcTtyForwardFd_t { + int fd; + int active; +} lxcTtyForwardFd_t; + +/** + * lxcControllerMain: + * @appPty: Open fd for application facing Pty + * @contPty: Open fd for container facing Pty + * + * Forwards traffic between fds. Data read from appPty will be written to contPty + * This process loops forever. 
+ * This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP + * events when the user disconnects the virsh console via ctrl-] + * + * Returns 0 on success or -1 in case of error + */ +int lxcControllerMain(int appPty, int contPty) +{ + int rc = -1; + int epollFd; + struct epoll_event epollEvent; + int numEvents; + int numActive = 0; + lxcTtyForwardFd_t fdArray[2]; + int timeout = -1; + int curFdOff = 0; + int writeFdOff = 0; + + fdArray[0].fd = appPty; + fdArray[0].active = 0; + fdArray[1].fd = contPty; + fdArray[1].active = 0; + + /* create the epoll file descriptor */ + epollFd = epoll_create(2); + if (0 > epollFd) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_create(2) failed: %s"), strerror(errno)); + goto cleanup; + } + + /* add the file descriptors to the epoll fd */ + memset(&epollEvent, 0x00, sizeof(epollEvent)); + epollEvent.events = EPOLLIN|EPOLLET; /* edge triggered */ + epollEvent.data.fd = appPty; + epollEvent.data.u32 = 0; /* fdArray position */ + if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, appPty, &epollEvent)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_ctl(appPty) failed: %s"), strerror(errno)); + goto cleanup; + } + epollEvent.data.fd = contPty; + epollEvent.data.u32 = 1; /* fdArray position */ + if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, contPty, &epollEvent)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_ctl(contPty) failed: %s"), strerror(errno)); + goto cleanup; + } + + while (1) { + /* if active fd's, return if no events, else wait forever */ + timeout = (numActive > 0) ? 
0 : -1; + numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout); + if (0 < numEvents) { + if (epollEvent.events & EPOLLIN) { + curFdOff = epollEvent.data.u32; + if (!fdArray[curFdOff].active) { + fdArray[curFdOff].active = 1; + ++numActive; + } + + } else if (epollEvent.events & EPOLLHUP) { + DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd); + continue; + } else { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("error event %d"), epollEvent.events); + goto cleanup; + } + + } else if (0 == numEvents) { + if (2 == numActive) { + /* both fds active, toggle between the two */ + curFdOff ^= 1; + } else { + /* only one active, if current is active, use it, else it */ + /* must be the other one (ie. curFd just went inactive) */ + curFdOff = fdArray[curFdOff].active ? curFdOff : curFdOff ^ 1; + } + + } else { + if (EINTR == errno) { + continue; + } + + /* error */ + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_wait() failed: %s"), strerror(errno)); + goto cleanup; + + } + + if (0 < numActive) { + writeFdOff = curFdOff ^ 1; + rc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd); + + if (EAGAIN == rc) { + /* this fd no longer has data, set it as inactive */ + --numActive; + fdArray[curFdOff].active = 0; + } else if (-1 == rc) { + goto cleanup; + } + + } + + } + + rc = 0; + +cleanup: + close(appPty); + close(contPty); + close(epollFd); + return rc; +} + +#endif diff --git a/src/lxc_controller.h b/src/lxc_controller.h new file mode 100644 index 0000000000..edaf53ada7 --- /dev/null +++ b/src/lxc_controller.h @@ -0,0 +1,33 @@ +/* + * Copyright IBM Corp. 2008 + * + * lxc_controller.h: linux container process controller + * + * Authors: + * David L. Leskovec + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LXC_CONTROLLER_H +#define LXC_CONTROLLER_H + +#ifdef WITH_LXC + +int lxcControllerMain(int appPty, int contPty); + +#endif /* WITH_LXC */ + +#endif /* LXC_CONTROLLER_H */ diff --git a/src/lxc_driver.c b/src/lxc_driver.c index 9d3ffcf035..4eb9aba1c7 100644 --- a/src/lxc_driver.c +++ b/src/lxc_driver.c @@ -26,7 +26,6 @@ #ifdef WITH_LXC #include -#include #include #include #include @@ -39,6 +38,7 @@ #include "lxc_conf.h" #include "lxc_container.h" #include "lxc_driver.h" +#include "lxc_controller.h" #include "driver.h" #include "internal.h" #include "memory.h" @@ -52,77 +52,19 @@ #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__) #define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg) -/* - * GLibc headers are behind the kernel, so we define these - * constants if they're not present already. 
- */ - -#ifndef CLONE_NEWPID -#define CLONE_NEWPID 0x20000000 -#endif -#ifndef CLONE_NEWUTS -#define CLONE_NEWUTS 0x04000000 -#endif -#ifndef CLONE_NEWUSER -#define CLONE_NEWUSER 0x10000000 -#endif -#ifndef CLONE_NEWIPC -#define CLONE_NEWIPC 0x08000000 -#endif -#ifndef CLONE_NEWNET -#define CLONE_NEWNET 0x40000000 /* New network namespace */ -#endif static int lxcStartup(void); static int lxcShutdown(void); static lxc_driver_t *lxc_driver = NULL; /* Functions */ -static int lxcDummyChild( void *argv ATTRIBUTE_UNUSED ) -{ - exit(0); -} - -static int lxcCheckContainerSupport(int extra_flags) -{ - int rc = 0; - int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER| - CLONE_NEWIPC|SIGCHLD|extra_flags; - int cpid; - char *childStack; - char *stack; - int childStatus; - - if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) { - DEBUG0("Unable to allocate stack"); - rc = -1; - goto check_complete; - } - - childStack = stack + (getpagesize() * 4); - - cpid = clone(lxcDummyChild, childStack, flags, NULL); - if ((0 > cpid) && (EINVAL == errno)) { - DEBUG0("clone call returned EINVAL, container support is not enabled"); - rc = -1; - } else { - waitpid(cpid, &childStatus, 0); - } - - VIR_FREE(stack); - -check_complete: - return rc; -} static const char *lxcProbe(void) { -#ifdef __linux__ - if (0 == lxcCheckContainerSupport(0)) { - return("lxc:///"); - } -#endif - return(NULL); + if (lxcContainerAvailable(0) < 0) + return NULL; + + return("lxc:///"); } static virDrvOpenStatus lxcOpen(virConnectPtr conn, @@ -559,89 +501,6 @@ static int lxcCleanupInterfaces(const lxc_vm_t *vm) return 0; } -/** - * lxcSendContainerContinue: - * @monitor: FD for communicating with child - * - * Sends the continue message via the socket pair stored in the vm - * structure. 
- * - * Returns 0 on success or -1 in case of error - */ -static int lxcSendContainerContinue(virConnectPtr conn, - int monitor) -{ - int rc = -1; - lxc_message_t msg = LXC_CONTINUE_MSG; - int writeCount = 0; - - writeCount = safewrite(monitor, &msg, sizeof(msg)); - if (writeCount != sizeof(msg)) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("unable to send container continue message: %s"), - strerror(errno)); - goto error_out; - } - - rc = 0; - -error_out: - return rc; -} - -/** - * lxcStartContainer: - * @conn: pointer to connection - * @driver: pointer to driver structure - * @vm: pointer to virtual machine structure - * - * Starts a container process by calling clone() with the namespace flags - * - * Returns 0 on success or -1 in case of error - */ -static int lxcStartContainer(virConnectPtr conn, - lxc_driver_t* driver, - lxc_vm_t *vm, - int monitor, - char *ttyPath) -{ - int rc = -1; - int flags; - int stacksize = getpagesize() * 4; - char *stack, *stacktop; - lxc_child_argv_t args = { vm->def, monitor, ttyPath }; - - /* allocate a stack for the container */ - if (VIR_ALLOC_N(stack, stacksize) < 0) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, - _("unable to allocate container stack")); - goto error_exit; - } - stacktop = stack + stacksize; - - flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD; - - if (vm->def->nets != NULL) - flags |= CLONE_NEWNET; - - vm->def->id = clone(lxcChild, stacktop, flags, &args); - - DEBUG("clone() returned, %d", vm->def->id); - - if (vm->def->id < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("clone() failed, %s"), strerror(errno)); - goto error_exit; - } - - lxcSaveConfig(NULL, driver, vm, vm->def); - - rc = 0; - -error_exit: - return rc; -} - /** * lxcOpenTty: @@ -716,170 +575,6 @@ cleanup: return rc; } -/** - * lxcFdForward: - * @readFd: file descriptor to read - * @writeFd: file desriptor to write - * - * Reads 1 byte of data from readFd and writes to writeFd. 
- * - * Returns 0 on success, EAGAIN if returned on read, or -1 in case of error - */ -static int lxcFdForward(int readFd, int writeFd) -{ - int rc = -1; - char buf[2]; - - if (1 != (saferead(readFd, buf, 1))) { - if (EAGAIN == errno) { - rc = EAGAIN; - goto cleanup; - } - - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("read of fd %d failed: %s"), readFd, strerror(errno)); - goto cleanup; - } - - if (1 != (safewrite(writeFd, buf, 1))) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("write to fd %d failed: %s"), writeFd, strerror(errno)); - goto cleanup; - } - - rc = 0; - -cleanup: - return rc; -} - -typedef struct _lxcTtyForwardFd_t { - int fd; - bool active; -} lxcTtyForwardFd_t; - -/** - * lxcTtyForward: - * @fd1: Open fd - * @fd1: Open fd - * - * Forwards traffic between fds. Data read from fd1 will be written to fd2 - * This process loops forever. - * This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP - * events when the user disconnects the virsh console via ctrl-] - * - * Returns 0 on success or -1 in case of error - */ -static int lxcTtyForward(int fd1, int fd2) -{ - int rc = -1; - int epollFd; - struct epoll_event epollEvent; - int numEvents; - int numActive = 0; - lxcTtyForwardFd_t fdArray[2]; - int timeout = -1; - int curFdOff = 0; - int writeFdOff = 0; - - fdArray[0].fd = fd1; - fdArray[0].active = false; - fdArray[1].fd = fd2; - fdArray[1].active = false; - - /* create the epoll fild descriptor */ - epollFd = epoll_create(2); - if (0 > epollFd) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("epoll_create(2) failed: %s"), strerror(errno)); - goto cleanup; - } - - /* add the file descriptors the epoll fd */ - memset(&epollEvent, 0x00, sizeof(epollEvent)); - epollEvent.events = EPOLLIN|EPOLLET; /* edge triggered */ - epollEvent.data.fd = fd1; - epollEvent.data.u32 = 0; /* fdArray position */ - if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, fd1, &epollEvent)) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("epoll_ctl(fd1) 
failed: %s"), strerror(errno)); - goto cleanup; - } - epollEvent.data.fd = fd2; - epollEvent.data.u32 = 1; /* fdArray position */ - if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, fd2, &epollEvent)) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("epoll_ctl(fd2) failed: %s"), strerror(errno)); - goto cleanup; - } - - while (1) { - /* if active fd's, return if no events, else wait forever */ - timeout = (numActive > 0) ? 0 : -1; - numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout); - if (0 < numEvents) { - if (epollEvent.events & EPOLLIN) { - curFdOff = epollEvent.data.u32; - if (!fdArray[curFdOff].active) { - fdArray[curFdOff].active = true; - ++numActive; - } - - } else if (epollEvent.events & EPOLLHUP) { - DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd); - continue; - } else { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("error event %d"), epollEvent.events); - goto cleanup; - } - - } else if (0 == numEvents) { - if (2 == numActive) { - /* both fds active, toggle between the two */ - curFdOff ^= 1; - } else { - /* only one active, if current is active, use it, else it */ - /* must be the other one (ie. curFd just went inactive) */ - curFdOff = fdArray[curFdOff].active ? 
curFdOff : curFdOff ^ 1; - } - - } else { - if (EINTR == errno) { - continue; - } - - /* error */ - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("epoll_wait() failed: %s"), strerror(errno)); - goto cleanup; - - } - - if (0 < numActive) { - writeFdOff = curFdOff ^ 1; - rc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd); - - if (EAGAIN == rc) { - /* this fd no longer has data, set it as inactive */ - --numActive; - fdArray[curFdOff].active = false; - } else if (-1 == rc) { - goto cleanup; - } - - } - - } - - rc = 0; - -cleanup: - close(fd1); - close(fd2); - close(epollFd); - exit(rc); -} /** * lxcVmStart: @@ -921,7 +616,7 @@ static int lxcVmStart(virConnectPtr conn, if (vm->pid == 0) { /* child process calls forward routine */ - lxcTtyForward(parentTty, containerTty); + lxcControllerMain(parentTty, containerTty); } if (lxcStoreTtyPid(driver, vm)) { @@ -945,17 +640,19 @@ static int lxcVmStart(virConnectPtr conn, /* check this rc */ - rc = lxcStartContainer(conn, driver, vm, - sockpair[1], - containerTtyPath); - if (rc != 0) + vm->def->id = lxcContainerStart(conn, + vm->def, + sockpair[1], + containerTtyPath); + if (vm->def->id == -1) goto cleanup; + lxcSaveConfig(conn, driver, vm, vm->def); rc = lxcMoveInterfacesToNetNs(conn, vm); if (rc != 0) goto cleanup; - rc = lxcSendContainerContinue(conn, sockpair[0]); + rc = lxcContainerSendContinue(conn, sockpair[0]); if (rc != 0) goto cleanup; @@ -1196,16 +893,15 @@ static int lxcCheckNetNsSupport(void) { const char *argv[] = {"ip", "link", "set", "lo", "netns", "-1", NULL}; int ip_rc; - int user_netns = 0; - int kern_netns = 0; - if (virRun(NULL, argv, &ip_rc) == 0) - user_netns = WIFEXITED(ip_rc) && (WEXITSTATUS(ip_rc) != 255); + if (virRun(NULL, argv, &ip_rc) < 0 || + !(WIFEXITED(ip_rc) && (WEXITSTATUS(ip_rc) != 255))) + return 0; - if (lxcCheckContainerSupport(CLONE_NEWNET) == 0) - kern_netns = 1; + if (lxcContainerAvailable(LXC_CONTAINER_FEATURE_NET) < 0) + return 0; - return kern_netns && user_netns; + 
return 1; } static int lxcStartup(void) @@ -1222,9 +918,8 @@ static int lxcStartup(void) } /* Check that this is a container enabled kernel */ - if(0 != lxcCheckContainerSupport(0)) { + if(lxcContainerAvailable(0) < 0) return -1; - } lxc_driver->have_netns = lxcCheckNetNsSupport();