mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2025-03-07 17:28:15 +00:00
Re-arrange code between LXC driver files
This commit is contained in:
parent
e82c913680
commit
518c2144e5
@ -64,6 +64,7 @@ CLIENT_SOURCES = \
|
||||
openvz_conf.c openvz_conf.h \
|
||||
openvz_driver.c openvz_driver.h \
|
||||
lxc_driver.c lxc_driver.h \
|
||||
lxc_controller.c lxc_controller.h \
|
||||
lxc_conf.c lxc_conf.h \
|
||||
lxc_container.c lxc_container.h \
|
||||
veth.c veth.h \
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "lxc_container.h"
|
||||
@ -40,49 +41,69 @@
|
||||
#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
|
||||
#define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
|
||||
|
||||
/*
|
||||
* GLibc headers are behind the kernel, so we define these
|
||||
* constants if they're not present already.
|
||||
*/
|
||||
|
||||
#ifndef CLONE_NEWPID
|
||||
#define CLONE_NEWPID 0x20000000
|
||||
#endif
|
||||
#ifndef CLONE_NEWUTS
|
||||
#define CLONE_NEWUTS 0x04000000
|
||||
#endif
|
||||
#ifndef CLONE_NEWUSER
|
||||
#define CLONE_NEWUSER 0x10000000
|
||||
#endif
|
||||
#ifndef CLONE_NEWIPC
|
||||
#define CLONE_NEWIPC 0x08000000
|
||||
#endif
|
||||
#ifndef CLONE_NEWNET
|
||||
#define CLONE_NEWNET 0x40000000 /* New network namespace */
|
||||
#endif
|
||||
|
||||
/* messages between parent and container */
|
||||
typedef char lxc_message_t;
|
||||
#define LXC_CONTINUE_MSG 'c'
|
||||
|
||||
typedef struct __lxc_child_argv lxc_child_argv_t;
|
||||
struct __lxc_child_argv {
|
||||
lxc_vm_def_t *config;
|
||||
int monitor;
|
||||
char *ttyPath;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* lxcExecContainerInit:
|
||||
* lxcContainerExecInit:
|
||||
* @vmDef: Ptr to vm definition structure
|
||||
*
|
||||
* Exec the container init string. The container init will then
|
||||
* be running in the current process
|
||||
*
|
||||
* Returns 0 on success or -1 in case of error
|
||||
* Does not return
|
||||
*/
|
||||
static int lxcExecContainerInit(const lxc_vm_def_t *vmDef)
|
||||
static int lxcContainerExecInit(const lxc_vm_def_t *vmDef)
|
||||
{
|
||||
int rc = -1;
|
||||
char* execString;
|
||||
size_t execStringLen = strlen(vmDef->init) + 1 + 5;
|
||||
const char *const argv[] = {
|
||||
vmDef->init,
|
||||
NULL,
|
||||
};
|
||||
|
||||
if (VIR_ALLOC_N(execString, execStringLen) < 0) {
|
||||
lxcError(NULL, NULL, VIR_ERR_NO_MEMORY,
|
||||
_("failed to calloc memory for init string: %s"),
|
||||
strerror(errno));
|
||||
goto error_out;
|
||||
}
|
||||
|
||||
strcpy(execString, "exec ");
|
||||
strcat(execString, vmDef->init);
|
||||
|
||||
execl("/bin/sh", "sh", "-c", execString, (char*)NULL);
|
||||
lxcError(NULL, NULL, VIR_ERR_NO_MEMORY,
|
||||
_("execl failed to exec init: %s"), strerror(errno));
|
||||
|
||||
error_out:
|
||||
exit(rc);
|
||||
return execve(argv[0], (char **)argv, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* lxcSetContainerStdio:
|
||||
* @ttyName: Name of tty to set as the container console
|
||||
* lxcContainerSetStdio:
|
||||
* @control: the control FD
|
||||
* @ttyPath: Name of tty to set as the container console
|
||||
*
|
||||
* Sets the given tty as the primary console for the container as well as
|
||||
* stdout, stdin and stderr.
|
||||
*
|
||||
* Returns 0 on success or -1 in case of error
|
||||
*/
|
||||
static int lxcSetContainerStdio(const char *ttyPath)
|
||||
static int lxcContainerSetStdio(int control, const char *ttyPath)
|
||||
{
|
||||
int rc = -1;
|
||||
int ttyfd;
|
||||
@ -111,7 +132,7 @@ static int lxcSetContainerStdio(const char *ttyPath)
|
||||
* close all FDs before executing the container */
|
||||
open_max = sysconf (_SC_OPEN_MAX);
|
||||
for (i = 0; i < open_max; i++)
|
||||
if (i != ttyfd)
|
||||
if (i != ttyfd && i != control)
|
||||
close(i);
|
||||
|
||||
if (dup2(ttyfd, 0) < 0) {
|
||||
@ -142,30 +163,38 @@ error_out:
|
||||
}
|
||||
|
||||
/**
|
||||
* lxcExecWithTty:
|
||||
* @vm: Ptr to vm structure
|
||||
* lxcContainerSendContinue:
|
||||
* @control: control FD to child
|
||||
*
|
||||
* Sets container console and stdio and then execs container init
|
||||
* Sends the continue message via the socket pair stored in the vm
|
||||
* structure.
|
||||
*
|
||||
* Returns 0 on success or -1 in case of error
|
||||
*/
|
||||
static int lxcExecWithTty(lxc_vm_def_t *vmDef, char *ttyPath)
|
||||
int lxcContainerSendContinue(virConnectPtr conn,
|
||||
int control)
|
||||
{
|
||||
int rc = -1;
|
||||
lxc_message_t msg = LXC_CONTINUE_MSG;
|
||||
int writeCount = 0;
|
||||
|
||||
if(lxcSetContainerStdio(ttyPath) < 0) {
|
||||
goto exit_with_error;
|
||||
writeCount = safewrite(control, &msg, sizeof(msg));
|
||||
if (writeCount != sizeof(msg)) {
|
||||
lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("unable to send container continue message: %s"),
|
||||
strerror(errno));
|
||||
goto error_out;
|
||||
}
|
||||
|
||||
lxcExecContainerInit(vmDef);
|
||||
rc = 0;
|
||||
|
||||
exit_with_error:
|
||||
exit(rc);
|
||||
error_out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* lxcWaitForContinue:
|
||||
* @monitor: monitor FD from parent
|
||||
* lxcContainerWaitForContinue:
|
||||
* @control: control FD from parent
|
||||
*
|
||||
* This function will wait for the container continue message from the
|
||||
* parent process. It will send this message on the socket pair stored in
|
||||
@ -173,12 +202,12 @@ exit_with_error:
|
||||
*
|
||||
* Returns 0 on success or -1 in case of error
|
||||
*/
|
||||
static int lxcWaitForContinue(int monitor)
|
||||
static int lxcContainerWaitForContinue(int control)
|
||||
{
|
||||
lxc_message_t msg;
|
||||
int readLen;
|
||||
|
||||
readLen = saferead(monitor, &msg, sizeof(msg));
|
||||
readLen = saferead(control, &msg, sizeof(msg));
|
||||
if (readLen != sizeof(msg) ||
|
||||
msg != LXC_CONTINUE_MSG) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
@ -186,6 +215,7 @@ static int lxcWaitForContinue(int monitor)
|
||||
strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
close(control);
|
||||
|
||||
DEBUG0("Received container continue message");
|
||||
|
||||
@ -200,7 +230,7 @@ static int lxcWaitForContinue(int monitor)
|
||||
*
|
||||
* Returns 0 on success or nonzero in case of error
|
||||
*/
|
||||
static int lxcEnableInterfaces(const lxc_vm_def_t *def)
|
||||
static int lxcContainerEnableInterfaces(const lxc_vm_def_t *def)
|
||||
{
|
||||
int rc = 0;
|
||||
const lxc_net_def_t *net;
|
||||
@ -233,7 +263,7 @@ error_out:
|
||||
*
|
||||
* Returns 0 on success or -1 in case of error
|
||||
*/
|
||||
int lxcChild( void *data )
|
||||
static int lxcContainerChild( void *data )
|
||||
{
|
||||
int rc = -1;
|
||||
lxc_child_argv_t *argv = data;
|
||||
@ -244,7 +274,7 @@ int lxcChild( void *data )
|
||||
if (NULL == vmDef) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("lxcChild() passed invalid vm definition"));
|
||||
goto cleanup;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* handle the bind mounts first before doing anything else that may */
|
||||
@ -260,7 +290,7 @@ int lxcChild( void *data )
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("failed to mount %s at %s for container: %s"),
|
||||
curMount->source, curMount->target, strerror(errno));
|
||||
goto cleanup;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -270,24 +300,106 @@ int lxcChild( void *data )
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("failed to mount /proc for container: %s"),
|
||||
strerror(errno));
|
||||
goto cleanup;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (lxcContainerSetStdio(argv->monitor, argv->ttyPath) < 0)
|
||||
return -1;
|
||||
|
||||
/* Wait for interface devices to show up */
|
||||
if (0 != (rc = lxcWaitForContinue(argv->monitor))) {
|
||||
goto cleanup;
|
||||
}
|
||||
if (lxcContainerWaitForContinue(argv->monitor) < 0)
|
||||
return -1;
|
||||
|
||||
/* enable interfaces */
|
||||
if (0 != (rc = lxcEnableInterfaces(vmDef))) {
|
||||
goto cleanup;
|
||||
if (lxcContainerEnableInterfaces(vmDef) < 0)
|
||||
return -1;
|
||||
|
||||
/* this function will only return if an error occurred */
|
||||
return lxcContainerExecInit(vmDef);
|
||||
}
|
||||
|
||||
rc = lxcExecWithTty(vmDef, argv->ttyPath);
|
||||
/* this function will only return if an error occured */
|
||||
/**
|
||||
* lxcContainerStart:
|
||||
* @conn: pointer to connection
|
||||
* @def: pointer to the vm definition structure
|
||||
* @control: control FD passed on to the container child
* @ttyPath: tty path passed on to the container child
|
||||
*
|
||||
* Starts a container process by calling clone() with the namespace flags
|
||||
*
|
||||
* Returns PID of container on success or -1 in case of error
|
||||
*/
|
||||
int lxcContainerStart(virConnectPtr conn,
|
||||
lxc_vm_def_t *def,
|
||||
int control,
|
||||
char *ttyPath)
|
||||
{
|
||||
pid_t pid;
|
||||
int flags;
|
||||
int stacksize = getpagesize() * 4;
|
||||
char *stack, *stacktop;
|
||||
lxc_child_argv_t args = { def, control, ttyPath };
|
||||
|
||||
cleanup:
|
||||
return rc;
|
||||
/* allocate a stack for the container */
|
||||
if (VIR_ALLOC_N(stack, stacksize) < 0) {
|
||||
lxcError(conn, NULL, VIR_ERR_NO_MEMORY,
|
||||
_("unable to allocate container stack"));
|
||||
return -1;
|
||||
}
|
||||
stacktop = stack + stacksize;
|
||||
|
||||
flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
|
||||
|
||||
if (def->nets != NULL)
|
||||
flags |= CLONE_NEWNET;
|
||||
|
||||
pid = clone(lxcContainerChild, stacktop, flags, &args);
|
||||
VIR_FREE(stack);
|
||||
DEBUG("clone() returned, %d", pid);
|
||||
|
||||
if (pid < 0) {
|
||||
lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("clone() failed, %s"), strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
/*
 * Entry point for the throwaway child that lxcContainerAvailable() clone()s:
 * it terminates immediately with a success status and never returns.
 * The argument is ignored.
 */
static int lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED)
{
    _exit(EXIT_SUCCESS);
}
|
||||
|
||||
int lxcContainerAvailable(int features)
|
||||
{
|
||||
int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|
|
||||
CLONE_NEWIPC|SIGCHLD;
|
||||
int cpid;
|
||||
char *childStack;
|
||||
char *stack;
|
||||
int childStatus;
|
||||
|
||||
if (features & LXC_CONTAINER_FEATURE_NET)
|
||||
flags |= CLONE_NEWNET;
|
||||
|
||||
if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) {
|
||||
DEBUG0("Unable to allocate stack");
|
||||
return -1;
|
||||
}
|
||||
|
||||
childStack = stack + (getpagesize() * 4);
|
||||
|
||||
cpid = clone(lxcContainerDummyChild, childStack, flags, NULL);
|
||||
VIR_FREE(stack);
|
||||
if (cpid < 0) {
|
||||
DEBUG("clone call returned %s, container support is not enabled",
|
||||
strerror(errno));
|
||||
return -1;
|
||||
} else {
|
||||
waitpid(cpid, &childStatus, 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* WITH_LXC */
|
||||
|
@ -28,20 +28,19 @@
|
||||
|
||||
#ifdef WITH_LXC
|
||||
|
||||
typedef struct __lxc_child_argv lxc_child_argv_t;
|
||||
struct __lxc_child_argv {
|
||||
lxc_vm_def_t *config;
|
||||
int monitor;
|
||||
char *ttyPath;
|
||||
enum {
|
||||
LXC_CONTAINER_FEATURE_NET = (1 << 0),
|
||||
};
|
||||
|
||||
/* messages between parent and container */
|
||||
typedef char lxc_message_t;
|
||||
#define LXC_CONTINUE_MSG 'c'
|
||||
int lxcContainerSendContinue(virConnectPtr conn,
|
||||
int control);
|
||||
|
||||
int lxcContainerStart(virConnectPtr conn,
|
||||
lxc_vm_def_t *def,
|
||||
int control,
|
||||
char *ttyPath);
|
||||
|
||||
/* Function declarations */
|
||||
int lxcChild( void *argv );
|
||||
int lxcContainerAvailable(int features);
|
||||
|
||||
#endif /* LXC_DRIVER_H */
|
||||
|
||||
|
205
src/lxc_controller.c
Normal file
205
src/lxc_controller.c
Normal file
@ -0,0 +1,205 @@
|
||||
/*
|
||||
* Copyright IBM Corp. 2008
|
||||
*
|
||||
* lxc_controller.c: linux container process controller
|
||||
*
|
||||
* Authors:
|
||||
* David L. Leskovec <dlesko at linux.vnet.ibm.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#ifdef WITH_LXC
|
||||
|
||||
#include <sys/epoll.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "internal.h"
|
||||
#include "util.h"
|
||||
|
||||
#include "lxc_conf.h"
|
||||
#include "lxc_controller.h"
|
||||
|
||||
|
||||
#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* lxcFdForward:
|
||||
* @readFd: file descriptor to read
|
||||
* @writeFd: file desriptor to write
|
||||
*
|
||||
* Reads 1 byte of data from readFd and writes to writeFd.
|
||||
*
|
||||
* Returns 0 on success, EAGAIN if returned on read, or -1 in case of error
|
||||
*/
|
||||
static int lxcFdForward(int readFd, int writeFd)
|
||||
{
|
||||
int rc = -1;
|
||||
char buf[2];
|
||||
|
||||
if (1 != (saferead(readFd, buf, 1))) {
|
||||
if (EAGAIN == errno) {
|
||||
rc = EAGAIN;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("read of fd %d failed: %s"), readFd, strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (1 != (safewrite(writeFd, buf, 1))) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("write to fd %d failed: %s"), writeFd, strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
rc = 0;
|
||||
|
||||
cleanup:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Per-descriptor bookkeeping for the two ends handled by lxcControllerMain() */
typedef struct _lxcTtyForwardFd_t {
|
||||
/* the file descriptor for this end */
int fd;
|
||||
/* set to 1 when epoll reports EPOLLIN for this fd, reset to 0 once a
 * forward attempt returns EAGAIN */
int active;
|
||||
} lxcTtyForwardFd_t;
|
||||
|
||||
/**
|
||||
* lxcTtyForward:
|
||||
* @appPty: Open fd for application facing Pty
|
||||
* @contPty: Open fd for container facing Pty
|
||||
*
|
||||
* Forwards traffic between fds. Data read from appPty will be written to contPty
|
||||
* This process loops forever.
|
||||
* This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP
|
||||
* events when the user disconnects the virsh console via ctrl-]
|
||||
*
|
||||
* Returns 0 on success or -1 in case of error
|
||||
*/
|
||||
int lxcControllerMain(int appPty, int contPty)
|
||||
{
|
||||
int rc = -1;
|
||||
int epollFd;
|
||||
struct epoll_event epollEvent;
|
||||
int numEvents;
|
||||
int numActive = 0;
|
||||
lxcTtyForwardFd_t fdArray[2];
|
||||
int timeout = -1;
|
||||
int curFdOff = 0;
|
||||
int writeFdOff = 0;
|
||||
|
||||
fdArray[0].fd = appPty;
|
||||
fdArray[0].active = 0;
|
||||
fdArray[1].fd = contPty;
|
||||
fdArray[1].active = 0;
|
||||
|
||||
/* create the epoll fild descriptor */
|
||||
epollFd = epoll_create(2);
|
||||
if (0 > epollFd) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("epoll_create(2) failed: %s"), strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* add the file descriptors the epoll fd */
|
||||
memset(&epollEvent, 0x00, sizeof(epollEvent));
|
||||
epollEvent.events = EPOLLIN|EPOLLET; /* edge triggered */
|
||||
epollEvent.data.fd = appPty;
|
||||
epollEvent.data.u32 = 0; /* fdArray position */
|
||||
if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, appPty, &epollEvent)) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("epoll_ctl(appPty) failed: %s"), strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
epollEvent.data.fd = contPty;
|
||||
epollEvent.data.u32 = 1; /* fdArray position */
|
||||
if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, contPty, &epollEvent)) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("epoll_ctl(contPty) failed: %s"), strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
/* if active fd's, return if no events, else wait forever */
|
||||
timeout = (numActive > 0) ? 0 : -1;
|
||||
numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout);
|
||||
if (0 < numEvents) {
|
||||
if (epollEvent.events & EPOLLIN) {
|
||||
curFdOff = epollEvent.data.u32;
|
||||
if (!fdArray[curFdOff].active) {
|
||||
fdArray[curFdOff].active = 1;
|
||||
++numActive;
|
||||
}
|
||||
|
||||
} else if (epollEvent.events & EPOLLHUP) {
|
||||
DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
|
||||
continue;
|
||||
} else {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("error event %d"), epollEvent.events);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
} else if (0 == numEvents) {
|
||||
if (2 == numActive) {
|
||||
/* both fds active, toggle between the two */
|
||||
curFdOff ^= 1;
|
||||
} else {
|
||||
/* only one active, if current is active, use it, else it */
|
||||
/* must be the other one (ie. curFd just went inactive) */
|
||||
curFdOff = fdArray[curFdOff].active ? curFdOff : curFdOff ^ 1;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (EINTR == errno) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* error */
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("epoll_wait() failed: %s"), strerror(errno));
|
||||
goto cleanup;
|
||||
|
||||
}
|
||||
|
||||
if (0 < numActive) {
|
||||
writeFdOff = curFdOff ^ 1;
|
||||
rc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd);
|
||||
|
||||
if (EAGAIN == rc) {
|
||||
/* this fd no longer has data, set it as inactive */
|
||||
--numActive;
|
||||
fdArray[curFdOff].active = 0;
|
||||
} else if (-1 == rc) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
rc = 0;
|
||||
|
||||
cleanup:
|
||||
close(appPty);
|
||||
close(contPty);
|
||||
close(epollFd);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif
|
33
src/lxc_controller.h
Normal file
33
src/lxc_controller.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright IBM Corp. 2008
|
||||
*
|
||||
* lxc_controller.h: linux container process controller
|
||||
*
|
||||
* Authors:
|
||||
* David L. Leskovec <dlesko at linux.vnet.ibm.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#ifndef LXC_CONTROLLER_H
|
||||
#define LXC_CONTROLLER_H
|
||||
|
||||
#ifdef WITH_LXC
|
||||
|
||||
int lxcControllerMain(int appPty, int contPty);
|
||||
|
||||
#endif /* WITH_LXC */
|
||||
|
||||
#endif /* LXC_CONTROLLER_H */
|
339
src/lxc_driver.c
339
src/lxc_driver.c
@ -26,7 +26,6 @@
|
||||
#ifdef WITH_LXC
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sched.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <stdbool.h>
|
||||
@ -39,6 +38,7 @@
|
||||
#include "lxc_conf.h"
|
||||
#include "lxc_container.h"
|
||||
#include "lxc_driver.h"
|
||||
#include "lxc_controller.h"
|
||||
#include "driver.h"
|
||||
#include "internal.h"
|
||||
#include "memory.h"
|
||||
@ -52,78 +52,20 @@
|
||||
#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
|
||||
#define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
|
||||
|
||||
/*
|
||||
* GLibc headers are behind the kernel, so we define these
|
||||
* constants if they're not present already.
|
||||
*/
|
||||
|
||||
#ifndef CLONE_NEWPID
|
||||
#define CLONE_NEWPID 0x20000000
|
||||
#endif
|
||||
#ifndef CLONE_NEWUTS
|
||||
#define CLONE_NEWUTS 0x04000000
|
||||
#endif
|
||||
#ifndef CLONE_NEWUSER
|
||||
#define CLONE_NEWUSER 0x10000000
|
||||
#endif
|
||||
#ifndef CLONE_NEWIPC
|
||||
#define CLONE_NEWIPC 0x08000000
|
||||
#endif
|
||||
#ifndef CLONE_NEWNET
|
||||
#define CLONE_NEWNET 0x40000000 /* New network namespace */
|
||||
#endif
|
||||
|
||||
static int lxcStartup(void);
|
||||
static int lxcShutdown(void);
|
||||
static lxc_driver_t *lxc_driver = NULL;
|
||||
|
||||
/* Functions */
|
||||
static int lxcDummyChild( void *argv ATTRIBUTE_UNUSED )
|
||||
{
|
||||
exit(0);
|
||||
}
|
||||
|
||||
static int lxcCheckContainerSupport(int extra_flags)
|
||||
{
|
||||
int rc = 0;
|
||||
int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|
|
||||
CLONE_NEWIPC|SIGCHLD|extra_flags;
|
||||
int cpid;
|
||||
char *childStack;
|
||||
char *stack;
|
||||
int childStatus;
|
||||
|
||||
if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) {
|
||||
DEBUG0("Unable to allocate stack");
|
||||
rc = -1;
|
||||
goto check_complete;
|
||||
}
|
||||
|
||||
childStack = stack + (getpagesize() * 4);
|
||||
|
||||
cpid = clone(lxcDummyChild, childStack, flags, NULL);
|
||||
if ((0 > cpid) && (EINVAL == errno)) {
|
||||
DEBUG0("clone call returned EINVAL, container support is not enabled");
|
||||
rc = -1;
|
||||
} else {
|
||||
waitpid(cpid, &childStatus, 0);
|
||||
}
|
||||
|
||||
VIR_FREE(stack);
|
||||
|
||||
check_complete:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static const char *lxcProbe(void)
|
||||
{
|
||||
#ifdef __linux__
|
||||
if (0 == lxcCheckContainerSupport(0)) {
|
||||
if (lxcContainerAvailable(0) < 0)
|
||||
return NULL;
|
||||
|
||||
return("lxc:///");
|
||||
}
|
||||
#endif
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
static virDrvOpenStatus lxcOpen(virConnectPtr conn,
|
||||
xmlURIPtr uri,
|
||||
@ -559,89 +501,6 @@ static int lxcCleanupInterfaces(const lxc_vm_t *vm)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* lxcSendContainerContinue:
|
||||
* @monitor: FD for communicating with child
|
||||
*
|
||||
* Sends the continue message via the socket pair stored in the vm
|
||||
* structure.
|
||||
*
|
||||
* Returns 0 on success or -1 in case of error
|
||||
*/
|
||||
static int lxcSendContainerContinue(virConnectPtr conn,
|
||||
int monitor)
|
||||
{
|
||||
int rc = -1;
|
||||
lxc_message_t msg = LXC_CONTINUE_MSG;
|
||||
int writeCount = 0;
|
||||
|
||||
writeCount = safewrite(monitor, &msg, sizeof(msg));
|
||||
if (writeCount != sizeof(msg)) {
|
||||
lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("unable to send container continue message: %s"),
|
||||
strerror(errno));
|
||||
goto error_out;
|
||||
}
|
||||
|
||||
rc = 0;
|
||||
|
||||
error_out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* lxcStartContainer:
|
||||
* @conn: pointer to connection
|
||||
* @driver: pointer to driver structure
|
||||
* @vm: pointer to virtual machine structure
|
||||
*
|
||||
* Starts a container process by calling clone() with the namespace flags
|
||||
*
|
||||
* Returns 0 on success or -1 in case of error
|
||||
*/
|
||||
static int lxcStartContainer(virConnectPtr conn,
|
||||
lxc_driver_t* driver,
|
||||
lxc_vm_t *vm,
|
||||
int monitor,
|
||||
char *ttyPath)
|
||||
{
|
||||
int rc = -1;
|
||||
int flags;
|
||||
int stacksize = getpagesize() * 4;
|
||||
char *stack, *stacktop;
|
||||
lxc_child_argv_t args = { vm->def, monitor, ttyPath };
|
||||
|
||||
/* allocate a stack for the container */
|
||||
if (VIR_ALLOC_N(stack, stacksize) < 0) {
|
||||
lxcError(conn, NULL, VIR_ERR_NO_MEMORY,
|
||||
_("unable to allocate container stack"));
|
||||
goto error_exit;
|
||||
}
|
||||
stacktop = stack + stacksize;
|
||||
|
||||
flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
|
||||
|
||||
if (vm->def->nets != NULL)
|
||||
flags |= CLONE_NEWNET;
|
||||
|
||||
vm->def->id = clone(lxcChild, stacktop, flags, &args);
|
||||
|
||||
DEBUG("clone() returned, %d", vm->def->id);
|
||||
|
||||
if (vm->def->id < 0) {
|
||||
lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("clone() failed, %s"), strerror(errno));
|
||||
goto error_exit;
|
||||
}
|
||||
|
||||
lxcSaveConfig(NULL, driver, vm, vm->def);
|
||||
|
||||
rc = 0;
|
||||
|
||||
error_exit:
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* lxcOpenTty:
|
||||
@ -716,170 +575,6 @@ cleanup:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* lxcFdForward:
|
||||
* @readFd: file descriptor to read
|
||||
* @writeFd: file desriptor to write
|
||||
*
|
||||
* Reads 1 byte of data from readFd and writes to writeFd.
|
||||
*
|
||||
* Returns 0 on success, EAGAIN if returned on read, or -1 in case of error
|
||||
*/
|
||||
static int lxcFdForward(int readFd, int writeFd)
|
||||
{
|
||||
int rc = -1;
|
||||
char buf[2];
|
||||
|
||||
if (1 != (saferead(readFd, buf, 1))) {
|
||||
if (EAGAIN == errno) {
|
||||
rc = EAGAIN;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("read of fd %d failed: %s"), readFd, strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (1 != (safewrite(writeFd, buf, 1))) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("write to fd %d failed: %s"), writeFd, strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
rc = 0;
|
||||
|
||||
cleanup:
|
||||
return rc;
|
||||
}
|
||||
|
||||
typedef struct _lxcTtyForwardFd_t {
|
||||
int fd;
|
||||
bool active;
|
||||
} lxcTtyForwardFd_t;
|
||||
|
||||
/**
|
||||
* lxcTtyForward:
|
||||
* @fd1: Open fd
|
||||
* @fd1: Open fd
|
||||
*
|
||||
* Forwards traffic between fds. Data read from fd1 will be written to fd2
|
||||
* This process loops forever.
|
||||
* This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP
|
||||
* events when the user disconnects the virsh console via ctrl-]
|
||||
*
|
||||
* Returns 0 on success or -1 in case of error
|
||||
*/
|
||||
static int lxcTtyForward(int fd1, int fd2)
|
||||
{
|
||||
int rc = -1;
|
||||
int epollFd;
|
||||
struct epoll_event epollEvent;
|
||||
int numEvents;
|
||||
int numActive = 0;
|
||||
lxcTtyForwardFd_t fdArray[2];
|
||||
int timeout = -1;
|
||||
int curFdOff = 0;
|
||||
int writeFdOff = 0;
|
||||
|
||||
fdArray[0].fd = fd1;
|
||||
fdArray[0].active = false;
|
||||
fdArray[1].fd = fd2;
|
||||
fdArray[1].active = false;
|
||||
|
||||
/* create the epoll fild descriptor */
|
||||
epollFd = epoll_create(2);
|
||||
if (0 > epollFd) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("epoll_create(2) failed: %s"), strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* add the file descriptors the epoll fd */
|
||||
memset(&epollEvent, 0x00, sizeof(epollEvent));
|
||||
epollEvent.events = EPOLLIN|EPOLLET; /* edge triggered */
|
||||
epollEvent.data.fd = fd1;
|
||||
epollEvent.data.u32 = 0; /* fdArray position */
|
||||
if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, fd1, &epollEvent)) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("epoll_ctl(fd1) failed: %s"), strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
epollEvent.data.fd = fd2;
|
||||
epollEvent.data.u32 = 1; /* fdArray position */
|
||||
if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, fd2, &epollEvent)) {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("epoll_ctl(fd2) failed: %s"), strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
/* if active fd's, return if no events, else wait forever */
|
||||
timeout = (numActive > 0) ? 0 : -1;
|
||||
numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout);
|
||||
if (0 < numEvents) {
|
||||
if (epollEvent.events & EPOLLIN) {
|
||||
curFdOff = epollEvent.data.u32;
|
||||
if (!fdArray[curFdOff].active) {
|
||||
fdArray[curFdOff].active = true;
|
||||
++numActive;
|
||||
}
|
||||
|
||||
} else if (epollEvent.events & EPOLLHUP) {
|
||||
DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
|
||||
continue;
|
||||
} else {
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("error event %d"), epollEvent.events);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
} else if (0 == numEvents) {
|
||||
if (2 == numActive) {
|
||||
/* both fds active, toggle between the two */
|
||||
curFdOff ^= 1;
|
||||
} else {
|
||||
/* only one active, if current is active, use it, else it */
|
||||
/* must be the other one (ie. curFd just went inactive) */
|
||||
curFdOff = fdArray[curFdOff].active ? curFdOff : curFdOff ^ 1;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (EINTR == errno) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* error */
|
||||
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||
_("epoll_wait() failed: %s"), strerror(errno));
|
||||
goto cleanup;
|
||||
|
||||
}
|
||||
|
||||
if (0 < numActive) {
|
||||
writeFdOff = curFdOff ^ 1;
|
||||
rc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd);
|
||||
|
||||
if (EAGAIN == rc) {
|
||||
/* this fd no longer has data, set it as inactive */
|
||||
--numActive;
|
||||
fdArray[curFdOff].active = false;
|
||||
} else if (-1 == rc) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
rc = 0;
|
||||
|
||||
cleanup:
|
||||
close(fd1);
|
||||
close(fd2);
|
||||
close(epollFd);
|
||||
exit(rc);
|
||||
}
|
||||
|
||||
/**
|
||||
* lxcVmStart:
|
||||
@ -921,7 +616,7 @@ static int lxcVmStart(virConnectPtr conn,
|
||||
|
||||
if (vm->pid == 0) {
|
||||
/* child process calls forward routine */
|
||||
lxcTtyForward(parentTty, containerTty);
|
||||
lxcControllerMain(parentTty, containerTty);
|
||||
}
|
||||
|
||||
if (lxcStoreTtyPid(driver, vm)) {
|
||||
@ -945,17 +640,19 @@ static int lxcVmStart(virConnectPtr conn,
|
||||
|
||||
/* check this rc */
|
||||
|
||||
rc = lxcStartContainer(conn, driver, vm,
|
||||
vm->def->id = lxcContainerStart(conn,
|
||||
vm->def,
|
||||
sockpair[1],
|
||||
containerTtyPath);
|
||||
if (rc != 0)
|
||||
if (vm->def->id == -1)
|
||||
goto cleanup;
|
||||
lxcSaveConfig(conn, driver, vm, vm->def);
|
||||
|
||||
rc = lxcMoveInterfacesToNetNs(conn, vm);
|
||||
if (rc != 0)
|
||||
goto cleanup;
|
||||
|
||||
rc = lxcSendContainerContinue(conn, sockpair[0]);
|
||||
rc = lxcContainerSendContinue(conn, sockpair[0]);
|
||||
if (rc != 0)
|
||||
goto cleanup;
|
||||
|
||||
@ -1196,16 +893,15 @@ static int lxcCheckNetNsSupport(void)
|
||||
{
|
||||
const char *argv[] = {"ip", "link", "set", "lo", "netns", "-1", NULL};
|
||||
int ip_rc;
|
||||
int user_netns = 0;
|
||||
int kern_netns = 0;
|
||||
|
||||
if (virRun(NULL, argv, &ip_rc) == 0)
|
||||
user_netns = WIFEXITED(ip_rc) && (WEXITSTATUS(ip_rc) != 255);
|
||||
if (virRun(NULL, argv, &ip_rc) < 0 ||
|
||||
!(WIFEXITED(ip_rc) && (WEXITSTATUS(ip_rc) != 255)))
|
||||
return 0;
|
||||
|
||||
if (lxcCheckContainerSupport(CLONE_NEWNET) == 0)
|
||||
kern_netns = 1;
|
||||
if (lxcContainerAvailable(LXC_CONTAINER_FEATURE_NET) < 0)
|
||||
return 0;
|
||||
|
||||
return kern_netns && user_netns;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int lxcStartup(void)
|
||||
@ -1222,9 +918,8 @@ static int lxcStartup(void)
|
||||
}
|
||||
|
||||
/* Check that this is a container enabled kernel */
|
||||
if(0 != lxcCheckContainerSupport(0)) {
|
||||
if(lxcContainerAvailable(0) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
lxc_driver->have_netns = lxcCheckNetNsSupport();
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user