Make LXC container startup/shutdown/I/O more robust

The current LXC I/O controller looks for HUP to detect
when a guest has quit. This isn't reliable as during
initial bootup it is possible that 'init' will close
the console and let mingetty re-open it. The shutdown
of containers was also flakey because it only killed
the libvirt I/O controller and expected container
processes to gracefully follow.

Change the I/O controller such that when it see HUP
or an I/O error, it uses kill($PID, 0) to see if the
process has really quit.

Change the container shutdown sequence to use the
virCgroupKillPainfully function to ensure every
really goes away

This change makes the use of the 'cpu', 'devices'
and 'memory' cgroups controllers compulsory with
LXC

* docs/drvlxc.html.in: Document that certain cgroups
  controllers are now mandatory
* src/lxc/lxc_controller.c: Check if PID is still
  alive before quitting on I/O error/HUP
* src/lxc/lxc_driver.c: Use virCgroupKillPainfully
This commit is contained in:
Daniel P. Berrange 2011-02-22 17:35:06 +00:00
parent b16f47ab61
commit 4e3117ae50
3 changed files with 113 additions and 104 deletions

View File

@ -9,6 +9,28 @@ light-weight "application container" which does not have it's own root image. Y
start it using
</p>
<h2>Cgroups Requirements</h2>
<p>
The libvirt LXC driver requires that certain cgroups controllers are
mounted on the host OS. The minimum required controllers are 'cpuacct',
'memory' and 'devices', while recommended extra controllers are
'cpu', 'freezer' and 'blkio'. The /etc/cgconfig.conf &amp; cgconfig
init service used to mount cgroups at host boot time. To manually
mount them use:
</p>
<pre>
# mount -t cgroup cgroup /dev/cgroup -o cpuacct,memory,devices,cpu,freezer,blkio
</pre>
<p>
NB, the blkio controller in some kernels will not allow creation of nested
sub-directories which will prevent correct operation of the libvirt LXC
driver. On such kernels, it may be neccessary to unmount the blkio controller.
</p>
<h3>Example config version 1</h3>
<p></p>
<pre>

View File

@ -33,6 +33,7 @@
#include <sys/personality.h>
#include <unistd.h>
#include <paths.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <getopt.h>
@ -121,12 +122,10 @@ static int lxcSetContainerResources(virDomainDefPtr def)
virReportSystemError(-rc,
_("Unable to set memory limit for domain %s"),
def->name);
/* Don't fail if we can't set memory due to lack of kernel support */
if (rc != -ENOENT)
goto cleanup;
goto cleanup;
}
if(def->mem.hard_limit) {
if (def->mem.hard_limit) {
rc = virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit);
if (rc != 0) {
virReportSystemError(-rc,
@ -136,7 +135,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
}
}
if(def->mem.soft_limit) {
if (def->mem.soft_limit) {
rc = virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit);
if (rc != 0) {
virReportSystemError(-rc,
@ -146,7 +145,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
}
}
if(def->mem.swap_hard_limit) {
if (def->mem.swap_hard_limit) {
rc = virCgroupSetSwapHardLimit(cgroup, def->mem.swap_hard_limit);
if (rc != 0) {
virReportSystemError(-rc,
@ -327,6 +326,18 @@ ignorable_epoll_accept_errno(int errnum)
|| errnum == EWOULDBLOCK);
}
static bool
lxcPidGone(pid_t container)
{
waitpid(container, NULL, WNOHANG);
if (kill(container, 0) < 0 &&
errno == ESRCH)
return true;
return false;
}
/**
* lxcControllerMain
* @monitor: server socket fd to accept client requests
@ -344,7 +355,8 @@ ignorable_epoll_accept_errno(int errnum)
static int lxcControllerMain(int monitor,
int client,
int appPty,
int contPty)
int contPty,
pid_t container)
{
int rc = -1;
int epollFd;
@ -450,7 +462,13 @@ static int lxcControllerMain(int monitor,
++numActive;
}
} else if (epollEvent.events & EPOLLHUP) {
VIR_DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
if (lxcPidGone(container))
goto cleanup;
curFdOff = epollEvent.data.fd == appPty ? 0 : 1;
if (fdArray[curFdOff].active) {
fdArray[curFdOff].active = 0;
--numActive;
}
continue;
} else {
lxcError(VIR_ERR_INTERNAL_ERROR,
@ -489,7 +507,9 @@ static int lxcControllerMain(int monitor,
--numActive;
fdArray[curFdOff].active = 0;
} else if (-1 == rc) {
goto cleanup;
if (lxcPidGone(container))
goto cleanup;
continue;
}
}
@ -587,7 +607,7 @@ lxcControllerRun(virDomainDefPtr def,
int rc = -1;
int control[2] = { -1, -1};
int containerPty = -1;
char *containerPtyPath;
char *containerPtyPath = NULL;
pid_t container = -1;
virDomainFSDefPtr root;
char *devpts = NULL;
@ -709,7 +729,7 @@ lxcControllerRun(virDomainDefPtr def,
if (lxcControllerClearCapabilities() < 0)
goto cleanup;
rc = lxcControllerMain(monitor, client, appPty, containerPty);
rc = lxcControllerMain(monitor, client, appPty, containerPty, container);
cleanup:
VIR_FREE(devptmx);

View File

@ -952,36 +952,16 @@ cleanup:
* @driver: pointer to driver structure
* @vm: pointer to VM to clean up
*
* waitpid() on the container process. kill and wait the tty process
* This is called by both lxcDomainDestroy and lxcSigHandler when a
* container exits.
* Cleanout resources associated with the now dead VM
*
* Returns 0 on success or -1 in case of error
*/
static int lxcVmCleanup(lxc_driver_t *driver,
static void lxcVmCleanup(lxc_driver_t *driver,
virDomainObjPtr vm)
{
int rc = 0;
int waitRc;
int childStatus = -1;
virCgroupPtr cgroup;
int i;
lxcDomainObjPrivatePtr priv = vm->privateData;
while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
errno == EINTR)
; /* empty */
if ((waitRc != vm->pid) && (errno != ECHILD)) {
virReportSystemError(errno,
_("waitpid failed to wait for container %d: %d"),
vm->pid, waitRc);
rc = -1;
} else if (WIFEXITED(childStatus)) {
VIR_DEBUG("container exited with rc: %d", WEXITSTATUS(childStatus));
rc = -1;
}
/* now that we know it's stopped call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, 0);
@ -1021,8 +1001,6 @@ static int lxcVmCleanup(lxc_driver_t *driver,
vm->def->id = -1;
vm->newDef = NULL;
}
return rc;
}
/**
@ -1181,11 +1159,10 @@ error:
static int lxcVmTerminate(lxc_driver_t *driver,
virDomainObjPtr vm,
int signum)
virDomainObjPtr vm)
{
if (signum == 0)
signum = SIGINT;
virCgroupPtr group = NULL;
int rc;
if (vm->pid <= 0) {
lxcError(VIR_ERR_INTERNAL_ERROR,
@ -1193,18 +1170,29 @@ static int lxcVmTerminate(lxc_driver_t *driver,
return -1;
}
if (kill(vm->pid, signum) < 0) {
if (errno != ESRCH) {
virReportSystemError(errno,
_("Failed to kill pid %d"),
vm->pid);
return -1;
}
if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
return -1;
rc = virCgroupKillPainfully(group);
if (rc < 0) {
virReportSystemError(-rc, "%s",
_("Failed to kill container PIDs"));
rc = -1;
goto cleanup;
}
if (rc == 1) {
lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Some container PIDs refused to die"));
rc = -1;
goto cleanup;
}
lxcVmCleanup(driver, vm);
vm->state = VIR_DOMAIN_SHUTDOWN;
rc = 0;
return lxcVmCleanup(driver, vm);
cleanup:
virCgroupFree(&group);
return rc;
}
static void lxcMonitorEvent(int watch,
@ -1228,7 +1216,7 @@ static void lxcMonitorEvent(int watch,
goto cleanup;
}
if (lxcVmTerminate(driver, vm, SIGINT) < 0) {
if (lxcVmTerminate(driver, vm) < 0) {
virEventRemoveHandle(watch);
} else {
event = virDomainEventNewFromObj(vm,
@ -1473,6 +1461,31 @@ static int lxcVmStart(virConnectPtr conn,
char **veths = NULL;
lxcDomainObjPrivatePtr priv = vm->privateData;
if (!lxc_driver->cgroup) {
lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
_("The 'cpuacct', 'devices' & 'memory' cgroups controllers must be mounted"));
return -1;
}
if (!virCgroupMounted(lxc_driver->cgroup,
VIR_CGROUP_CONTROLLER_CPUACCT)) {
lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find 'cpuacct' cgroups controller mount"));
return -1;
}
if (!virCgroupMounted(lxc_driver->cgroup,
VIR_CGROUP_CONTROLLER_DEVICES)) {
lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find 'devices' cgroups controller mount"));
return -1;
}
if (!virCgroupMounted(lxc_driver->cgroup,
VIR_CGROUP_CONTROLLER_MEMORY)) {
lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to find 'memory' cgroups controller mount"));
return -1;
}
if ((r = virFileMakePath(driver->logDir)) != 0) {
virReportSystemError(r,
_("Cannot create log directory '%s'"),
@ -1543,7 +1556,7 @@ static int lxcVmStart(virConnectPtr conn,
VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
lxcMonitorEvent,
vm, NULL)) < 0) {
lxcVmTerminate(driver, vm, 0);
lxcVmTerminate(driver, vm);
goto cleanup;
}
@ -1711,55 +1724,6 @@ cleanup:
return dom;
}
/**
* lxcDomainShutdown:
* @dom: pointer to domain to shutdown
*
* Sends SIGINT to container root process to request it to shutdown
*
* Returns 0 on success or -1 in case of error
*/
static int lxcDomainShutdown(virDomainPtr dom)
{
lxc_driver_t *driver = dom->conn->privateData;
virDomainObjPtr vm;
virDomainEventPtr event = NULL;
int ret = -1;
lxcDriverLock(driver);
vm = virDomainFindByUUID(&driver->domains, dom->uuid);
if (!vm) {
char uuidstr[VIR_UUID_STRING_BUFLEN];
virUUIDFormat(dom->uuid, uuidstr);
lxcError(VIR_ERR_NO_DOMAIN,
_("No domain with matching uuid '%s'"), uuidstr);
goto cleanup;
}
if (!virDomainObjIsActive(vm)) {
lxcError(VIR_ERR_OPERATION_INVALID,
"%s", _("Domain is not running"));
goto cleanup;
}
ret = lxcVmTerminate(driver, vm, 0);
event = virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
if (!vm->persistent) {
virDomainRemoveInactive(&driver->domains, vm);
vm = NULL;
}
cleanup:
if (vm)
virDomainObjUnlock(vm);
if (event)
lxcDomainEventQueue(driver, event);
lxcDriverUnlock(driver);
return ret;
}
static int
lxcDomainEventRegister(virConnectPtr conn,
@ -1927,7 +1891,7 @@ static int lxcDomainDestroy(virDomainPtr dom)
goto cleanup;
}
ret = lxcVmTerminate(driver, vm, SIGKILL);
ret = lxcVmTerminate(driver, vm);
event = virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
@ -2056,7 +2020,7 @@ lxcReconnectVM(void *payload, const void *name ATTRIBUTE_UNUSED, void *opaque)
VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
lxcMonitorEvent,
vm, NULL)) < 0) {
lxcVmTerminate(driver, vm, 0);
lxcVmTerminate(driver, vm);
goto cleanup;
}
} else {
@ -2123,8 +2087,11 @@ static int lxcStartup(int privileged)
rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1);
if (rc < 0) {
char buf[1024];
VIR_WARN("Unable to create cgroup for driver: %s",
virStrerror(-rc, buf, sizeof(buf)));
VIR_DEBUG("Unable to create cgroup for LXC driver: %s",
virStrerror(-rc, buf, sizeof(buf)));
/* Don't abort startup. We will explicitly report to
* the user when they try to start a VM
*/
}
/* Call function to load lxc driver configuration information */
@ -2844,7 +2811,7 @@ static virDriver lxcDriver = {
lxcDomainLookupByName, /* domainLookupByName */
lxcDomainSuspend, /* domainSuspend */
lxcDomainResume, /* domainResume */
lxcDomainShutdown, /* domainShutdown */
NULL, /* domainShutdown */
NULL, /* domainReboot */
lxcDomainDestroy, /* domainDestroy */
lxcGetOSType, /* domainGetOSType */