CVE-2013-6456: Avoid unsafe use of /proc/$PID/root in LXC disk hotplug

Rewrite lxcDomainAttachDeviceDiskLive function to use the
virProcessRunInMountNamespace helper. This avoids risk of
a malicious guest replacing /dev with a absolute symlink,
tricking the driver into changing the host OS filesystem.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
(cherry picked from commit 4dd3a7d5bc44980135a1b11810ba9aeab42a4a59)

Conflicts:
	src/lxc/lxc_driver.c: OOM + cgroups error reporting and
        remove usernamespace integration
This commit is contained in:
Daniel P. Berrange 2014-01-30 15:59:20 +00:00
parent bd9ec4506e
commit b2a853e1f6

View File

@ -3116,6 +3116,111 @@ cleanup:
}
struct lxcDomainAttachDeviceMknodData {
virLXCDriverPtr driver;
mode_t mode;
dev_t dev;
virDomainObjPtr vm;
virDomainDeviceDefPtr def;
char *file;
};
static int
lxcDomainAttachDeviceMknodHelper(pid_t pid ATTRIBUTE_UNUSED,
void *opaque)
{
struct lxcDomainAttachDeviceMknodData *data = opaque;
int ret = -1;
virSecurityManagerPostFork(data->driver->securityManager);
if (virFileMakeParentPath(data->file) < 0) {
virReportSystemError(errno,
_("Unable to create %s"), data->file);
goto cleanup;
}
/* Yes, the device name we're creating may not
* actually correspond to the major:minor number
* we're using, but we've no other option at this
* time. Just have to hope that containerized apps
* don't get upset that the major:minor is different
* to that normally implied by the device name
*/
VIR_DEBUG("Creating dev %s (%d,%d)",
data->file, major(data->dev), minor(data->dev));
if (mknod(data->file, data->mode, data->dev) < 0) {
virReportSystemError(errno,
_("Unable to create device %s"),
data->file);
goto cleanup;
}
/* Labelling normally operates on src, but we need
* to actually label the dst here, so hack the config */
switch (data->def->type) {
case VIR_DOMAIN_DEVICE_DISK: {
virDomainDiskDefPtr def = data->def->data.disk;
char *tmpsrc = def->src;
def->src = data->file;
if (virSecurityManagerSetImageLabel(data->driver->securityManager,
data->vm->def, def) < 0) {
def->src = tmpsrc;
goto cleanup;
}
def->src = tmpsrc;
} break;
default:
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unexpected device type %d"),
data->def->type);
goto cleanup;
}
ret = 0;
cleanup:
if (ret < 0)
unlink(data->file);
return ret;
}
static int
lxcDomainAttachDeviceMknod(virLXCDriverPtr driver,
mode_t mode,
dev_t dev,
virDomainObjPtr vm,
virDomainDeviceDefPtr def,
char *file)
{
virLXCDomainObjPrivatePtr priv = vm->privateData;
struct lxcDomainAttachDeviceMknodData data;
memset(&data, 0, sizeof(data));
data.driver = driver;
data.mode = mode;
data.dev = dev;
data.vm = vm;
data.def = def;
data.file = file;
virSecurityManagerPreFork(driver->securityManager);
if (virProcessRunInMountNamespace(priv->initpid,
lxcDomainAttachDeviceMknodHelper,
&data) < 0) {
virSecurityManagerPostFork(driver->securityManager);
return -1;
}
virSecurityManagerPostFork(driver->securityManager);
return 0;
}
static int
lxcDomainAttachDeviceDiskLive(virLXCDriverPtr driver,
virDomainObjPtr vm,
@ -3124,11 +3229,9 @@ lxcDomainAttachDeviceDiskLive(virLXCDriverPtr driver,
virLXCDomainObjPrivatePtr priv = vm->privateData;
virDomainDiskDefPtr def = dev->data.disk;
int ret = -1;
char *dst = NULL;
struct stat sb;
bool created = false;
mode_t mode = 0;
char *tmpsrc = def->src;
char *file = NULL;
int perms;
if (!priv->initpid) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
@ -3172,51 +3275,49 @@ lxcDomainAttachDeviceDiskLive(virLXCDriverPtr driver,
goto cleanup;
}
if (virAsprintf(&dst, "/proc/%llu/root/dev/%s",
(unsigned long long)priv->initpid, def->dst) < 0) {
virReportOOMError();
if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("devices cgroup isn't mounted"));
goto cleanup;
}
if (VIR_REALLOC_N(vm->def->disks, vm->def->ndisks+1) < 0) {
virReportOOMError();
goto cleanup;
}
perms = (def->readonly ?
VIR_CGROUP_DEVICE_READ :
VIR_CGROUP_DEVICE_RW) |
VIR_CGROUP_DEVICE_MKNOD;
mode = 0700 | S_IFBLK;
/* Yes, the device name we're creating may not
* actually correspond to the major:minor number
* we're using, but we've no other option at this
* time. Just have to hope that containerized apps
* don't get upset that the major:minor is different
* to that normally implied by the device name
*/
VIR_DEBUG("Creating dev %s (%d,%d) from %s",
dst, major(sb.st_rdev), minor(sb.st_rdev), def->src);
if (mknod(dst, mode, sb.st_rdev) < 0) {
virReportSystemError(errno,
_("Unable to create device %s"),
dst);
goto cleanup;
}
created = true;
/* Labelling normally operates on src, but we need
* to actually label the dst here, so hack the config */
def->src = dst;
if (virSecurityManagerSetImageLabel(driver->securityManager,
vm->def, def) < 0)
goto cleanup;
if (virCgroupAllowDevicePath(priv->cgroup, def->src,
(def->readonly ?
VIR_CGROUP_DEVICE_READ :
VIR_CGROUP_DEVICE_RW) |
VIR_CGROUP_DEVICE_MKNOD) != 0) {
if (virCgroupAllowDevice(priv->cgroup,
'b',
major(sb.st_rdev),
minor(sb.st_rdev),
perms) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot allow device %s for domain %s"),
def->src, vm->def->name);
_("Unable to allow device %s"), def->src);
goto cleanup;
}
if (VIR_REALLOC_N(vm->def->disks, vm->def->ndisks + 1) < 0)
goto cleanup;
if (virAsprintf(&file,
"/dev/%s", def->dst) < 0) {
virReportOOMError();
goto cleanup;
}
if (lxcDomainAttachDeviceMknod(driver,
0700 | S_IFBLK,
sb.st_rdev,
vm,
dev,
file) < 0) {
if (virCgroupDenyDevice(priv->cgroup,
'b',
major(sb.st_rdev),
minor(sb.st_rdev),
perms) < 0)
VIR_WARN("cannot deny device %s for domain %s",
def->src, vm->def->name);
goto cleanup;
}
@ -3225,11 +3326,8 @@ lxcDomainAttachDeviceDiskLive(virLXCDriverPtr driver,
ret = 0;
cleanup:
def->src = tmpsrc;
virDomainAuditDisk(vm, NULL, def->src, "attach", ret == 0);
if (dst && created && ret < 0)
unlink(dst);
VIR_FREE(dst);
VIR_FREE(file);
return ret;
}