lxc: allow to keep or drop capabilities

Add a <capabilities> element to the <features> section of the LXC domain
configuration. It can contain child elements named after the
capabilities, for example:

  <mknod state="on"/>        keep the CAP_MKNOD capability
  <sys_chroot state="off"/>  drop the CAP_SYS_CHROOT capability

Users can restrict or give more capabilities than the default using
this mechanism.
This commit is contained in:
Cédric Bosdonnat 2014-07-18 10:02:29 +02:00 committed by Gao feng
parent a86b6215a7
commit 47e5b5ae32
10 changed files with 755 additions and 20 deletions

View File

@ -540,6 +540,53 @@ debootstrap, whatever) under /opt/vm-1-root:
&lt;/domain&gt; &lt;/domain&gt;
</pre> </pre>
<h2><a name="capabilities">Altering the available capabilities</a></h2>
<p>
By default the libvirt LXC driver drops some capabilities, among them CAP_MKNOD.
However, <span class="since">since 1.2.6</span> libvirt can be told to keep or
drop specific capabilities using a domain configuration like the following:
</p>
<pre>
...
&lt;features&gt;
&lt;capabilities policy='default'&gt;
&lt;mknod state='on'/&gt;
&lt;sys_chroot state='off'/&gt;
&lt;/capabilities&gt;
&lt;/features&gt;
...
</pre>
<p>
The child elements of <code>capabilities</code> are named after the capabilities defined in
<code>man 7 capabilities</code>, in lower case and without the <code>CAP_</code> prefix
(for example <code>mknod</code> for CAP_MKNOD). An <code>off</code> state tells libvirt to drop the
capability, while an <code>on</code> state forces libvirt to keep the capability even if
it is dropped by default.
</p>
<p>
The <code>policy</code> attribute can be one of <code>default</code>, <code>allow</code>
or <code>deny</code>. It defines the baseline rule for capabilities: <code>default</code> keeps the
default behavior of dropping a few selected capabilities, <code>allow</code> keeps all capabilities
and <code>deny</code> drops all capabilities. The benefit of <code>allow</code> and <code>deny</code> is that
they guarantee that all capabilities will be kept (or removed) even if new ones are added
later.
</p>
<p>
The following example drops all capabilities except CAP_MKNOD:
</p>
<pre>
...
&lt;features&gt;
&lt;capabilities policy='deny'&gt;
&lt;mknod state='on'/&gt;
&lt;/capabilities&gt;
&lt;/features&gt;
...
</pre>
<p>
Note that allowing capabilities that are normally dropped by default can seriously
affect the security of the container and the host.
</p>
<h2><a name="usage">Container usage / management</a></h2> <h2><a name="usage">Container usage / management</a></h2>

View File

@ -3815,6 +3815,9 @@
<empty/> <empty/>
</element> </element>
</optional> </optional>
<optional>
<ref name="capabilities"/>
</optional>
</interleave> </interleave>
</element> </element>
</optional> </optional>
@ -4387,6 +4390,200 @@
</element> </element>
</define> </define>
<!-- Optional capabilities features -->
<define name="capabilities">
<element name="capabilities">
<ref name="capabilitiespolicy"/>
<interleave>
<optional>
<element name="audit_control">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="audit_write">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="block_suspend">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="chown">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="dac_override">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="dac_read_search">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="fowner">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="fsetid">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="ipc_lock">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="ipc_owner">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="kill">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="lease">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="linux_immutable">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="mac_admin">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="mac_override">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="mknod">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="net_admin">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="net_bind_service">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="net_broadcast">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="net_raw">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="setgid">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="setfcap">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="setpcap">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="setuid">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_admin">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_boot">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_chroot">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_module">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_nice">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_pacct">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_ptrace">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_rawio">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_resource">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_time">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="sys_tty_config">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="syslog">
<ref name="featurestate"/>
</element>
</optional>
<optional>
<element name="wake_alarm">
<ref name="featurestate"/>
</element>
</optional>
</interleave>
</element>
</define>
<define name="featurestate"> <define name="featurestate">
<attribute name="state"> <attribute name="state">
<choice> <choice>
@ -4396,6 +4593,16 @@
</attribute> </attribute>
</define> </define>
<define name="capabilitiespolicy">
<attribute name="policy">
<choice>
<value>default</value>
<value>allow</value>
<value>deny</value>
</choice>
</attribute>
</define>
<!-- <!--
Optional hypervisor extensions in their own namespace: Optional hypervisor extensions in their own namespace:
QEmu QEmu

View File

@ -147,18 +147,63 @@ VIR_ENUM_IMPL(virDomainFeature, VIR_DOMAIN_FEATURE_LAST,
"viridian", "viridian",
"privnet", "privnet",
"hyperv", "hyperv",
"pvspinlock") "pvspinlock",
"capabilities")
VIR_ENUM_IMPL(virDomainFeatureState, VIR_DOMAIN_FEATURE_STATE_LAST, VIR_ENUM_IMPL(virDomainFeatureState, VIR_DOMAIN_FEATURE_STATE_LAST,
"default", "default",
"on", "on",
"off") "off")
/* String values of the 'policy' attribute of <capabilities>, converted
 * to and from virDomainCapabilitiesPolicy by the XML parser/formatter. */
VIR_ENUM_IMPL(virDomainCapabilitiesPolicy, VIR_DOMAIN_CAPABILITIES_POLICY_LAST,
"default",
"allow",
"deny")
VIR_ENUM_IMPL(virDomainHyperv, VIR_DOMAIN_HYPERV_LAST, VIR_ENUM_IMPL(virDomainHyperv, VIR_DOMAIN_HYPERV_LAST,
"relaxed", "relaxed",
"vapic", "vapic",
"spinlocks") "spinlocks")
/* XML element names of the individual capabilities accepted below
 * <capabilities>, converted to and from virDomainCapsFeature.
 * NOTE(review): the strings appear to be matched to the enum by
 * position, so this list presumably has to stay in the same order as
 * the virDomainCapsFeature enumerators - confirm against VIR_ENUM_IMPL. */
VIR_ENUM_IMPL(virDomainCapsFeature, VIR_DOMAIN_CAPS_FEATURE_LAST,
"audit_control",
"audit_write",
"block_suspend",
"chown",
"dac_override",
"dac_read_search",
"fowner",
"fsetid",
"ipc_lock",
"ipc_owner",
"kill",
"lease",
"linux_immutable",
"mac_admin",
"mac_override",
"mknod",
"net_admin",
"net_bind_service",
"net_broadcast",
"net_raw",
"setgid",
"setfcap",
"setpcap",
"setuid",
"sys_admin",
"sys_boot",
"sys_chroot",
"sys_module",
"sys_nice",
"sys_pacct",
"sys_ptrace",
"sys_rawio",
"sys_resource",
"sys_time",
"sys_tty_config",
"syslog",
"wake_alarm")
VIR_ENUM_IMPL(virDomainLifecycle, VIR_DOMAIN_LIFECYCLE_LAST, VIR_ENUM_IMPL(virDomainLifecycle, VIR_DOMAIN_LIFECYCLE_LAST,
"destroy", "destroy",
"restart", "restart",
@ -11863,6 +11908,22 @@ virDomainDefParseXML(xmlDocPtr xml,
def->features[val] = VIR_DOMAIN_FEATURE_STATE_ON; def->features[val] = VIR_DOMAIN_FEATURE_STATE_ON;
break; break;
case VIR_DOMAIN_FEATURE_CAPABILITIES:
    /* <capabilities> carries a 'policy' attribute (not a 'state' one);
     * its value is stored in def->features[] as a
     * virDomainCapabilitiesPolicy. */
    node = ctxt->node;
    ctxt->node = nodes[i];
    if ((tmp = virXPathString("string(./@policy)", ctxt))) {
        if ((def->features[val] = virDomainCapabilitiesPolicyTypeFromString(tmp)) == -1) {
            /* name the attribute that was actually parsed */
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("unknown policy attribute '%s' of feature '%s'"),
                           tmp, virDomainFeatureTypeToString(val));
            goto error;
        }
        VIR_FREE(tmp);
    } else {
        /* no policy string obtained: use the default policy */
        def->features[val] = VIR_DOMAIN_CAPABILITIES_POLICY_DEFAULT;
    }
    ctxt->node = node;
    break;
case VIR_DOMAIN_FEATURE_PVSPINLOCK: case VIR_DOMAIN_FEATURE_PVSPINLOCK:
node = ctxt->node; node = ctxt->node;
ctxt->node = nodes[i]; ctxt->node = nodes[i];
@ -11971,6 +12032,37 @@ virDomainDefParseXML(xmlDocPtr xml,
ctxt->node = node; ctxt->node = node;
} }
/* Parse the children of <features><capabilities>. Each child is named
 * after a capability; its 'state' attribute is stored in
 * def->caps_features[], and a child for which no state string is
 * obtained is recorded as VIR_DOMAIN_FEATURE_STATE_ON. */
if ((n = virXPathNodeSet("./features/capabilities/*", ctxt, &nodes)) < 0)
    goto error;

for (i = 0; i < n; i++) {
    int val = virDomainCapsFeatureTypeFromString((const char *)nodes[i]->name);
    if (val < 0) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("unexpected capability feature '%s'"), nodes[i]->name);
        goto error;
    }

    if (val >= 0 && val < VIR_DOMAIN_CAPS_FEATURE_LAST) {
        node = ctxt->node;
        ctxt->node = nodes[i];
        if ((tmp = virXPathString("string(./@state)", ctxt))) {
            if ((def->caps_features[val] = virDomainFeatureStateTypeFromString(tmp)) == -1) {
                /* val indexes virDomainCapsFeature here, so the name
                 * must come from the capability string table and not
                 * from the virDomainFeature one. */
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("unknown state attribute '%s' of feature capability '%s'"),
                               tmp, virDomainCapsFeatureTypeToString(val));
                goto error;
            }
            VIR_FREE(tmp);
        } else {
            def->caps_features[val] = VIR_DOMAIN_FEATURE_STATE_ON;
        }
        ctxt->node = node;
    }
}
VIR_FREE(nodes);
if (virDomainEventActionParseXML(ctxt, "on_reboot", if (virDomainEventActionParseXML(ctxt, "on_reboot",
"string(./on_reboot[1])", "string(./on_reboot[1])",
&def->onReboot, &def->onReboot,
@ -17175,6 +17267,19 @@ verify(((VIR_DOMAIN_XML_INTERNAL_STATUS |
VIR_DOMAIN_XML_INTERNAL_CLOCK_ADJUST) VIR_DOMAIN_XML_INTERNAL_CLOCK_ADJUST)
& DUMPXML_FLAGS) == 0); & DUMPXML_FLAGS) == 0);
/* Tell whether at least one entry of def->caps_features[] has been set
 * to something other than VIR_DOMAIN_FEATURE_STATE_DEFAULT. */
static bool
virDomainDefHasCapabilitiesFeatures(virDomainDefPtr def)
{
    size_t idx = 0;

    while (idx < VIR_DOMAIN_CAPS_FEATURE_LAST) {
        if (def->caps_features[idx] != VIR_DOMAIN_FEATURE_STATE_DEFAULT)
            return true;
        idx++;
    }

    return false;
}
/* This internal version can accept VIR_DOMAIN_XML_INTERNAL_*, /* This internal version can accept VIR_DOMAIN_XML_INTERNAL_*,
* whereas the public version cannot. Also, it appends to an existing * whereas the public version cannot. Also, it appends to an existing
* buffer (possibly with auto-indent), rather than flattening to string. * buffer (possibly with auto-indent), rather than flattening to string.
@ -17672,6 +17777,25 @@ virDomainDefFormatInternal(virDomainDefPtr def,
virBufferAddLit(buf, "</hyperv>\n"); virBufferAddLit(buf, "</hyperv>\n");
break; break;
case VIR_DOMAIN_FEATURE_CAPABILITIES:
    /* Emit <capabilities> only when the policy is not the default one
     * or at least one capability has an explicit state. */
    if (def->features[i] != VIR_DOMAIN_CAPABILITIES_POLICY_DEFAULT ||
        virDomainDefHasCapabilitiesFeatures(def)) {
        virBufferAsprintf(buf, "<capabilities policy='%s'>\n",
                          virDomainCapabilitiesPolicyTypeToString(def->features[i]));
        virBufferAdjustIndent(buf, 2);
        for (j = 0; j < VIR_DOMAIN_CAPS_FEATURE_LAST; j++) {
            /* capabilities left in the default state are not written */
            if (def->caps_features[j] == VIR_DOMAIN_FEATURE_STATE_DEFAULT)
                continue;
            virBufferAsprintf(buf, "<%s state='%s'/>\n",
                              virDomainCapsFeatureTypeToString(j),
                              virDomainFeatureStateTypeToString(def->caps_features[j]));
        }
        virBufferAdjustIndent(buf, -2);
        virBufferAddLit(buf, "</capabilities>\n");
    }
    break;
case VIR_DOMAIN_FEATURE_LAST: case VIR_DOMAIN_FEATURE_LAST:
break; break;
} }

View File

@ -1536,6 +1536,7 @@ typedef enum {
VIR_DOMAIN_FEATURE_PRIVNET, VIR_DOMAIN_FEATURE_PRIVNET,
VIR_DOMAIN_FEATURE_HYPERV, VIR_DOMAIN_FEATURE_HYPERV,
VIR_DOMAIN_FEATURE_PVSPINLOCK, VIR_DOMAIN_FEATURE_PVSPINLOCK,
VIR_DOMAIN_FEATURE_CAPABILITIES,
VIR_DOMAIN_FEATURE_LAST VIR_DOMAIN_FEATURE_LAST
} virDomainFeature; } virDomainFeature;
@ -1556,6 +1557,56 @@ typedef enum {
VIR_DOMAIN_HYPERV_LAST VIR_DOMAIN_HYPERV_LAST
} virDomainHyperv; } virDomainHyperv;
/* Baseline rule applied to the capabilities of a container, taken from
 * the 'policy' attribute of <capabilities>. The state of an individual
 * capability (see virDomainCapsFeature) can still override it. */
typedef enum {
VIR_DOMAIN_CAPABILITIES_POLICY_DEFAULT = 0, /* drop only the driver's default set */
VIR_DOMAIN_CAPABILITIES_POLICY_ALLOW, /* keep all capabilities */
VIR_DOMAIN_CAPABILITIES_POLICY_DENY, /* drop all capabilities */
VIR_DOMAIN_CAPABILITIES_POLICY_LAST
} virDomainCapabilitiesPolicy;
/* Capabilities whose keep/drop state can be set individually.
 * They are ordered (roughly) alphabetically to help check for new ones.
 * The enumerators are used as indexes into virDomainDef.caps_features[]
 * and into the capsMapping[] table of lxcContainerDropCapabilities(),
 * so that table has to list the CAP_* constants in this same order. */
typedef enum {
VIR_DOMAIN_CAPS_FEATURE_AUDIT_CONTROL = 0,
VIR_DOMAIN_CAPS_FEATURE_AUDIT_WRITE,
VIR_DOMAIN_CAPS_FEATURE_BLOCK_SUSPEND,
VIR_DOMAIN_CAPS_FEATURE_CHOWN,
VIR_DOMAIN_CAPS_FEATURE_DAC_OVERRIDE,
VIR_DOMAIN_CAPS_FEATURE_DAC_READ_SEARCH,
VIR_DOMAIN_CAPS_FEATURE_FOWNER,
VIR_DOMAIN_CAPS_FEATURE_FSETID,
VIR_DOMAIN_CAPS_FEATURE_IPC_LOCK,
VIR_DOMAIN_CAPS_FEATURE_IPC_OWNER,
VIR_DOMAIN_CAPS_FEATURE_KILL,
VIR_DOMAIN_CAPS_FEATURE_LEASE,
VIR_DOMAIN_CAPS_FEATURE_LINUX_IMMUTABLE,
VIR_DOMAIN_CAPS_FEATURE_MAC_ADMIN,
VIR_DOMAIN_CAPS_FEATURE_MAC_OVERRIDE,
VIR_DOMAIN_CAPS_FEATURE_MKNOD,
VIR_DOMAIN_CAPS_FEATURE_NET_ADMIN,
VIR_DOMAIN_CAPS_FEATURE_NET_BIND_SERVICE,
VIR_DOMAIN_CAPS_FEATURE_NET_BROADCAST,
VIR_DOMAIN_CAPS_FEATURE_NET_RAW,
VIR_DOMAIN_CAPS_FEATURE_SETGID,
VIR_DOMAIN_CAPS_FEATURE_SETFCAP,
VIR_DOMAIN_CAPS_FEATURE_SETPCAP,
VIR_DOMAIN_CAPS_FEATURE_SETUID,
VIR_DOMAIN_CAPS_FEATURE_SYS_ADMIN,
VIR_DOMAIN_CAPS_FEATURE_SYS_BOOT,
VIR_DOMAIN_CAPS_FEATURE_SYS_CHROOT,
VIR_DOMAIN_CAPS_FEATURE_SYS_MODULE,
VIR_DOMAIN_CAPS_FEATURE_SYS_NICE,
VIR_DOMAIN_CAPS_FEATURE_SYS_PACCT,
VIR_DOMAIN_CAPS_FEATURE_SYS_PTRACE,
VIR_DOMAIN_CAPS_FEATURE_SYS_RAWIO,
VIR_DOMAIN_CAPS_FEATURE_SYS_RESOURCE,
VIR_DOMAIN_CAPS_FEATURE_SYS_TIME,
VIR_DOMAIN_CAPS_FEATURE_SYS_TTY_CONFIG,
VIR_DOMAIN_CAPS_FEATURE_SYSLOG,
VIR_DOMAIN_CAPS_FEATURE_WAKE_ALARM,
VIR_DOMAIN_CAPS_FEATURE_LAST
} virDomainCapsFeature;
typedef enum { typedef enum {
VIR_DOMAIN_LIFECYCLE_DESTROY, VIR_DOMAIN_LIFECYCLE_DESTROY,
VIR_DOMAIN_LIFECYCLE_RESTART, VIR_DOMAIN_LIFECYCLE_RESTART,
@ -1923,6 +1974,9 @@ struct _virDomainDef {
int hyperv_features[VIR_DOMAIN_HYPERV_LAST]; int hyperv_features[VIR_DOMAIN_HYPERV_LAST];
unsigned int hyperv_spinlocks; unsigned int hyperv_spinlocks;
/* These options are of type virDomainFeatureState: ON = keep, OFF = drop */
int caps_features[VIR_DOMAIN_CAPS_FEATURE_LAST];
virDomainClockDef clock; virDomainClockDef clock;
size_t ngraphics; size_t ngraphics;
@ -2540,6 +2594,8 @@ VIR_ENUM_DECL(virDomainBoot)
VIR_ENUM_DECL(virDomainBootMenu) VIR_ENUM_DECL(virDomainBootMenu)
VIR_ENUM_DECL(virDomainFeature) VIR_ENUM_DECL(virDomainFeature)
VIR_ENUM_DECL(virDomainFeatureState) VIR_ENUM_DECL(virDomainFeatureState)
VIR_ENUM_DECL(virDomainCapabilitiesPolicy)
VIR_ENUM_DECL(virDomainCapsFeature)
VIR_ENUM_DECL(virDomainLifecycle) VIR_ENUM_DECL(virDomainLifecycle)
VIR_ENUM_DECL(virDomainLifecycleCrash) VIR_ENUM_DECL(virDomainLifecycleCrash)
VIR_ENUM_DECL(virDomainPMState) VIR_ENUM_DECL(virDomainPMState)

View File

@ -147,6 +147,8 @@ virDomainBlockedReasonTypeFromString;
virDomainBlockedReasonTypeToString; virDomainBlockedReasonTypeToString;
virDomainBootMenuTypeFromString; virDomainBootMenuTypeFromString;
virDomainBootMenuTypeToString; virDomainBootMenuTypeToString;
virDomainCapabilitiesPolicyTypeToString;
virDomainCapsFeatureTypeToString;
virDomainChrConsoleTargetTypeFromString; virDomainChrConsoleTargetTypeFromString;
virDomainChrConsoleTargetTypeToString; virDomainChrConsoleTargetTypeToString;
virDomainChrDefForeach; virDomainChrDefForeach;
@ -1060,6 +1062,7 @@ virBufferVasprintf;
# util/vircgroup.h # util/vircgroup.h
virCgroupAddTask; virCgroupAddTask;
virCgroupAddTaskController; virCgroupAddTaskController;
virCgroupAllowAllDevices;
virCgroupAllowDevice; virCgroupAllowDevice;
virCgroupAllowDeviceMajor; virCgroupAllowDeviceMajor;
virCgroupAllowDevicePath; virCgroupAllowDevicePath;

View File

@ -353,6 +353,14 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
if (virCgroupDenyAllDevices(cgroup) < 0) if (virCgroupDenyAllDevices(cgroup) < 0)
goto cleanup; goto cleanup;
/* When the domain explicitly keeps CAP_MKNOD (state 'on'), also
 * whitelist the mknod permission for all devices in the cgroup */
if (def->caps_features[VIR_DOMAIN_CAPS_FEATURE_MKNOD] == VIR_DOMAIN_FEATURE_STATE_ON &&
    virCgroupAllowAllDevices(cgroup, VIR_CGROUP_DEVICE_MKNOD) < 0)
    goto cleanup;
for (i = 0; devices[i].type != 0; i++) { for (i = 0; devices[i].type != 0; i++) {
virLXCCgroupDevicePolicyPtr dev = &devices[i]; virLXCCgroupDevicePolicyPtr dev = &devices[i];
if (virCgroupAllowDevice(cgroup, if (virCgroupAllowDevice(cgroup,

View File

@ -1776,25 +1776,233 @@ static int lxcContainerResolveSymlinks(virDomainDefPtr vmDef)
* host system, since they are not currently "containerized" * host system, since they are not currently "containerized"
*/ */
#if WITH_CAPNG #if WITH_CAPNG
static int lxcContainerDropCapabilities(bool keepReboot)
/* Define every capability constant that the kernel headers do not provide
 * as -1, so that the cap_valid() check in lxcContainerDropCapabilities()
 * makes us ignore it. */
# ifndef CAP_AUDIT_CONTROL
# define CAP_AUDIT_CONTROL -1
# endif
# ifndef CAP_AUDIT_WRITE
# define CAP_AUDIT_WRITE -1
# endif
# ifndef CAP_BLOCK_SUSPEND
# define CAP_BLOCK_SUSPEND -1
# endif
# ifndef CAP_CHOWN
# define CAP_CHOWN -1
# endif
# ifndef CAP_DAC_OVERRIDE
# define CAP_DAC_OVERRIDE -1
# endif
# ifndef CAP_DAC_READ_SEARCH
# define CAP_DAC_READ_SEARCH -1
# endif
# ifndef CAP_FOWNER
# define CAP_FOWNER -1
# endif
# ifndef CAP_FSETID
# define CAP_FSETID -1
# endif
# ifndef CAP_IPC_LOCK
# define CAP_IPC_LOCK -1
# endif
# ifndef CAP_IPC_OWNER
# define CAP_IPC_OWNER -1
# endif
# ifndef CAP_KILL
# define CAP_KILL -1
# endif
# ifndef CAP_LEASE
# define CAP_LEASE -1
# endif
# ifndef CAP_LINUX_IMMUTABLE
# define CAP_LINUX_IMMUTABLE -1
# endif
# ifndef CAP_MAC_ADMIN
# define CAP_MAC_ADMIN -1
# endif
# ifndef CAP_MAC_OVERRIDE
# define CAP_MAC_OVERRIDE -1
# endif
# ifndef CAP_MKNOD
# define CAP_MKNOD -1
# endif
# ifndef CAP_NET_ADMIN
# define CAP_NET_ADMIN -1
# endif
# ifndef CAP_NET_BIND_SERVICE
# define CAP_NET_BIND_SERVICE -1
# endif
# ifndef CAP_NET_BROADCAST
# define CAP_NET_BROADCAST -1
# endif
# ifndef CAP_NET_RAW
# define CAP_NET_RAW -1
# endif
# ifndef CAP_SETGID
# define CAP_SETGID -1
# endif
# ifndef CAP_SETFCAP
# define CAP_SETFCAP -1
# endif
# ifndef CAP_SETPCAP
# define CAP_SETPCAP -1
# endif
# ifndef CAP_SETUID
# define CAP_SETUID -1
# endif
# ifndef CAP_SYS_ADMIN
# define CAP_SYS_ADMIN -1
# endif
# ifndef CAP_SYS_BOOT
# define CAP_SYS_BOOT -1
# endif
# ifndef CAP_SYS_CHROOT
# define CAP_SYS_CHROOT -1
# endif
# ifndef CAP_SYS_MODULE
# define CAP_SYS_MODULE -1
# endif
# ifndef CAP_SYS_NICE
# define CAP_SYS_NICE -1
# endif
# ifndef CAP_SYS_PACCT
# define CAP_SYS_PACCT -1
# endif
# ifndef CAP_SYS_PTRACE
# define CAP_SYS_PTRACE -1
# endif
# ifndef CAP_SYS_RAWIO
# define CAP_SYS_RAWIO -1
# endif
# ifndef CAP_SYS_RESOURCE
# define CAP_SYS_RESOURCE -1
# endif
# ifndef CAP_SYS_TIME
# define CAP_SYS_TIME -1
# endif
# ifndef CAP_SYS_TTY_CONFIG
# define CAP_SYS_TTY_CONFIG -1
# endif
# ifndef CAP_SYSLOG
# define CAP_SYSLOG -1
# endif
# ifndef CAP_WAKE_ALARM
# define CAP_WAKE_ALARM -1
# endif
static int lxcContainerDropCapabilities(virDomainDefPtr def,
bool keepReboot)
{ {
int ret; int ret;
size_t i;
int policy = def->features[VIR_DOMAIN_FEATURE_CAPABILITIES];
/* Maps each virDomainCapsFeature value (used as index) to its CAP_* constant */
static unsigned int capsMapping[] = {CAP_AUDIT_CONTROL,
CAP_AUDIT_WRITE,
CAP_BLOCK_SUSPEND,
CAP_CHOWN,
CAP_DAC_OVERRIDE,
CAP_DAC_READ_SEARCH,
CAP_FOWNER,
CAP_FSETID,
CAP_IPC_LOCK,
CAP_IPC_OWNER,
CAP_KILL,
CAP_LEASE,
CAP_LINUX_IMMUTABLE,
CAP_MAC_ADMIN,
CAP_MAC_OVERRIDE,
CAP_MKNOD,
CAP_NET_ADMIN,
CAP_NET_BIND_SERVICE,
CAP_NET_BROADCAST,
CAP_NET_RAW,
CAP_SETGID,
CAP_SETFCAP,
CAP_SETPCAP,
CAP_SETUID,
CAP_SYS_ADMIN,
CAP_SYS_BOOT,
CAP_SYS_CHROOT,
CAP_SYS_MODULE,
CAP_SYS_NICE,
CAP_SYS_PACCT,
CAP_SYS_PTRACE,
CAP_SYS_RAWIO,
CAP_SYS_RESOURCE,
CAP_SYS_TIME,
CAP_SYS_TTY_CONFIG,
CAP_SYSLOG,
CAP_WAKE_ALARM};
capng_get_caps_process(); capng_get_caps_process();
if ((ret = capng_updatev(CAPNG_DROP, /* Make sure we drop everything if required by the user */
CAPNG_EFFECTIVE | CAPNG_PERMITTED | if (policy == VIR_DOMAIN_CAPABILITIES_POLICY_DENY)
CAPNG_INHERITABLE | CAPNG_BOUNDING_SET, capng_clear(CAPNG_SELECT_BOTH);
CAP_SYS_MODULE, /* No kernel module loading */
CAP_SYS_TIME, /* No changing the clock */ /* Apply all single capabilities changes */
CAP_MKNOD, /* No creating device nodes */ for (i = 0; i < VIR_DOMAIN_CAPS_FEATURE_LAST; i++) {
CAP_AUDIT_CONTROL, /* No messing with auditing status */ bool toDrop = false;
CAP_MAC_ADMIN, /* No messing with LSM config */ int state = def->caps_features[i];
keepReboot ? -1 : CAP_SYS_BOOT, /* No use of reboot */
-1)) < 0) { if (!cap_valid(capsMapping[i]))
virReportError(VIR_ERR_INTERNAL_ERROR, continue;
_("Failed to remove capabilities: %d"), ret);
return -1; switch ((virDomainCapabilitiesPolicy) policy) {
case VIR_DOMAIN_CAPABILITIES_POLICY_DENY:
if (state == VIR_DOMAIN_FEATURE_STATE_ON &&
(ret = capng_update(CAPNG_ADD,
CAPNG_EFFECTIVE | CAPNG_PERMITTED |
CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
capsMapping[i])) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Failed to add capability %s: %d"),
virDomainCapsFeatureTypeToString(i), ret);
return -1;
}
break;
case VIR_DOMAIN_CAPABILITIES_POLICY_DEFAULT:
switch ((virDomainCapsFeature) i) {
case VIR_DOMAIN_CAPS_FEATURE_SYS_BOOT: /* No use of reboot */
toDrop = !keepReboot && (state != VIR_DOMAIN_FEATURE_STATE_ON);
break;
case VIR_DOMAIN_CAPS_FEATURE_SYS_MODULE: /* No kernel module loading */
case VIR_DOMAIN_CAPS_FEATURE_SYS_TIME: /* No changing the clock */
case VIR_DOMAIN_CAPS_FEATURE_MKNOD: /* No creating device nodes */
case VIR_DOMAIN_CAPS_FEATURE_AUDIT_CONTROL: /* No messing with auditing status */
case VIR_DOMAIN_CAPS_FEATURE_MAC_ADMIN: /* No messing with LSM config */
toDrop = (state != VIR_DOMAIN_FEATURE_STATE_ON);
break;
default: /* User specified capabilities to drop */
toDrop = (state == VIR_DOMAIN_FEATURE_STATE_OFF);
}
/* Fallthrough */
case VIR_DOMAIN_CAPABILITIES_POLICY_ALLOW:
if (policy == VIR_DOMAIN_CAPABILITIES_POLICY_ALLOW)
toDrop = state == VIR_DOMAIN_FEATURE_STATE_OFF;
if (toDrop && (ret = capng_update(CAPNG_DROP,
CAPNG_EFFECTIVE | CAPNG_PERMITTED |
CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
capsMapping[i])) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Failed to remove capability %s: %d"),
virDomainCapsFeatureTypeToString(i), ret);
return -1;
}
break;
default:
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Unsupported capabilities policy: %s"),
virDomainCapabilitiesPolicyTypeToString(policy));
}
} }
if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) { if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
@ -1812,7 +2020,8 @@ static int lxcContainerDropCapabilities(bool keepReboot)
return 0; return 0;
} }
#else #else
static int lxcContainerDropCapabilities(bool keepReboot ATTRIBUTE_UNUSED) static int lxcContainerDropCapabilities(virDomainDefPtr def ATTRIBUTE_UNUSED,
bool keepReboot ATTRIBUTE_UNUSED)
{ {
VIR_WARN("libcap-ng support not compiled in, unable to clear capabilities"); VIR_WARN("libcap-ng support not compiled in, unable to clear capabilities");
return 0; return 0;
@ -1917,7 +2126,7 @@ static int lxcContainerChild(void *data)
} }
/* drop a set of root capabilities */ /* drop a set of root capabilities */
if (lxcContainerDropCapabilities(!!hasReboot) < 0) if (lxcContainerDropCapabilities(vmDef, !!hasReboot) < 0)
goto cleanup; goto cleanup;
if (lxcContainerSendContinue(argv->handshakefd) < 0) { if (lxcContainerSendContinue(argv->handshakefd) < 0) {

View File

@ -2660,14 +2660,45 @@ virCgroupDenyAllDevices(virCgroupPtr group)
"a"); "a");
} }
/**
 * virCgroupAllowAllDevices:
 *
 * Allows the given permissions for all devices by setting lines similar
 * to these ones (the 'm' permission is only an example):
 *
 * 'b *:* m'
 * 'c *:* m'
 *
 * @group: The cgroup to allow devices for
 * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
 *
 * Returns: 0 on success, -1 if either device type could not be allowed
 */
int
virCgroupAllowAllDevices(virCgroupPtr group, int perms)
{
    /* negative major and minor numbers stand for the '*' wildcard;
     * block devices are handled first, then character devices */
    if (virCgroupAllowDevice(group, 'b', -1, -1, perms) < 0 ||
        virCgroupAllowDevice(group, 'c', -1, -1, perms) < 0)
        return -1;

    return 0;
}
/** /**
* virCgroupAllowDevice: * virCgroupAllowDevice:
* *
* @group: The cgroup to allow a device for * @group: The cgroup to allow a device for
* @type: The device type (i.e., 'c' or 'b') * @type: The device type (i.e., 'c' or 'b')
* @major: The major number of the device * @major: The major number of the device, a negative value means '*'
* @minor: The minor number of the device * @minor: The minor number of the device, a negative value means '*'
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
* *
* Returns: 0 on success * Returns: 0 on success
@ -2678,8 +2709,18 @@ virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor,
{ {
int ret = -1; int ret = -1;
char *devstr = NULL; char *devstr = NULL;
char *majorstr = NULL;
char *minorstr = NULL;
/* Use the '*' wildcard for a negative major/minor and the decimal
 * number otherwise. The two alternatives have to be mutually
 * exclusive: formatting the number unconditionally would overwrite
 * (and leak) the "*" string that was just duplicated and write the
 * negative number instead of the wildcard. */
if ((major < 0 && VIR_STRDUP(majorstr, "*") < 0) ||
    (major >= 0 && virAsprintf(&majorstr, "%i", major) < 0))
    goto cleanup;

if ((minor < 0 && VIR_STRDUP(minorstr, "*") < 0) ||
    (minor >= 0 && virAsprintf(&minorstr, "%i", minor) < 0))
    goto cleanup;
if (virAsprintf(&devstr, "%c %s:%s %s", type, majorstr, minorstr,
virCgroupGetDevicePermsString(perms)) < 0) virCgroupGetDevicePermsString(perms)) < 0)
goto cleanup; goto cleanup;
@ -2693,6 +2734,8 @@ virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor,
cleanup: cleanup:
VIR_FREE(devstr); VIR_FREE(devstr);
VIR_FREE(majorstr);
VIR_FREE(minorstr);
return ret; return ret;
} }
@ -4232,6 +4275,14 @@ virCgroupGetCpusetCpus(virCgroupPtr group ATTRIBUTE_UNUSED,
return -1; return -1;
} }
/* Variant of virCgroupAllowAllDevices() for platforms without control
 * group support (per its error message): ignores its arguments, reports
 * ENOSYS and always returns -1. */
int
virCgroupAllowAllDevices(virCgroupPtr group ATTRIBUTE_UNUSED,
int perms ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("Control groups not supported on this platform"));
return -1;
}
int int
virCgroupDenyAllDevices(virCgroupPtr group ATTRIBUTE_UNUSED) virCgroupDenyAllDevices(virCgroupPtr group ATTRIBUTE_UNUSED)

View File

@ -177,6 +177,8 @@ const char *virCgroupGetDevicePermsString(int perms);
int virCgroupDenyAllDevices(virCgroupPtr group); int virCgroupDenyAllDevices(virCgroupPtr group);
int virCgroupAllowAllDevices(virCgroupPtr group, int perms);
int virCgroupAllowDevice(virCgroupPtr group, int virCgroupAllowDevice(virCgroupPtr group,
char type, char type,
int major, int major,

View File

@ -0,0 +1,28 @@
<domain type='lxc'>
<name>demo</name>
<uuid>8369f1ac-7e46-e869-4ca5-759d51478066</uuid>
<os>
<type>exe</type>
<init>/sh</init>
</os>
<features>
<capabilities policy="deny">
<mknod state="on"/>
</capabilities>
</features>
<resource>
<partition>/virtualmachines</partition>
</resource>
<memory unit='KiB'>500000</memory>
<devices>
<filesystem type='mount'>
<source dir='/root/container'/>
<target dir='/'/>
</filesystem>
<filesystem type='mount'>
<source dir='/home'/>
<target dir='/home'/>
</filesystem>
<console type='pty'/>
</devices>
</domain>