Add helper for running code in separate namespaces

Implement virProcessRunInMountNamespace, which runs a callback of type
virProcessNamespaceCallback in a container namespace. This uses a
child process to run the callback, since you can't change the mount
namespace of a thread. This implies that callbacks have to be careful
about what code they run due to async safety rules.

Idea by Dan Berrange, based on an initial report by Reco
<recoverym4n@gmail.com> at
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=732394

Signed-off-by: Daniel Berrange <berrange@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
This commit is contained in:
Daniel P. Berrange 2014-01-30 13:11:23 +00:00
parent c321bfc5c3
commit 7c72ef6f55
3 changed files with 118 additions and 0 deletions

View File

@ -1673,6 +1673,7 @@ virProcessGetNamespaces;
virProcessGetStartTime;
virProcessKill;
virProcessKillPainfully;
virProcessRunInMountNamespace;
virProcessSetAffinity;
virProcessSetMaxFiles;
virProcessSetMaxMemLock;

View File

@ -50,6 +50,7 @@
#include "virlog.h"
#include "virutil.h"
#include "virstring.h"
#include "vircommand.h"
#define VIR_FROM_THIS VIR_FROM_NONE
@ -877,3 +878,108 @@ int virProcessGetStartTime(pid_t pid,
return 0;
}
#endif
#ifdef HAVE_SETNS
/* Child-side worker for running a callback inside another process's
 * mount namespace.
 *
 * @errfd:  descriptor to which the text of the last reported error is
 *          written (including its trailing NUL) when this function fails
 * @pid:    process whose /proc/<pid>/ns/mnt namespace is entered
 * @cb:     callback invoked as cb(pid, opaque) once setns() succeeded
 * @opaque: caller data handed through to @cb untouched
 *
 * Returns the value returned by @cb, or -1 if the namespace path could
 * not be built, opened, or entered.
 */
static int virProcessNamespaceHelper(int errfd,
                                     pid_t pid,
                                     virProcessNamespaceCallback cb,
                                     void *opaque)
{
    /* Start from NULL so the VIR_FREE() in cleanup never sees an
     * indeterminate pointer, whatever virAsprintf() leaves behind on
     * failure. */
    char *path = NULL;
    int fd = -1;
    int ret = -1;

    if (virAsprintf(&path, "/proc/%llu/ns/mnt", (unsigned long long)pid) < 0)
        goto cleanup;

    if ((fd = open(path, O_RDONLY)) < 0) {
        virReportSystemError(errno, "%s",
                             _("Kernel does not provide mount namespace"));
        goto cleanup;
    }

    if (setns(fd, 0) < 0) {
        virReportSystemError(errno, "%s",
                             _("Unable to enter mount namespace"));
        goto cleanup;
    }

    ret = cb(pid, opaque);

 cleanup:
    if (ret < 0) {
        virErrorPtr err = virGetLastError();

        /* An error object without message text must not reach strlen(). */
        if (err && err->message) {
            /* +1: ship the terminating NUL so the reader gets a C string. */
            size_t len = strlen(err->message) + 1;
            ignore_value(safewrite(errfd, err->message, len));
        }
    }
    VIR_FREE(path);
    VIR_FORCE_CLOSE(fd);
    return ret;
}
/* Run cb(pid, opaque) in the mount namespace of pid.
 *
 * The callback is executed in a forked child process, so it must be
 * careful to only use async signal safe functions.  The child exits
 * with status 1 when the callback (or entering the namespace) returned
 * a negative value, and with status 0 otherwise.
 *
 * Returns -1 with an error message raised if the pipe or the child
 * cannot be created; otherwise returns the result of
 * virProcessWait(child, NULL).  When that wait fails and the child sent
 * back an error message over the pipe, that message is reported so the
 * caller can see why the child failed.
 */
int
virProcessRunInMountNamespace(pid_t pid,
                              virProcessNamespaceCallback cb,
                              void *opaque)
{
    int ret = -1;
    pid_t child = -1;
    int errfd[2] = { -1, -1 };

    if (pipe(errfd) < 0) {
        virReportSystemError(errno, "%s",
                             _("Cannot create pipe for child"));
        return -1;
    }

    ret = virFork(&child);

    if (ret < 0 || child < 0) {
        if (child == 0)
            _exit(1);

        /* parent */
        virProcessAbort(child);
        /* Never report success when no usable child was created. */
        ret = -1;
        goto cleanup;
    }

    if (child == 0) {
        /* Child: only the write end of the pipe is needed. */
        VIR_FORCE_CLOSE(errfd[0]);
        ret = virProcessNamespaceHelper(errfd[1], pid,
                                        cb, opaque);
        VIR_FORCE_CLOSE(errfd[1]);
        _exit(ret < 0 ? 1 : 0);
    } else {
        char *buf = NULL;

        /* Parent: close the write end so the read below sees EOF once
         * the child has closed its copy. */
        VIR_FORCE_CLOSE(errfd[1]);
        ignore_value(virFileReadHeaderFD(errfd[0], 1024, &buf));
        ret = virProcessWait(child, NULL);

        /* Surface the child's own error text instead of discarding it.
         * NOTE(review): relies on virFileReadHeaderFD() returning a
         * NUL-terminated buffer -- confirm against virfile.c. */
        if (ret < 0 && buf && *buf) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("child reported: %s"), buf);
        }
        VIR_FREE(buf);
    }

 cleanup:
    VIR_FORCE_CLOSE(errfd[0]);
    VIR_FORCE_CLOSE(errfd[1]);
    return ret;
}
#else /* !HAVE_SETNS */
/* Fallback built when HAVE_SETNS is not defined: no callback is run,
 * an ENOSYS system error is reported and -1 is returned.
 */
int
virProcessRunInMountNamespace(pid_t pid ATTRIBUTE_UNUSED,
                              virProcessNamespaceCallback cb ATTRIBUTE_UNUSED,
                              void *opaque ATTRIBUTE_UNUSED)
{
    int rc = -1;

    virReportSystemError(ENOSYS, "%s",
                         _("Mount namespaces are not available on this platform"));

    return rc;
}
#endif

View File

@ -60,4 +60,15 @@ int virProcessSetNamespaces(size_t nfdlist,
int virProcessSetMaxMemLock(pid_t pid, unsigned long long bytes);
int virProcessSetMaxProcesses(pid_t pid, unsigned int procs);
int virProcessSetMaxFiles(pid_t pid, unsigned int files);
/* Callback to run code within the mount namespace tied to the given
 * pid. This function must use only async-signal-safe functions, as
 * it gets run in a child process forked by
 * virProcessRunInMountNamespace(). It is invoked as cb(pid, opaque)
 * after the child has entered the target's mount namespace.
 *
 * A negative return value is treated as an error: the message of the
 * last reported error, if any, is written back to the parent and the
 * child exits with status 1. Any other return value makes the child
 * exit with status 0. */
typedef int (*virProcessNamespaceCallback)(pid_t pid, void *opaque);
/* Fork a child, enter the mount namespace of @pid in it and run
 * cb(pid, opaque) there. Returns -1 if the child could not be set up;
 * otherwise the result of waiting for the child. On platforms without
 * setns() support this always fails with ENOSYS. */
int virProcessRunInMountNamespace(pid_t pid,
virProcessNamespaceCallback cb,
void *opaque);
#endif /* __VIR_PROCESS_H__ */