2021-10-13 23:21:29 +00:00
|
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
|
|
|
|
/* PASST - Plug A Simple Socket Transport
|
|
|
|
* for qemu/UNIX domain socket mode
|
|
|
|
*
|
|
|
|
* PASTA - Pack A Subtle Tap Abstraction
|
|
|
|
* for network namespace/tap device mode
|
|
|
|
*
|
|
|
|
* pasta.c - pasta (namespace) specific implementations
|
|
|
|
*
|
|
|
|
* Copyright (c) 2020-2021 Red Hat GmbH
|
|
|
|
* Author: Stefano Brivio <sbrivio@redhat.com>
|
|
|
|
*
|
|
|
|
* #syscalls:pasta clone unshare waitid kill execve exit_group rt_sigprocmask
|
2021-10-14 19:48:44 +00:00
|
|
|
* #syscalls:pasta geteuid getdents64 readlink setsid nanosleep clock_nanosleep
|
2021-10-13 23:21:29 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#define _GNU_SOURCE
|
|
|
|
#include <sched.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <syslog.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <sys/wait.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <dirent.h>
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <net/ethernet.h>
|
|
|
|
#include <sys/syscall.h>
|
|
|
|
|
|
|
|
#include "util.h"
|
|
|
|
#include "passt.h"
|
|
|
|
#include "netlink.h"
|
|
|
|
|
|
|
|
/* PID of child, in case we created a namespace, and its procfs link */
|
|
|
|
static int pasta_child_pid;
|
|
|
|
static char pasta_child_ns[PATH_MAX];
|
|
|
|
|
|
|
|
/**
|
|
|
|
* pasta_ns_cleanup() - Look for processes in namespace, terminate them
|
|
|
|
*/
|
|
|
|
static void pasta_ns_cleanup(void)
|
|
|
|
{
|
|
|
|
char proc_path[PATH_MAX], ns_link[PATH_MAX], buf[BUFSIZ];
|
|
|
|
int recheck = 0, found = 0, waited = 0;
|
|
|
|
int dir_fd, n;
|
|
|
|
|
|
|
|
if (!*pasta_child_ns)
|
|
|
|
return;
|
|
|
|
|
|
|
|
loop:
|
|
|
|
if ((dir_fd = open("/proc", O_RDONLY | O_DIRECTORY)) < 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
while ((n = syscall(SYS_getdents64, dir_fd, buf, BUFSIZ)) > 0) {
|
|
|
|
struct dirent *dp = (struct dirent *)buf;
|
|
|
|
int pos = 0;
|
|
|
|
|
|
|
|
while (pos < n) {
|
|
|
|
pid_t pid;
|
|
|
|
|
|
|
|
errno = 0;
|
|
|
|
pid = strtol(dp->d_name, NULL, 0);
|
|
|
|
if (!pid || errno)
|
|
|
|
goto next;
|
|
|
|
|
|
|
|
snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net", pid);
|
|
|
|
if (readlink(proc_path, ns_link, PATH_MAX) < 0)
|
|
|
|
goto next;
|
|
|
|
|
|
|
|
if (!strncmp(ns_link, pasta_child_ns, PATH_MAX)) {
|
|
|
|
found = 1;
|
|
|
|
if (waited)
|
|
|
|
kill(pid, SIGKILL);
|
|
|
|
else
|
|
|
|
kill(pid, SIGQUIT);
|
|
|
|
}
|
|
|
|
next:
|
|
|
|
dp = (struct dirent *)(buf + (pos += dp->d_reclen));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
close(dir_fd);
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (waited) {
|
|
|
|
if (recheck) {
|
|
|
|
info("Some processes in namespace didn't quit");
|
|
|
|
} else {
|
|
|
|
found = 0;
|
|
|
|
recheck = 1;
|
|
|
|
goto loop;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
info("Waiting for all processes in namespace to terminate");
|
|
|
|
sleep(1);
|
|
|
|
waited = 1;
|
|
|
|
goto loop;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* pasta_child_handler() - Exit once shell exits (if we started it), reap clones
|
|
|
|
* @signal: Unused, handler deals with SIGCHLD only
|
|
|
|
*/
|
|
|
|
void pasta_child_handler(int signal)
|
|
|
|
{
|
|
|
|
siginfo_t infop;
|
|
|
|
|
|
|
|
(void)signal;
|
|
|
|
|
|
|
|
if (pasta_child_pid &&
|
|
|
|
!waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) {
|
|
|
|
if (infop.si_pid == pasta_child_pid) {
|
|
|
|
pasta_ns_cleanup();
|
|
|
|
exit(EXIT_SUCCESS);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
waitid(P_ALL, 0, NULL, WEXITED | WNOHANG);
|
|
|
|
waitid(P_ALL, 0, NULL, WEXITED | WNOHANG);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* pasta_wait_for_ns() - Busy loop until we can enter the target namespace
|
|
|
|
* @arg: Execution context
|
|
|
|
*
|
|
|
|
* Return: 0
|
|
|
|
*/
|
|
|
|
static int pasta_wait_for_ns(void *arg)
|
|
|
|
{
|
|
|
|
struct ctx *c = (struct ctx *)arg;
|
|
|
|
char ns[PATH_MAX];
|
|
|
|
|
|
|
|
if (c->netns_only)
|
|
|
|
goto netns;
|
|
|
|
|
|
|
|
snprintf(ns, PATH_MAX, "/proc/%i/ns/user", pasta_child_pid);
|
|
|
|
do
|
|
|
|
while ((c->pasta_userns_fd = open(ns, O_RDONLY)) < 0);
|
|
|
|
while (setns(c->pasta_userns_fd, 0) && !close(c->pasta_userns_fd));
|
|
|
|
|
|
|
|
netns:
|
|
|
|
snprintf(ns, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid);
|
|
|
|
do
|
|
|
|
while ((c->pasta_netns_fd = open(ns, O_RDONLY)) < 0);
|
|
|
|
while (setns(c->pasta_netns_fd, 0) && !close(c->pasta_netns_fd));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* pasta_start_ns() - Fork shell in new namespace if target ns is not given
|
|
|
|
* @c: Execution context
|
|
|
|
*/
|
|
|
|
void pasta_start_ns(struct ctx *c)
|
|
|
|
{
|
|
|
|
char buf[BUFSIZ], *shell, proc_path[PATH_MAX];
|
|
|
|
int euid = geteuid();
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
c->foreground = 1;
|
|
|
|
if (!c->debug)
|
|
|
|
c->quiet = 1;
|
|
|
|
|
|
|
|
if ((pasta_child_pid = fork()) == -1) {
|
|
|
|
perror("fork");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pasta_child_pid) {
|
|
|
|
NS_CALL(pasta_wait_for_ns, c);
|
|
|
|
|
|
|
|
snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net",
|
|
|
|
pasta_child_pid);
|
|
|
|
readlink(proc_path, pasta_child_ns, PATH_MAX);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unshare(CLONE_NEWNET | (c->netns_only ? 0 : CLONE_NEWUSER))) {
|
|
|
|
perror("unshare");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!c->netns_only) {
|
|
|
|
snprintf(buf, BUFSIZ, "%u %u %u", 0, euid, 1);
|
|
|
|
|
|
|
|
fd = open("/proc/self/uid_map", O_WRONLY);
|
|
|
|
write(fd, buf, strlen(buf));
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
fd = open("/proc/self/setgroups", O_WRONLY);
|
|
|
|
write(fd, "deny", sizeof("deny"));
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
fd = open("/proc/self/gid_map", O_WRONLY);
|
|
|
|
write(fd, buf, strlen(buf));
|
|
|
|
close(fd);
|
|
|
|
}
|
|
|
|
|
|
|
|
fd = open("/proc/sys/net/ipv4/ping_group_range", O_WRONLY);
|
|
|
|
write(fd, "0 0", strlen("0 0"));
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh";
|
|
|
|
if (strstr(shell, "/bash"))
|
|
|
|
execve(shell, ((char *[]) { shell, "-l", NULL }), environ);
|
|
|
|
else
|
|
|
|
execve(shell, ((char *[]) { shell, NULL }), environ);
|
|
|
|
|
|
|
|
perror("execve");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* pasta_ns_conf() - Set up loopback and tap interfaces in namespace as needed
|
|
|
|
* @c: Execution context
|
|
|
|
*/
|
|
|
|
void pasta_ns_conf(struct ctx *c)
|
|
|
|
{
|
2021-10-14 11:05:56 +00:00
|
|
|
nl_link(1, 1 /* lo */, MAC_ZERO, 1, 0);
|
2021-10-13 23:21:29 +00:00
|
|
|
|
|
|
|
if (c->pasta_conf_ns) {
|
2021-10-14 11:05:56 +00:00
|
|
|
nl_link(1, c->pasta_ifi, c->mac_guest, 1, c->mtu);
|
2021-10-13 23:21:29 +00:00
|
|
|
|
|
|
|
if (c->v4) {
|
|
|
|
nl_addr(1, c->pasta_ifi, AF_INET, &c->addr4,
|
|
|
|
__builtin_popcount(c->mask4), NULL);
|
|
|
|
nl_route(1, c->pasta_ifi, AF_INET, &c->gw4);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c->v6) {
|
|
|
|
nl_addr(1, c->pasta_ifi, AF_INET6, &c->addr6, 64, NULL);
|
|
|
|
nl_route(1, c->pasta_ifi, AF_INET6, &c->gw6);
|
|
|
|
}
|
|
|
|
} else {
|
2021-10-14 11:05:56 +00:00
|
|
|
nl_link(1, c->pasta_ifi, c->mac_guest, 0, 0);
|
2021-10-13 23:21:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
proto_update_l2_buf(c->mac_guest, NULL, NULL);
|
|
|
|
}
|