From 66d5930ec77caed942404ceef4829f2c4ca431bd Mon Sep 17 00:00:00 2001
From: Stefano Brivio
Date: Wed, 13 Oct 2021 22:25:03 +0200
Subject: [PATCH] passt, pasta: Add seccomp support

List of allowed syscalls comes from comments in the form:
	#syscalls NAME...

for syscalls needed both in passt and pasta mode, and:
	#syscalls:pasta NAME...
	#syscalls:passt NAME...

for syscalls specifically needed in pasta or passt mode only.

seccomp.sh builds a list of BPF statements from those comments,
prefixed by a binary search tree to keep lookup fast.

While at it, clean up the Makefile a bit using wildcards.

Signed-off-by: Stefano Brivio
---

NOTE(review): this copy of the patch had all line breaks collapsed and all
angle-bracketed text dropped (author e-mail addresses, system header names
on the "#include" lines of the passt.c hunk). Line structure is restored
below and matches every hunk header count; tab indentation follows the
project style but is reconstructed, confirm against the original commit.
The dropped angle-bracketed text cannot be recovered from this copy and
must be restored from the original commit before applying.

Content changes relative to the collapsed copy:
 - Makefile: quote the tr(1) character classes so the shell cannot
   glob-expand them against file names in the build directory
 - seccomp.sh: quote ${0} passed to basename

 Makefile   |  17 +++--
 conf.c     |   2 +
 igmp.c     |   1 +
 mld.c      |   1 +
 passt.c    |  36 +++++++++++
 seccomp.sh | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 tap.c      |  11 ++++
 tcp.c      |  11 ++++
 udp.c      |   7 +++
 util.c     |   2 +
 10 files changed, 259 insertions(+), 9 deletions(-)
 create mode 100755 seccomp.sh

diff --git a/Makefile b/Makefile
index 9f0e3bf..26b6840 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,7 @@ CFLAGS += -Wall -Wextra -pedantic
 CFLAGS += -DRLIMIT_STACK_VAL=$(shell ulimit -s)
 CFLAGS += -DPAGE_SIZE=$(shell getconf PAGE_SIZE)
 CFLAGS += -DNETNS_RUN_DIR=\"/run/netns\"
+CFLAGS += -DPASST_AUDIT_ARCH=AUDIT_ARCH_$(shell uname -m | tr '[a-z]' '[A-Z]')

 prefix ?= /usr/local

@@ -13,14 +14,12 @@ avx2: clean all
 static: CFLAGS += -static
 static: clean all

-passt: passt.c passt.h arp.c arp.h checksum.c checksum.h conf.c conf.h \
-	dhcp.c dhcp.h dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h \
-	netlink.c netlink.h pasta.c pasta.h siphash.c siphash.h tap.c tap.h \
-	icmp.c icmp.h tcp.c tcp.h udp.c udp.h util.c util.h
-	$(CC) $(CFLAGS) \
-		passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c pasta.c pcap.c \
-		ndp.c netlink.c siphash.c tap.c icmp.c tcp.c udp.c util.c \
-		-o passt
+seccomp.h: *.c $(filter-out seccomp.h,$(wildcard *.h))
+	@ ./seccomp.sh
+
+passt: $(filter-out qrap.c,$(wildcard *.c)) \
+	$(filter-out qrap.h,$(wildcard *.h)) seccomp.h
+	$(CC) $(CFLAGS) $(filter-out qrap.c,$(wildcard *.c)) -o passt

 pasta: passt
 	ln -s passt pasta
@@ -35,7 +34,7 @@ qrap: qrap.c passt.h

 .PHONY: clean
 clean:
-	-${RM} passt *.o qrap pasta pasta.1 passt4netns \
+	-${RM} passt *.o seccomp.h qrap pasta pasta.1 passt4netns \
 		passt.tar passt.tar.gz *.deb *.rpm

 install: passt pasta qrap
diff --git a/conf.c b/conf.c
index 6399fbb..e3244aa 100644
--- a/conf.c
+++ b/conf.c
@@ -10,6 +10,8 @@
  *
  * Copyright (c) 2020-2021 Red Hat GmbH
  * Author: Stefano Brivio
+ *
+ * #syscalls stat
  */

 #define _GNU_SOURCE
diff --git a/igmp.c b/igmp.c
index 235002d..b92aa0a 100644
--- a/igmp.c
+++ b/igmp.c
@@ -1 +1,2 @@
 /* TO BE IMPLEMENTED */
+__attribute__((__unused__)) static void __(void) { }
diff --git a/mld.c b/mld.c
index 235002d..b92aa0a 100644
--- a/mld.c
+++ b/mld.c
@@ -1 +1,2 @@
 /* TO BE IMPLEMENTED */
+__attribute__((__unused__)) static void __(void) { }
diff --git a/passt.c b/passt.c
index b411657..0628d8c 100644
--- a/passt.c
+++ b/passt.c
@@ -51,7 +51,12 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include

+#include "seccomp.h"
 #include "util.h"
 #include "passt.h"
 #include "dhcp.h"
@@ -157,12 +162,41 @@ void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
 	udp_update_l2_buf(eth_d, eth_s, ip_da);
 }

+/**
+ * seccomp() - Set up seccomp filters depending on mode, won't return on failure
+ * @c:		Execution context
+ */
+static void seccomp(struct ctx *c)
+{
+	struct sock_fprog prog;
+
+	if (c->mode == MODE_PASST) {
+		prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
+		prog.filter = filter_passt;
+	} else {
+		prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
+		prog.filter = filter_pasta;
+	}
+
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+	    prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+		perror("prctl");
+		exit(EXIT_FAILURE);
+	}
+}
+
 /**
  * main() - Entry point and main loop
  * @argc:	Argument count
  * @argv:	Options, plus optional target PID for pasta mode
  *
  * Return: 0 once interrupted, non-zero on failure
+ *
+ * #syscalls read write open close fork dup2 exit chdir brk ioctl writev syslog
+ * #syscalls prlimit64 epoll_ctl epoll_create1 epoll_wait accept4 accept listen
+ * #syscalls socket bind connect getsockopt setsockopt recvfrom sendto shutdown
+ * #syscalls openat fstat fcntl lseek
+ * #syscalls:pasta rt_sigreturn
  */
 int main(int argc, char **argv)
 {
@@ -198,6 +232,8 @@ int main(int argc, char **argv)

 	conf(&c, argc, argv);

+	seccomp(&c);
+
 	if (!c.debug && (c.stderr || isatty(fileno(stdout))))
 		openlog(log_name, LOG_PERROR, LOG_DAEMON);

diff --git a/seccomp.sh b/seccomp.sh
new file mode 100755
index 0000000..7e37bd3
--- /dev/null
+++ b/seccomp.sh
@@ -0,0 +1,180 @@
+#!/bin/sh -eu
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+#
+# PASST - Plug A Simple Socket Transport
+#  for qemu/UNIX domain socket mode
+#
+# PASTA - Pack A Subtle Tap Abstraction
+#  for network namespace/tap device mode
+#
+# seccomp.sh - Build seccomp profiles from "#syscalls[:PROFILE]" comments in code
+#
+# Copyright (c) 2021 Red Hat GmbH
+# Author: Stefano Brivio
+
+TMP="$(mktemp)"
+OUT="seccomp.h"
+
+HEADER="/* This file was automatically generated by $(basename "${0}") */"
+
+# Prefix for each profile: check that 'arch' in seccomp_data is matching
+PRE='
+struct sock_filter filter_@PROFILE@[] = {
+	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+		 (offsetof(struct seccomp_data, arch))),
+	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, PASST_AUDIT_ARCH, 0, @KILL@),
+	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+		 (offsetof(struct seccomp_data, nr))),
+
+'
+
+# Suffix for each profile: return actions
+POST='	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+};
+'
+
+# Syscall, @NR@: number, @ALLOW@: offset to RET_ALLOW, @NAME@: syscall name
+CALL='	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, @NR@, @ALLOW@, 0), /* @NAME@ */'
+
+# Binary search tree node or leaf, @NR@: value, @R@: right jump, @L@: left jump
+BST='	BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, @NR@, @R@, @L@),'
+
+# sub() - Substitute in-place file line with processed template line
+# $1:	Line number
+# $@:	Replacement for @KEY@ in the form KEY:value
+sub() {
+	IFS=
+	__line_no="${1}"
+	__template="$(eval printf '%s' "\${${2}}")"
+	shift; shift
+
+	sed -i "${__line_no}s#.*#${__template}#" "${TMP}"
+
+	for __def in ${@}; do
+		__key="@${__def%%:*}@"
+		__value="${__def#*:}"
+		sed -i "${__line_no}s/${__key}/${__value}/" "${TMP}"
+	done
+	unset IFS
+}
+
+# finish() - Finalise header file from temporary files with prefix and suffix
+# $1:	Variable name of prefix
+# $@:	Replacements for prefix variable
+finish() {
+	IFS=
+	__out="$(eval printf '%s' "\${${1}}")"
+	shift
+
+	for __def in ${@}; do
+		__key="@${__def%%:*}@"
+		__value="${__def#*:}"
+		__out="$(printf '%s' "${__out}" | sed "s#${__key}#${__value}#")"
+	done
+
+	printf '%s\n' "${__out}" >> "${OUT}"
+	cat "${TMP}" >> "${OUT}"
+	rm "${TMP}"
+	printf '%s' "${POST}" >> "${OUT}"
+	unset IFS
+}
+
+# log2() - Binary logarithm
+# $1:	Operand
+log2() {
+	__x=-1
+	__y=${1}
+	while [ ${__y} -gt 0 ]; do : $((__y >>= 1)); __x=$((__x + 1)); done
+	echo ${__x}
+}
+
+# gen_profile() - Build struct sock_filter for a single profile
+# $1:	Profile name
+# $@:	Names of allowed system calls, amount padded to next power of two
+gen_profile() {
+	__profile="${1}"
+	shift
+
+	__statements_calls=${#}
+	__bst_levels=$(log2 $(( __statements_calls / 4 )) )
+	__statements_bst=$(( __statements_calls / 4 - 1 ))
+	__statements=$((__statements_calls + __statements_bst))
+
+	for __i in $(seq 1 ${__statements_bst} ); do
+		echo -1 >> "${TMP}"
+	done
+	for __i in $(seq 1 ${__statements_calls} ); do
+		ausyscall $(eval echo \${${__i}}) --exact >> "${TMP}"
+	done
+	sort -go "${TMP}" "${TMP}"
+
+	__distance=$(( __statements_calls / 2 ))
+	__level_nodes=1
+	__ll=0
+	__line=1
+	for __level in $(seq 1 $(( __bst_levels - 1 )) ); do
+		# Nodes
+		__cmp_pos=${__distance}
+
+		for __node in $(seq 1 ${__level_nodes}); do
+			__cmp_line=$(( __statements_bst + __cmp_pos ))
+			__lr=$(( __ll + 1 ))
+			__nr="$(sed -n ${__cmp_line}p "${TMP}")"
+
+			sub ${__line} BST "NR:${__nr}" "L:${__ll}" "R:${__lr}"
+
+			__ll=${__lr}
+			__line=$(( __line + 1 ))
+			__cmp_pos=$(( __cmp_pos + __distance * 2 ))
+		done
+
+		__distance=$(( __distance / 2 ))
+		__level_nodes=$(( __level_nodes * 2 ))
+	done
+
+	# Leaves
+	__ll=$(( __level_nodes - 1 ))
+	__lr=$(( __ll + __distance - 1 ))
+	__cmp_pos=${__distance}
+
+	for __leaf in $(seq 1 ${__level_nodes}); do
+		__cmp_line=$(( __statements_bst + __cmp_pos ))
+		__nr="$(sed -n ${__cmp_line}p "${TMP}")"
+		sub ${__line} BST "NR:${__nr}" "L:${__ll}" "R:${__lr}"
+
+		__ll=$(( __lr + __distance - 1 ))
+		__lr=$(( __ll + __distance))
+		__line=$(( __line + 1 ))
+		__cmp_pos=$(( __cmp_pos + __distance * 2 ))
+	done
+
+	# Calls
+	for __i in $(seq $(( __statements_bst + 1 )) ${__statements}); do
+		__nr="$(sed -n ${__i}p "${TMP}")"
+		__name=$(ausyscall ${__nr})
+		__allow=$(( __statements - __i + 1 ))
+		sub ${__i} CALL "NR:${__nr}" "NAME:${__name}" "ALLOW:${__allow}"
+	done
+
+	finish PRE "PROFILE:${__profile}" "KILL:$(( __statements + 1))"
+}
+
+printf '%s\n' "${HEADER}" > "${OUT}"
+__profiles="$(sed -n 's/[\t ]*\*[\t ]*#syscalls:\([^ ]*\).*/\1/p' *.[ch] | sort -u)"
+for __p in ${__profiles}; do
+	__calls="$(sed -n 's/[\t ]*\*[\t ]*#syscalls\(:'"${__p}"'\|\)[\t ]\{1,\}\(.*\)/\2/p' *.[ch] | tr ' ' '\n' | sort -u)"
+
+	echo "seccomp profile ${__p} allows: ${__calls}" | tr '\n' ' ' | fmt -t
+
+	# Pad here to keep gen_profile() "simple"
+	__count=0
+	for __c in ${__calls}; do __count=$(( __count + 1 )); done
+	__padded=$(( 1 << (( $(log2 ${__count}) + 1 )) ))
+	for __i in $( seq ${__count} $(( __padded - 1 )) ); do
+		__calls="${__calls} tuxcall"
+	done
+
+	gen_profile "${__p}" ${__calls}
+done
diff --git a/tap.c b/tap.c
index ec2b8b5..d20503d 100644
--- a/tap.c
+++ b/tap.c
@@ -10,6 +10,8 @@
  *
  * Copyright (c) 2020-2021 Red Hat GmbH
  * Author: Stefano Brivio
+ *
+ * #syscalls recvfrom sendto
  */

 #define _GNU_SOURCE
@@ -768,6 +770,8 @@ restart:
 /**
  * tap_sock_init_unix() - Create and bind AF_UNIX socket, wait for connection
  * @c:		Execution context
+ *
+ * #syscalls:passt unlink
  */
 static void tap_sock_init_unix(struct ctx *c)
 {
@@ -819,8 +823,13 @@ static void tap_sock_init_unix(struct ctx *c)
 	}

 	info("UNIX domain socket bound at %s\n", addr.sun_path);
+#ifdef PASST_LEGACY_NO_OPTIONS
+	/*
+	 * syscalls:passt chmod
+	 */
 	chmod(addr.sun_path,
 	      S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
+#endif

 	pcap_init(c, i);

@@ -850,6 +859,8 @@ static int tun_ns_fd = -1;
  * @c:		Execution context
  *
  * Return: 0
+ *
+ * #syscalls:pasta ioctl
  */
 static int tap_ns_tun(void *arg)
 {
diff --git a/tcp.c b/tcp.c
index b49f49e..05382f7 100644
--- a/tcp.c
+++ b/tcp.c
@@ -303,6 +303,8 @@
  * - SPLICE_FIN_FROM:	FIN (EPOLLRDHUP) seen from originating socket
  * - SPLICE_FIN_TO:	FIN (EPOLLRDHUP) seen from connected socket
  * - SPLICE_FIN_BOTH:	FIN (EPOLLRDHUP) seen from both sides
+ *
+ * #syscalls pipe pipe2
  */

 #define _GNU_SOURCE
@@ -2078,6 +2080,9 @@ static void tcp_sock_consume(struct tcp_tap_conn *conn, uint32_t ack_seq)
  * @now:	Current timestamp
  *
  * Return: negative on connection reset, 0 otherwise
+ *
+ * #syscalls recvmsg
+ * #syscalls:passt sendmmsg sendmsg
  */
 static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn,
 			      struct timespec *now)
@@ -2320,6 +2325,8 @@ out:
  * @msg:	Array of messages from tap
  * @count:	Count of messages
  * @now:	Current timestamp
+ *
+ * #syscalls sendmsg
  */
 static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
 			      struct tap_l4_msg *msg, int count,
@@ -2965,6 +2972,8 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref,
  * @c:		Execution context
  * @ref:	epoll reference
  * @events:	epoll events bitmap
+ *
+ * #syscalls splice
  */
 void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 			     uint32_t events)
@@ -3525,6 +3534,8 @@ static int tcp_sock_refill(void *arg)
  * @c:		Execution context
  *
  * Return: 0 on success, -1 on failure
+ *
+ * #syscalls getrandom
  */
 int tcp_sock_init(struct ctx *c, struct timespec *now)
 {
diff --git a/udp.c b/udp.c
index 0837cba..e77345f 100644
--- a/udp.c
+++ b/udp.c
@@ -419,6 +419,8 @@ static void udp_sock6_iov_init(void)
  * @splice:	UDP_BACK_TO_INIT from init, UDP_BACK_TO_NS from namespace
  *
  * Return: connected socket, negative error code on failure
+ *
+ * #syscalls:pasta getsockname
  */
 int udp_splice_connect(struct ctx *c, int v6, int bound_sock,
 		       in_port_t src, in_port_t dst, int splice)
@@ -640,6 +642,9 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
  * @ref:	epoll reference
  * @events:	epoll events bitmap
  * @now:	Current timestamp
+ *
+ * #syscalls recvmmsg
+ * #syscalls:passt sendmmsg sendmsg
  */
 void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		      struct timespec *now)
@@ -877,6 +882,8 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
  * @now:	Current timestamp
  *
  * Return: count of consumed packets
+ *
+ * #syscalls sendmmsg
  */
 int udp_tap_handler(struct ctx *c, int af, void *addr,
 		    struct tap_l4_msg *msg, int count, struct timespec *now)
diff --git a/util.c b/util.c
index 4945431..e0fa5ff 100644
--- a/util.c
+++ b/util.c
@@ -332,6 +332,8 @@ void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude)
  * @c:		Execution context
  *
  * Return: 0 on success, -1 on failure
+ *
+ * #syscalls:pasta setns
  */
 int ns_enter(struct ctx *c)
 {