tcp: SipHash-based initial sequence numbers (RFC 6528)

Add SipHash-2-4 routines (siphash.c, siphash.h) and use them, keyed with a
random 128-bit secret obtained at start-up, to generate TCP initial sequence
numbers towards the tap side.

Review changes folded into this version of the patch:

- siphash.c: PREAMBLE() started its loop at index 4, writing one element past
  the end of v[], and overwrote the SipHash constants with the key instead
  of XORing the key into them
- siphash.c: siphash_32b() documentation now matches its 32-bit return value
- siphash.h: add include guard, include <stdint.h> for the types it uses
- tcp.c: tcp_seq_init() computes the 32ns tick counter in integer arithmetic
  (the double to uint32_t conversion was out of range after ~4s of uptime)
  and initialises the hash part for unexpected address families
- tcp.c: tcp_sock_init() fails if getrandom() doesn't fill the whole key

NOTE(review): ".dst = *(struct in_addr *)c->addr4" dereferences c->addr4 as
a pointer. This is only correct if addr4 is an array or a pointer in passt.h:
confirm, and take its address otherwise.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
System header names on the bare "#include" lines of the first tcp.c hunk
could not be recovered, and indentation and blank lines of context were
reconstructed from the hunk line counts: restore them from the tree before
applying.

diff --git a/Makefile b/Makefile
index ca7a3e5..ebd5b20 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@ CFLAGS += -Wall -Wextra -pedantic
 
 all: passt qrap
 
-passt: passt.c passt.h arp.c arp.h dhcp.c dhcp.h ndp.c ndp.h tap.c tap.h tcp.c tcp.h udp.c udp.h util.c util.h
-	$(CC) $(CFLAGS) passt.c arp.c dhcp.c ndp.c tap.c tcp.c udp.c util.c -o passt
+passt: passt.c passt.h arp.c arp.h dhcp.c dhcp.h ndp.c ndp.h siphash.c siphash.h tap.c tap.h tcp.c tcp.h udp.c udp.h util.c util.h
+	$(CC) $(CFLAGS) passt.c arp.c dhcp.c ndp.c siphash.c tap.c tcp.c udp.c util.c -o passt
 
 qrap: qrap.c passt.h
 	$(CC) $(CFLAGS) qrap.c -o qrap
diff --git a/siphash.c b/siphash.c
new file mode 100644
index 0000000..910c718
--- /dev/null
+++ b/siphash.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ *
+ * siphash.c - SipHash routines
+ *
+ * Copyright (c) 2020-2021 Red Hat GmbH
+ * Author: Stefano Brivio
+ *
+ * This is an implementation of the SipHash-2-4-64 functions needed for TCP
+ * initial sequence numbers and socket lookup table hash for IPv4 and IPv6, see:
+ *
+ *	Aumasson, J.P. and Bernstein, D.J., 2012, December. SipHash: a fast
+ *	short-input PRF. In International Conference on Cryptology in India
+ *	(pp. 489-508). Springer, Berlin, Heidelberg.
+ *
+ *	http://cr.yp.to/siphash/siphash-20120918.pdf
+ *
+ * This includes code from the reference SipHash implementation at
+ * https://github.com/veorq/SipHash/ licensed as follows:
+ *
+ * --
+ * SipHash reference C implementation
+ *
+ * Copyright (c) 2012-2021 Jean-Philippe Aumasson
+ *
+ * Copyright (c) 2012-2014 Daniel J. Bernstein
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide. This software is distributed without any warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication along
+ * with
+ * this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ * --
+ *
+ * and from the Linux kernel implementation (lib/siphash.c), licensed as
+ * follows:
+ *
+ * --
+ * Copyright (C) 2016 Jason A. Donenfeld. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ * --
+ *
+ */
+
+#include <stdint.h>
+
+#include "siphash.h"
+
+/* Rotate 64-bit @x left by @b bits, 0 < @b < 64 */
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+
+/**
+ * PREAMBLE() - Declare and initialise SipHash state in the calling function
+ * @len:	Total input length in bytes, stored in the top byte of @b
+ *
+ * Needs the 128-bit key @k in scope: v[0] and v[2] are XORed with k[0], v[1]
+ * and v[3] with k[1]. Also declares @ret and the @__i counter for SIPROUND().
+ */
+#define PREAMBLE(len)							\
+	uint64_t v[4] = { 0x736f6d6570736575ULL, 0x646f72616e646f6dULL,	\
+			  0x6c7967656e657261ULL, 0x7465646279746573ULL };	\
+	uint64_t b = (uint64_t)(len) << 56;				\
+	uint32_t ret;							\
+	int __i;							\
+									\
+	do {								\
+		for (__i = sizeof(v) / sizeof(v[0]) - 1; __i >= 0; __i--)	\
+			v[__i] ^= k[__i % 2];				\
+	} while (0)
+
+/* Run @n SipRounds on state v[], using @__i from PREAMBLE() as counter */
+#define SIPROUND(n)							\
+	do {								\
+		for (__i = 0; __i < (n); __i++) {			\
+			v[0] += v[1];					\
+			v[1] = ROTL(v[1], 13) ^ v[0];			\
+			v[0] = ROTL(v[0], 32);				\
+			v[2] += v[3];					\
+			v[3] = ROTL(v[3], 16) ^ v[2];			\
+			v[0] += v[3];					\
+			v[3] = ROTL(v[3], 21) ^ v[0];			\
+			v[2] += v[1];					\
+			v[1] = ROTL(v[1], 17) ^ v[2];			\
+			v[2] = ROTL(v[2], 32);				\
+		}							\
+	} while (0)
+
+/* Absorb final word @b, then finalise: hash in @b, XOR of its halves in @ret */
+#define POSTAMBLE							\
+	do {								\
+		v[3] ^= b;						\
+		SIPROUND(2);						\
+		v[0] ^= b;						\
+		v[2] ^= 0xff;						\
+		SIPROUND(4);						\
+		b = (v[0] ^ v[1]) ^ (v[2] ^ v[3]);			\
+		ret = (uint32_t)(b >> 32) ^ (uint32_t)b;		\
+		(void)ret;						\
+	} while (0)
+
+/**
+ * siphash_8b() - Table index or timestamp offset for TCP over IPv4 (8 bytes in)
+ * @in:		Input data (remote address and two ports, or two addresses)
+ * @k:		Hash function key, 128 bits
+ *
+ * Return: the 64-bit hash output
+ */
+uint64_t siphash_8b(const uint8_t *in, const uint64_t *k)
+{
+	PREAMBLE(8);
+	v[3] ^= *(uint64_t *)in;
+	SIPROUND(2);
+	v[0] ^= *(uint64_t *)in;
+	POSTAMBLE;
+
+	return b;
+}
+
+/**
+ * siphash_12b() - Initial sequence number for TCP over IPv4 (12 bytes in)
+ * @in:		Input data (two addresses, two ports)
+ * @k:		Hash function key, 128 bits
+ *
+ * Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
+ */
+uint32_t siphash_12b(const uint8_t *in, const uint64_t *k)
+{
+	uint32_t *in32 = (uint32_t *)in;
+	uint64_t combined;
+
+	combined = (uint64_t)(*(in32 + 1)) << 32 | *in32;
+
+	PREAMBLE(12);
+	v[3] ^= combined;
+	SIPROUND(2);
+	v[0] ^= combined;
+	b |= *(in32 + 2);
+	POSTAMBLE;
+
+	return ret;
+}
+
+/**
+ * siphash_20b() - Table index for TCP over IPv6 (20 bytes in)
+ * @in:		Input data (remote address, two ports)
+ * @k:		Hash function key, 128 bits
+ *
+ * Return: the 64-bit hash output
+ */
+uint64_t siphash_20b(const uint8_t *in, const uint64_t *k)
+{
+	uint32_t *in32 = (uint32_t *)in;
+	uint64_t combined;
+	int i;
+
+	PREAMBLE(20);
+
+	for (i = 0; i < 2; i++, in32 += 2) {
+		combined = (uint64_t)(*(in32 + 1)) << 32 | *in32;
+		v[3] ^= combined;
+		SIPROUND(2);
+		v[0] ^= combined;
+	}
+
+	b |= *in32;
+	POSTAMBLE;
+
+	return b;
+}
+
+/**
+ * siphash_32b() - Timestamp offset for TCP over IPv6 (32 bytes in)
+ * @in:		Input data (two addresses)
+ * @k:		Hash function key, 128 bits
+ *
+ * Return: lower 32 bits of the 64-bit hash output
+ */
+uint32_t siphash_32b(const uint8_t *in, const uint64_t *k)
+{
+	uint64_t *in64 = (uint64_t *)in;
+	int i;
+
+	PREAMBLE(32);
+
+	for (i = 0; i < 4; i++, in64++) {
+		v[3] ^= *in64;
+		SIPROUND(2);
+		v[0] ^= *in64;
+	}
+
+	POSTAMBLE;
+
+	return (uint32_t)b;
+}
+
+/**
+ * siphash_36b() - Initial sequence number for TCP over IPv6 (36 bytes in)
+ * @in:		Input data (two addresses, two ports)
+ * @k:		Hash function key, 128 bits
+ *
+ * Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
+ */
+uint32_t siphash_36b(const uint8_t *in, const uint64_t *k)
+{
+	uint32_t *in32 = (uint32_t *)in;
+	uint64_t combined;
+	int i;
+
+	PREAMBLE(36);
+
+	for (i = 0; i < 4; i++, in32 += 2) {
+		combined = (uint64_t)(*(in32 + 1)) << 32 | *in32;
+		v[3] ^= combined;
+		SIPROUND(2);
+		v[0] ^= combined;
+	}
+
+	b |= *in32;
+	POSTAMBLE;
+
+	return ret;
+}
diff --git a/siphash.h b/siphash.h
new file mode 100644
index 0000000..694fdc3
--- /dev/null
+++ b/siphash.h
@@ -0,0 +1,12 @@
+#ifndef SIPHASH_H
+#define SIPHASH_H
+
+#include <stdint.h>
+
+uint64_t siphash_8b(const uint8_t *in, const uint64_t *k);
+uint32_t siphash_12b(const uint8_t *in, const uint64_t *k);
+uint64_t siphash_20b(const uint8_t *in, const uint64_t *k);
+uint32_t siphash_32b(const uint8_t *in, const uint64_t *k);
+uint32_t siphash_36b(const uint8_t *in, const uint64_t *k);
+
+#endif /* SIPHASH_H */
diff --git a/tcp.c b/tcp.c
index f1de9cf..6c6a6dd 100644
--- a/tcp.c
+++ b/tcp.c
@@ -304,8 +304,9 @@
 #include
 #include
 #include
-#include
+#include
 #include
+#include
 #include
 #include
 #include
@@ -315,6 +316,7 @@
 #include "passt.h"
 #include "tap.h"
 #include "util.h"
+#include "siphash.h"
 
 /* Approximately maximum number of open descriptors per process */
 #define MAX_CONNS (256 * 1024)
@@ -430,6 +432,7 @@ struct tcp_conn {
 static char sock_buf[MAX_WINDOW];
 static uint8_t tcp_act[MAX_CONNS / 8] = { 0 };
 static struct tcp_conn tc[MAX_CONNS];
+static uint64_t hash_secret[2];
 
 static int tcp_send_to_tap(struct ctx *c, int s, int flags, char *in, int len);
 
@@ -676,6 +679,60 @@ static void tcp_clamp_window(int s, struct tcphdr *th, int len, int init)
 	}
 }
 
+/**
+ * tcp_seq_init() - Calculate initial sequence number according to RFC 6528
+ * @c:		Execution context
+ * @af:		Address family, AF_INET or AF_INET6
+ * @addr:	Remote address, pointer to sin_addr or sin6_addr
+ * @dstport:	Destination port, connection-wise, network order
+ * @srcport:	Source port, connection-wise, network order
+ *
+ * Return: initial TCP sequence
+ */
+static uint32_t tcp_seq_init(struct ctx *c, int af, void *addr,
+			     in_port_t dstport, in_port_t srcport)
+{
+	struct timespec ts = { 0 };
+	uint32_t ns, seq = 0;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+
+	if (af == AF_INET) {
+		struct {
+			struct in_addr src;
+			in_port_t srcport;
+			struct in_addr dst;
+			in_port_t dstport;
+		} __attribute__((__packed__)) in = {
+			.src = *(struct in_addr *)addr,
+			.srcport = srcport,
+			.dst = *(struct in_addr *)c->addr4,
+			.dstport = dstport,
+		};
+
+		seq = siphash_12b((uint8_t *)&in, hash_secret);
+	} else if (af == AF_INET6) {
+		struct {
+			struct in6_addr src;
+			in_port_t srcport;
+			struct in6_addr dst;
+			in_port_t dstport;
+		} __attribute__((__packed__)) in = {
+			.src = *(struct in6_addr *)addr,
+			.srcport = srcport,
+			.dst = c->addr6,
+			.dstport = dstport,
+		};
+
+		seq = siphash_36b((uint8_t *)&in, hash_secret);
+	}
+
+	/* 32ns ticks, overflows 32 bits every 137s */
+	ns = ((uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec) >> 5;
+
+	return seq + ns;
+}
+
 /**
  * tcp_conn_from_tap() - Handle connection request (SYN segment) from tap
  * @c:		Execution context
@@ -744,9 +801,8 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
 	tc[s].seq_from_tap = tc[s].seq_init_from_tap + 1;
 	tc[s].seq_ack_to_tap = tc[s].seq_from_tap;
 
-	/* TODO: RFC 6528 with SipHash, worth it? */
-	tc[s].seq_to_tap = 0;
-	tc[s].seq_ack_from_tap = tc[s].seq_to_tap;
+	tc[s].seq_to_tap = tcp_seq_init(c, af, addr, th->dest, th->source);
+	tc[s].seq_ack_from_tap = tc[s].seq_to_tap + 1;
 
 	if (connect(s, sa, sl)) {
 		if (errno != EINPROGRESS) {
@@ -827,6 +883,10 @@ static void tcp_conn_from_sock(struct ctx *c, int fd)
 
 		tc[s].sock_port = sa4->sin_port;
 		tc[s].tap_port = ((struct sockaddr_in *)&sa_l)->sin_port;
+
+		tc[s].seq_to_tap = tcp_seq_init(c, AF_INET, &sa4->sin_addr,
+						tc[s].sock_port,
+						tc[s].tap_port);
 	} else if (sa_l.ss_family == AF_INET6) {
 		struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&sa_r;
 
@@ -834,10 +894,12 @@ static void tcp_conn_from_sock(struct ctx *c, int fd)
 
 		tc[s].sock_port = sa6->sin6_port;
 		tc[s].tap_port = ((struct sockaddr_in6 *)&sa_l)->sin6_port;
+
+		tc[s].seq_to_tap = tcp_seq_init(c, AF_INET6, &sa6->sin6_addr,
+						tc[s].sock_port,
+						tc[s].tap_port);
 	}
 
-	/* TODO: RFC 6528 with SipHash, worth it? */
-	tc[s].seq_to_tap = 0;
 	tc[s].seq_ack_from_tap = tc[s].seq_to_tap + 1;
 
 	tc[s].tap_window = WINDOW_DEFAULT;
@@ -1230,7 +1292,7 @@ void tcp_sock_handler(struct ctx *c, int s, uint32_t events)
 }
 
 /**
- * tcp_sock_init() - Create and bind listening sockets for inbound connections
+ * tcp_sock_init() - Bind sockets for inbound connections, get key for sequence
  * @c:		Execution context
  *
  * Return: 0 on success, -1 on failure
@@ -1246,6 +1308,11 @@ int tcp_sock_init(struct ctx *c)
 			return -1;
 	}
 
+	/* A missing or partial key would make sequence numbers predictable */
+	if (getrandom(hash_secret, sizeof(hash_secret), GRND_RANDOM) !=
+	    (ssize_t)sizeof(hash_secret))
+		return -1;
+
 	return 0;
 }
 