1
0
mirror of https://passt.top/passt synced 2025-01-18 10:25:17 +00:00
passt/siphash.c
Stefano Brivio 33482d5bf2 passt: Add PASTA mode, major rework
PASTA (Pack A Subtle Tap Abstraction) provides quasi-native host
connectivity to an otherwise disconnected, unprivileged network
and user namespace, similarly to slirp4netns. Given that the
implementation is largely overlapping with PASST, no separate binary
is built: 'pasta' (and 'passt4netns' for clarity) both link to
'passt', and the mode of operation is selected depending on how the
binary is invoked. Usage example:

	$ unshare -rUn
	# echo $$
	1871759

	$ ./pasta 1871759	# From another terminal

	# udhcpc -i pasta0 2>/dev/null
	# ping -c1 pasta.pizza
	PING pasta.pizza (64.190.62.111) 56(84) bytes of data.
	64 bytes from 64.190.62.111 (64.190.62.111): icmp_seq=1 ttl=255 time=34.6 ms

	--- pasta.pizza ping statistics ---
	1 packets transmitted, 1 received, 0% packet loss, time 0ms
	rtt min/avg/max/mdev = 34.575/34.575/34.575/0.000 ms
	# ping -c1 spaghetti.pizza
	PING spaghetti.pizza(2606:4700:3034::6815:147a (2606:4700:3034::6815:147a)) 56 data bytes
	64 bytes from 2606:4700:3034::6815:147a (2606:4700:3034::6815:147a): icmp_seq=1 ttl=255 time=29.0 ms

	--- spaghetti.pizza ping statistics ---
	1 packets transmitted, 1 received, 0% packet loss, time 0ms
	rtt min/avg/max/mdev = 28.967/28.967/28.967/0.000 ms

This entails a major rework, especially with regard to the storage of
tracked connections and to the semantics of epoll(7) references.

Indexing TCP and UDP bindings merely by socket proved to be
inflexible and unsuitable to handle different connection flows: pasta
also provides Layer-2 to Layer-2 socket mapping between init and a
separate namespace for local connections, using a pair of splice()
system calls for TCP, and a recvmmsg()/sendmmsg() pair for UDP local
bindings. For instance, building on the previous example:

	# ip link set dev lo up
	# iperf3 -s

	$ iperf3 -c ::1 -Z -w 32M -l 1024k -P2 | tail -n4
	[SUM]   0.00-10.00  sec  52.3 GBytes  44.9 Gbits/sec  283             sender
	[SUM]   0.00-10.43  sec  52.3 GBytes  43.1 Gbits/sec                  receiver

	iperf Done.

epoll(7) references now include a generic part in order to
demultiplex data to the relevant protocol handler, using 24
bits for the socket number, and an opaque portion reserved for
usage by the single protocol handlers, in order to track sockets
back to corresponding connections and bindings.

A number of fixes pertaining to TCP state machine and congestion
window handling are also included here.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2021-07-17 11:04:22 +02:00

222 lines
5.3 KiB
C

// SPDX-License-Identifier: AGPL-3.0-or-later
/* PASST - Plug A Simple Socket Transport
* for qemu/UNIX domain socket mode
*
* PASTA - Pack A Subtle Tap Abstraction
* for network namespace/tap device mode
*
* siphash.c - SipHash routines
*
* Copyright (c) 2020-2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
*
* This is an implementation of the SipHash-2-4-64 functions needed for TCP
* initial sequence numbers and socket lookup table hash for IPv4 and IPv6, see:
*
* Aumasson, J.P. and Bernstein, D.J., 2012, December. SipHash: a fast
* short-input PRF. In International Conference on Cryptology in India
* (pp. 489-508). Springer, Berlin, Heidelberg.
*
* http://cr.yp.to/siphash/siphash-20120918.pdf
*
* This includes code from the reference SipHash implementation at
* https://github.com/veorq/SipHash/ licensed as follows:
*
* --
* SipHash reference C implementation
*
* Copyright (c) 2012-2021 Jean-Philippe Aumasson
* <jeanphilippe.aumasson@gmail.com>
* Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide. This software is distributed without any warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
* --
*
* and from the Linux kernel implementation (lib/siphash.c), licensed as
* follows:
*
* --
* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This file is provided under a dual BSD/GPLv2 license.
* --
*
*/
#include <stdint.h>
#include "siphash.h"
#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
#define PREAMBLE(len) \
uint64_t v[4] = { 0x736f6d6570736575ULL, 0x646f72616e646f6dULL, \
0x6c7967656e657261ULL, 0x7465646279746573ULL }; \
uint64_t b = (uint64_t)(len) << 56; \
uint32_t ret; \
int __i; \
\
do { \
for (__i = sizeof(v) / sizeof(v[0]); __i >= 0; __i--) \
v[__i] = k[__i % 2]; \
} while (0)
#define SIPROUND(n) \
do { \
for (__i = 0; __i < (n); __i++) { \
v[0] += v[1]; \
v[1] = ROTL(v[1], 13) ^ v[0]; \
v[0] = ROTL(v[0], 32); \
v[2] += v[3]; \
v[3] = ROTL(v[3], 16) ^ v[2]; \
v[0] += v[3]; \
v[3] = ROTL(v[3], 21) ^ v[0]; \
v[2] += v[1]; \
v[1] = ROTL(v[1], 17) ^ v[2]; \
v[2] = ROTL(v[2], 32); \
} \
} while (0)
#define POSTAMBLE \
do { \
v[3] ^= b; \
SIPROUND(2); \
v[0] ^= b; \
v[2] ^= 0xff; \
SIPROUND(4); \
b = (v[0] ^ v[1]) ^ (v[2] ^ v[3]); \
ret = (uint32_t)(b >> 32) ^ (uint32_t)b; \
(void)ret; \
} while (0)
/**
* siphash_8b() - Table index or timestamp offset for TCP over IPv4 (8 bytes in)
* @in: Input data (remote address and two ports, or two addresses)
* @k: Hash function key, 128 bits
*
* Return: the 64-bit hash output
*/
uint64_t siphash_8b(const uint8_t *in, const uint64_t *k)
{
PREAMBLE(8);
v[3] ^= *(uint64_t *)in;
SIPROUND(2);
v[0] ^= *(uint64_t *)in;
POSTAMBLE;
return b;
}
/**
* siphash_12b() - Initial sequence number for TCP over IPv4 (12 bytes in)
* @in: Input data (two addresses, two ports)
* @k: Hash function key, 128 bits
*
* Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
*/
uint32_t siphash_12b(const uint8_t *in, const uint64_t *k)
{
uint32_t *in32 = (uint32_t *)in;
uint64_t combined;
combined = (uint64_t)(*(in32 + 1)) << 32 | *in32;
PREAMBLE(12);
v[3] ^= combined;
SIPROUND(2);
v[0] ^= combined;
b |= *(in32 + 2);
POSTAMBLE;
return ret;
}
/**
* siphash_20b() - Table index for TCP over IPv6 (20 bytes in)
* @in: Input data (remote address, two ports)
* @k: Hash function key, 128 bits
*
* Return: the 64-bit hash output
*/
uint64_t siphash_20b(const uint8_t *in, const uint64_t *k)
{
uint32_t *in32 = (uint32_t *)in;
uint64_t combined;
int i;
PREAMBLE(20);
for (i = 0; i < 2; i++, in32 += 2) {
combined = (uint64_t)(*(in32 + 1)) << 32 | *in32;
v[3] ^= combined;
SIPROUND(2);
v[0] ^= combined;
}
b |= *in32;
POSTAMBLE;
return b;
}
/**
* siphash_32b() - Timestamp offset for TCP over IPv6 (32 bytes in)
* @in: Input data (two addresses)
* @k: Hash function key, 128 bits
*
* Return: the 64-bit hash output
*/
uint32_t siphash_32b(const uint8_t *in, const uint64_t *k)
{
uint64_t *in64 = (uint64_t *)in;
int i;
PREAMBLE(32);
for (i = 0; i < 4; i++, in64++) {
v[3] ^= *in64;
SIPROUND(2);
v[0] ^= *in64;
}
POSTAMBLE;
return b;
}
/**
* siphash_36b() - Initial sequence number for TCP over IPv6 (36 bytes in)
* @in: Input data (two addresses, two ports)
* @k: Hash function key, 128 bits
*
* Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
*/
uint32_t siphash_36b(const uint8_t *in, const uint64_t *k)
{
uint32_t *in32 = (uint32_t *)in;
uint64_t combined;
int i;
PREAMBLE(36);
for (i = 0; i < 4; i++, in32 += 2) {
combined = (uint64_t)(*(in32 + 1)) << 32 | *in32;
v[3] ^= combined;
SIPROUND(2);
v[0] ^= combined;
}
b |= *in32;
POSTAMBLE;
return ret;
}