mirror of
https://passt.top/passt
synced 2024-12-22 13:45:32 +00:00
33482d5bf2
PASTA (Pack A Subtle Tap Abstraction) provides quasi-native host connectivity to an otherwise disconnected, unprivileged network and user namespace, similarly to slirp4netns. Given that the implementation is largely overlapping with PASST, no separate binary is built: 'pasta' (and 'passt4netns' for clarity) both link to 'passt', and the mode of operation is selected depending on how the binary is invoked. Usage example: $ unshare -rUn # echo $$ 1871759 $ ./pasta 1871759 # From another terminal # udhcpc -i pasta0 2>/dev/null # ping -c1 pasta.pizza PING pasta.pizza (64.190.62.111) 56(84) bytes of data. 64 bytes from 64.190.62.111 (64.190.62.111): icmp_seq=1 ttl=255 time=34.6 ms --- pasta.pizza ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 34.575/34.575/34.575/0.000 ms # ping -c1 spaghetti.pizza PING spaghetti.pizza(2606:4700:3034::6815:147a (2606:4700:3034::6815:147a)) 56 data bytes 64 bytes from 2606:4700:3034::6815:147a (2606:4700:3034::6815:147a): icmp_seq=1 ttl=255 time=29.0 ms --- spaghetti.pizza ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 28.967/28.967/28.967/0.000 ms This entails a major rework, especially with regard to the storage of tracked connections and to the semantics of epoll(7) references. Indexing TCP and UDP bindings merely by socket proved to be inflexible and unsuitable to handle different connection flows: pasta also provides Layer-4 to Layer-4 socket mapping between init and a separate namespace for local connections, using a pair of splice() system calls for TCP, and a recvmmsg()/sendmmsg() pair for UDP local bindings. For instance, building on the previous example: # ip link set dev lo up # iperf3 -s $ iperf3 -c ::1 -Z -w 32M -l 1024k -P2 | tail -n4 [SUM] 0.00-10.00 sec 52.3 GBytes 44.9 Gbits/sec 283 sender [SUM] 0.00-10.43 sec 52.3 GBytes 43.1 Gbits/sec receiver iperf Done. 
epoll(7) references now include a generic part in order to demultiplex data to the relevant protocol handler, using 24 bits for the socket number, and an opaque portion reserved for usage by the single protocol handlers, in order to track sockets back to corresponding connections and bindings. A number of fixes pertaining to TCP state machine and congestion window handling are also included here. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
222 lines
5.3 KiB
C
222 lines
5.3 KiB
C
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
/* PASST - Plug A Simple Socket Transport
|
|
* for qemu/UNIX domain socket mode
|
|
*
|
|
* PASTA - Pack A Subtle Tap Abstraction
|
|
* for network namespace/tap device mode
|
|
*
|
|
* siphash.c - SipHash routines
|
|
*
|
|
* Copyright (c) 2020-2021 Red Hat GmbH
|
|
* Author: Stefano Brivio <sbrivio@redhat.com>
|
|
*
|
|
* This is an implementation of the SipHash-2-4-64 functions needed for TCP
|
|
* initial sequence numbers and socket lookup table hash for IPv4 and IPv6, see:
|
|
*
|
|
* Aumasson, J.P. and Bernstein, D.J., 2012, December. SipHash: a fast
|
|
* short-input PRF. In International Conference on Cryptology in India
|
|
* (pp. 489-508). Springer, Berlin, Heidelberg.
|
|
*
|
|
* http://cr.yp.to/siphash/siphash-20120918.pdf
|
|
*
|
|
* This includes code from the reference SipHash implementation at
|
|
* https://github.com/veorq/SipHash/ licensed as follows:
|
|
*
|
|
* --
|
|
* SipHash reference C implementation
|
|
*
|
|
* Copyright (c) 2012-2021 Jean-Philippe Aumasson
|
|
* <jeanphilippe.aumasson@gmail.com>
|
|
* Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
|
|
*
|
|
* To the extent possible under law, the author(s) have dedicated all copyright
|
|
* and related and neighboring rights to this software to the public domain
|
|
* worldwide. This software is distributed without any warranty.
|
|
*
|
|
* You should have received a copy of the CC0 Public Domain Dedication along
|
|
* with
|
|
* this software. If not, see
|
|
* <http://creativecommons.org/publicdomain/zero/1.0/>.
|
|
* --
|
|
*
|
|
* and from the Linux kernel implementation (lib/siphash.c), licensed as
|
|
* follows:
|
|
*
|
|
* --
|
|
* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
|
*
|
|
* This file is provided under a dual BSD/GPLv2 license.
|
|
* --
|
|
*
|
|
*/
|
|
|
|
#include <stdint.h>
#include <string.h>

#include "siphash.h"
|
|
|
|
/*
 * ROTL() - Rotate a 64-bit value left
 * @x:	Value to rotate, converted to uint64_t before shifting, so that narrower
 *	or signed arguments are not shifted in their own type (evaluated twice)
 * @b:	Rotation distance in bits, must be in the 1..63 range: 0 or 64 would
 *	shift by the full width of the type, which is undefined (evaluated twice)
 */
#define ROTL(x, b)							\
	((uint64_t)(((uint64_t)(x) << (b)) | ((uint64_t)(x) >> (64 - (b)))))
|
|
|
|
/**
 * PREAMBLE() - Declare and initialise the hash state in the calling function
 * @len:	Input length in bytes, stored in the top byte of the final block
 *
 * Expands to declarations followed by a statement, so it has to be used where
 * declarations are allowed. A 'k' pointing to two 64-bit key words must be in
 * scope. Declares:
 *   v[4]	state: SipHash initialisation constants XORed with the key, k[0]
 *		into v[0] and v[2], k[1] into v[1] and v[3]
 *   b		final block, preloaded with @len << 56
 *   ret	32-bit result, left unset here (POSTAMBLE assigns it)
 *   __i	scratch counter, also used by SIPROUND
 */
#define PREAMBLE(len)							\
	uint64_t v[4] = { 0x736f6d6570736575ULL, 0x646f72616e646f6dULL,	\
			  0x6c7967656e657261ULL, 0x7465646279746573ULL };	\
	uint64_t b = (uint64_t)(len) << 56;				\
	uint32_t ret;							\
	int __i;							\
									\
	do {								\
		/* Start from the last valid index, 3, not from 4, and	\
		 * mix the key into the constants instead of replacing	\
		 * them						\
		 */							\
		for (__i = (int)(sizeof(v) / sizeof(v[0])) - 1;		\
		     __i >= 0; __i--)					\
			v[__i] ^= k[__i % 2];				\
	} while (0)
|
|
|
|
/**
 * SIPROUND() - Run a number of mixing rounds over the hash state
 * @n:		Number of rounds
 *
 * Works on the v[4] state and overwrites the __i counter: both need to be in
 * scope already (PREAMBLE declares them).
 */
#define SIPROUND(n)							\
	do {								\
		__i = 0;						\
		while (__i < (n)) {					\
			v[0] += v[1];					\
			v[1] = ROTL(v[1], 13);				\
			v[1] ^= v[0];					\
			v[0] = ROTL(v[0], 32);				\
									\
			v[2] += v[3];					\
			v[3] = ROTL(v[3], 16);				\
			v[3] ^= v[2];					\
									\
			v[0] += v[3];					\
			v[3] = ROTL(v[3], 21);				\
			v[3] ^= v[0];					\
									\
			v[2] += v[1];					\
			v[1] = ROTL(v[1], 17);				\
			v[1] ^= v[2];					\
			v[2] = ROTL(v[2], 32);				\
									\
			__i++;						\
		}							\
	} while (0)
|
|
|
|
/**
 * POSTAMBLE - Absorb the final block and compute the results
 *
 * Needs v[], b, ret and __i in scope (PREAMBLE declares them). Afterwards, b
 * holds the XOR of the four state words, and ret holds the XOR of the upper
 * and lower 32-bit halves of b.
 */
#define POSTAMBLE							\
	do {								\
		/* Final block, two rounds */				\
		v[3] ^= b;						\
		SIPROUND(2);						\
		v[0] ^= b;						\
									\
		/* Finalisation, four rounds */				\
		v[2] ^= 0xff;						\
		SIPROUND(4);						\
									\
		b = v[0] ^ v[1] ^ v[2] ^ v[3];				\
		ret = (uint32_t)b ^ (uint32_t)(b >> 32);		\
		(void)ret;	/* callers returning b don't read it */	\
	} while (0)
|
|
|
|
/**
|
|
* siphash_8b() - Table index or timestamp offset for TCP over IPv4 (8 bytes in)
|
|
* @in: Input data (remote address and two ports, or two addresses)
|
|
* @k: Hash function key, 128 bits
|
|
*
|
|
* Return: the 64-bit hash output
|
|
*/
|
|
uint64_t siphash_8b(const uint8_t *in, const uint64_t *k)
|
|
{
|
|
PREAMBLE(8);
|
|
v[3] ^= *(uint64_t *)in;
|
|
SIPROUND(2);
|
|
v[0] ^= *(uint64_t *)in;
|
|
POSTAMBLE;
|
|
|
|
return b;
|
|
}
|
|
|
|
/**
|
|
* siphash_12b() - Initial sequence number for TCP over IPv4 (12 bytes in)
|
|
* @in: Input data (two addresses, two ports)
|
|
* @k: Hash function key, 128 bits
|
|
*
|
|
* Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
|
|
*/
|
|
uint32_t siphash_12b(const uint8_t *in, const uint64_t *k)
|
|
{
|
|
uint32_t *in32 = (uint32_t *)in;
|
|
uint64_t combined;
|
|
|
|
combined = (uint64_t)(*(in32 + 1)) << 32 | *in32;
|
|
|
|
PREAMBLE(12);
|
|
v[3] ^= combined;
|
|
SIPROUND(2);
|
|
v[0] ^= combined;
|
|
b |= *(in32 + 2);
|
|
POSTAMBLE;
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* siphash_20b() - Table index for TCP over IPv6 (20 bytes in)
|
|
* @in: Input data (remote address, two ports)
|
|
* @k: Hash function key, 128 bits
|
|
*
|
|
* Return: the 64-bit hash output
|
|
*/
|
|
uint64_t siphash_20b(const uint8_t *in, const uint64_t *k)
|
|
{
|
|
uint32_t *in32 = (uint32_t *)in;
|
|
uint64_t combined;
|
|
int i;
|
|
|
|
PREAMBLE(20);
|
|
|
|
for (i = 0; i < 2; i++, in32 += 2) {
|
|
combined = (uint64_t)(*(in32 + 1)) << 32 | *in32;
|
|
v[3] ^= combined;
|
|
SIPROUND(2);
|
|
v[0] ^= combined;
|
|
}
|
|
|
|
b |= *in32;
|
|
POSTAMBLE;
|
|
|
|
return b;
|
|
}
|
|
|
|
/**
|
|
* siphash_32b() - Timestamp offset for TCP over IPv6 (32 bytes in)
|
|
* @in: Input data (two addresses)
|
|
* @k: Hash function key, 128 bits
|
|
*
|
|
* Return: the 64-bit hash output
|
|
*/
|
|
uint32_t siphash_32b(const uint8_t *in, const uint64_t *k)
|
|
{
|
|
uint64_t *in64 = (uint64_t *)in;
|
|
int i;
|
|
|
|
PREAMBLE(32);
|
|
|
|
for (i = 0; i < 4; i++, in64++) {
|
|
v[3] ^= *in64;
|
|
SIPROUND(2);
|
|
v[0] ^= *in64;
|
|
}
|
|
|
|
POSTAMBLE;
|
|
|
|
return b;
|
|
}
|
|
|
|
/**
|
|
* siphash_36b() - Initial sequence number for TCP over IPv6 (36 bytes in)
|
|
* @in: Input data (two addresses, two ports)
|
|
* @k: Hash function key, 128 bits
|
|
*
|
|
* Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
|
|
*/
|
|
uint32_t siphash_36b(const uint8_t *in, const uint64_t *k)
|
|
{
|
|
uint32_t *in32 = (uint32_t *)in;
|
|
uint64_t combined;
|
|
int i;
|
|
|
|
PREAMBLE(36);
|
|
|
|
for (i = 0; i < 4; i++, in32 += 2) {
|
|
combined = (uint64_t)(*(in32 + 1)) << 32 | *in32;
|
|
v[3] ^= combined;
|
|
SIPROUND(2);
|
|
v[0] ^= combined;
|
|
}
|
|
|
|
b |= *in32;
|
|
POSTAMBLE;
|
|
|
|
return ret;
|
|
}
|