1
0
mirror of https://passt.top/passt synced 2025-01-05 04:15:25 +00:00
passt/tap.h

88 lines
2.8 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright (c) 2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
*/
#ifndef TAP_H
#define TAP_H
/**
* struct tap_hdr - L2 and tap specific headers
* @vnet_len: Frame length (for qemu socket transport)
* @eh: Ethernet header
*/
struct tap_hdr {
uint32_t vnet_len;
struct ethhdr eh;
} __attribute__((packed));
#define TAP_HDR_INIT(proto) { .eh.h_proto = htons_constant(proto) }
static inline size_t tap_hdr_len_(const struct ctx *c)
{
if (c->mode == MODE_PASST)
return sizeof(struct tap_hdr);
else
return sizeof(struct ethhdr);
}
/**
* tap_frame_base() - Find start of tap frame
* @c: Execution context
* @taph: Pointer to L2 and tap specific header buffer
*
* Returns: pointer to the start of tap frame - suitable for an
* iov_base to be passed to tap_send_frames())
*/
static inline void *tap_frame_base(const struct ctx *c, struct tap_hdr *taph)
{
return (char *)(taph + 1) - tap_hdr_len_(c);
}
/**
* tap_frame_len() - Finalize tap frame and return total length
* @c: Execution context
* @taph: Tap header to finalize
* @plen: L3 packet length (excludes L2 and tap specific headers)
*
* Returns: length of the tap frame including L2 and tap specific
* headers - suitable for an iov_len to be passed to
* tap_send_frames()
*/
static inline size_t tap_frame_len(const struct ctx *c, struct tap_hdr *taph,
size_t plen)
{
if (c->mode == MODE_PASST)
taph->vnet_len = htonl(plen + sizeof(taph->eh));
return plen + tap_hdr_len_(c);
}
struct in_addr tap_ip4_daddr(const struct ctx *c);
void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
struct in_addr dst, in_port_t dport,
const void *in, size_t len);
void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
const void *in, size_t len);
const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
const struct in6_addr *src);
void tap_udp6_send(const struct ctx *c,
const struct in6_addr *src, in_port_t sport,
const struct in6_addr *dst, in_port_t dport,
uint32_t flow, const void *in, size_t len);
void tap_icmp6_send(const struct ctx *c,
const struct in6_addr *src, const struct in6_addr *dst,
const void *in, size_t len);
void tap_send_single(const struct ctx *c, const void *data, size_t len);
size_t tap_send_frames(const struct ctx *c, const struct iovec *iov,
size_t bufs_per_frame, size_t nframes);
void eth_update_mac(struct ethhdr *eh,
const unsigned char *eth_d, const unsigned char *eth_s);
void tap_listen_handler(struct ctx *c, uint32_t events);
void tap_handler_pasta(struct ctx *c, uint32_t events,
const struct timespec *now);
void tap_handler_passt(struct ctx *c, uint32_t events,
const struct timespec *now);
passt: Add PASTA mode, major rework PASTA (Pack A Subtle Tap Abstraction) provides quasi-native host connectivity to an otherwise disconnected, unprivileged network and user namespace, similarly to slirp4netns. Given that the implementation is largely overlapping with PASST, no separate binary is built: 'pasta' (and 'passt4netns' for clarity) both link to 'passt', and the mode of operation is selected depending on how the binary is invoked. Usage example: $ unshare -rUn # echo $$ 1871759 $ ./pasta 1871759 # From another terminal # udhcpc -i pasta0 2>/dev/null # ping -c1 pasta.pizza PING pasta.pizza (64.190.62.111) 56(84) bytes of data. 64 bytes from 64.190.62.111 (64.190.62.111): icmp_seq=1 ttl=255 time=34.6 ms --- pasta.pizza ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 34.575/34.575/34.575/0.000 ms # ping -c1 spaghetti.pizza PING spaghetti.pizza(2606:4700:3034::6815:147a (2606:4700:3034::6815:147a)) 56 data bytes 64 bytes from 2606:4700:3034::6815:147a (2606:4700:3034::6815:147a): icmp_seq=1 ttl=255 time=29.0 ms --- spaghetti.pizza ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 28.967/28.967/28.967/0.000 ms This entails a major rework, especially with regard to the storage of tracked connections and to the semantics of epoll(7) references. Indexing TCP and UDP bindings merely by socket proved to be inflexible and unsuitable to handle different connection flows: pasta also provides Layer-2 to Layer-2 socket mapping between init and a separate namespace for local connections, using a pair of splice() system calls for TCP, and a recvmmsg()/sendmmsg() pair for UDP local bindings. For instance, building on the previous example: # ip link set dev lo up # iperf3 -s $ iperf3 -c ::1 -Z -w 32M -l 1024k -P2 | tail -n4 [SUM] 0.00-10.00 sec 52.3 GBytes 44.9 Gbits/sec 283 sender [SUM] 0.00-10.43 sec 52.3 GBytes 43.1 Gbits/sec receiver iperf Done. epoll(7) references now include a generic part in order to demultiplex data to the relevant protocol handler, using 24 bits for the socket number, and an opaque portion reserved for usage by the single protocol handlers, in order to track sockets back to corresponding connections and bindings. A number of fixes pertaining to TCP state machine and congestion window handling are also included here. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2021-07-17 06:34:53 +00:00
void tap_sock_init(struct ctx *c);
#endif /* TAP_H */