1
0
mirror of https://passt.top/passt synced 2024-12-22 13:45:32 +00:00
passt/tcp_conn.h
David Gibson fe27ebce5c tcp, tcp_splice: Helpers for getting sockets from the pools
We maintain pools of ready-to-connect sockets in both the original and
(for pasta) guest namespace to reduce latency when starting new TCP
connections.  If we exhaust those pools we have to take a higher
latency path to get a new socket.

Currently we open-code that fallback in the places we need it.  To improve
clarity encapsulate that into helper functions.  While we're at it, give
those helpers clearer error reporting.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2024-02-27 12:52:46 +01:00

167 lines
5.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright Red Hat
* Author: Stefano Brivio <sbrivio@redhat.com>
* Author: David Gibson <david@gibson.dropbear.id.au>
*
* TCP connection tracking data structures, used by tcp.c and
* tcp_splice.c. Shouldn't be included in non-TCP code.
*/
#ifndef TCP_CONN_H
#define TCP_CONN_H
/**
* struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
* @f: Generic flow information
* @in_epoll: Is the connection in the epoll set?
* @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
* @sock: Socket descriptor number
* @events: Connection events, implying connection states
* @timer: timerfd descriptor for timeout events
* @flags: Connection flags representing internal attributes
* @retrans: Number of retransmissions occurred due to ACK_TIMEOUT
* @ws_from_tap: Window scaling factor advertised from tap/guest
* @ws_to_tap: Window scaling factor advertised to tap/guest
* @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
* @seq_dup_ack_approx: Last duplicate ACK number sent to tap
* @faddr: Guest side forwarding address (guest's remote address)
* @eport: Guest side endpoint port (guest's local port)
* @fport: Guest side forwarding port (guest's remote port)
* @wnd_from_tap: Last window size from tap, unscaled (as received)
* @wnd_to_tap: Sending window advertised to tap, unscaled (as sent)
* @seq_to_tap: Next sequence for packets to tap
* @seq_ack_from_tap: Last ACK number received from tap
* @seq_from_tap: Next sequence for packets from tap (not actually sent)
* @seq_ack_to_tap: Last ACK number sent to tap
* @seq_init_from_tap: Initial sequence number from tap
*/
struct tcp_tap_conn {
/* Must be first element */
struct flow_common f;
bool in_epoll :1;
#define TCP_RETRANS_BITS 3
unsigned int retrans :TCP_RETRANS_BITS;
#define TCP_MAX_RETRANS MAX_FROM_BITS(TCP_RETRANS_BITS)
#define TCP_WS_BITS 4 /* RFC 7323 */
#define TCP_WS_MAX 14
unsigned int ws_from_tap :TCP_WS_BITS;
unsigned int ws_to_tap :TCP_WS_BITS;
int sock :FD_REF_BITS;
uint8_t events;
#define CLOSED 0
#define SOCK_ACCEPTED BIT(0) /* implies SYN sent to tap */
#define TAP_SYN_RCVD BIT(1) /* implies socket connecting */
#define TAP_SYN_ACK_SENT BIT( 3) /* implies socket connected */
#define ESTABLISHED BIT(2)
#define SOCK_FIN_RCVD BIT( 3)
#define SOCK_FIN_SENT BIT( 4)
#define TAP_FIN_RCVD BIT( 5)
#define TAP_FIN_SENT BIT( 6)
#define TAP_FIN_ACKED BIT( 7)
#define CONN_STATE_BITS /* Setting these clears other flags */ \
(SOCK_ACCEPTED | TAP_SYN_RCVD | ESTABLISHED)
int timer :FD_REF_BITS;
uint8_t flags;
#define STALLED BIT(0)
#define LOCAL BIT(1)
#define ACTIVE_CLOSE BIT(2)
#define ACK_TO_TAP_DUE BIT(3)
#define ACK_FROM_TAP_DUE BIT(4)
#define TCP_MSS_BITS 14
unsigned int tap_mss :TCP_MSS_BITS;
#define MSS_SET(conn, mss) (conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
#define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS))
#define SNDBUF_BITS 24
unsigned int sndbuf :SNDBUF_BITS;
#define SNDBUF_SET(conn, bytes) (conn->sndbuf = ((bytes) >> (32 - SNDBUF_BITS)))
#define SNDBUF_GET(conn) (conn->sndbuf << (32 - SNDBUF_BITS))
uint8_t seq_dup_ack_approx;
union inany_addr faddr;
in_port_t eport;
in_port_t fport;
uint16_t wnd_from_tap;
uint16_t wnd_to_tap;
uint32_t seq_to_tap;
uint32_t seq_ack_from_tap;
uint32_t seq_from_tap;
uint32_t seq_ack_to_tap;
uint32_t seq_init_from_tap;
};
#define SIDES 2
/**
* struct tcp_splice_conn - Descriptor for a spliced TCP connection
* @f: Generic flow information
* @in_epoll: Is the connection in the epoll set?
* @s: File descriptor for sockets
* @pipe: File descriptors for pipes
* @events: Events observed/actions performed on connection
* @flags: Connection flags (attributes, not events)
* @read: Bytes read (not fully written to other side in one shot)
* @written: Bytes written (not fully written from one other side read)
*/
struct tcp_splice_conn {
/* Must be first element */
struct flow_common f;
bool in_epoll :1;
int s[SIDES];
int pipe[SIDES][2];
uint8_t events;
#define SPLICE_CLOSED 0
#define SPLICE_CONNECT BIT(0)
#define SPLICE_ESTABLISHED BIT(1)
#define OUT_WAIT_0 BIT(2)
#define OUT_WAIT_1 BIT(3)
#define FIN_RCVD_0 BIT(4)
#define FIN_RCVD_1 BIT(5)
#define FIN_SENT_0 BIT(6)
#define FIN_SENT_1 BIT(7)
uint8_t flags;
#define SPLICE_V6 BIT(0)
#define RCVLOWAT_SET_0 BIT(1)
#define RCVLOWAT_SET_1 BIT(2)
#define RCVLOWAT_ACT_0 BIT(3)
#define RCVLOWAT_ACT_1 BIT(4)
#define CLOSING BIT(5)
uint32_t read[SIDES];
uint32_t written[SIDES];
};
/* Socket pools */
#define TCP_SOCK_POOL_SIZE 32
extern int init_sock_pool4 [TCP_SOCK_POOL_SIZE];
extern int init_sock_pool6 [TCP_SOCK_POOL_SIZE];
bool tcp_flow_defer(union flow *flow);
bool tcp_splice_flow_defer(union flow *flow);
void tcp_splice_timer(const struct ctx *c, union flow *flow);
int tcp_conn_pool_sock(int pool[]);
int tcp_conn_sock(const struct ctx *c, sa_family_t af);
int tcp_sock_refill_pool(const struct ctx *c, int pool[], sa_family_t af);
void tcp_splice_refill(const struct ctx *c);
#endif /* TCP_CONN_H */