mirror of
https://passt.top/passt
synced 2025-01-18 10:25:17 +00:00
tcp, tap: Don't increase tap-side sequence counter for dropped frames
...so that we'll retry sending them, instead of more-or-less silently dropping them. This happens quite frequently if our sending buffer on the UNIX domain socket is heavily constrained (for instance, by the 208 KiB default memory limit). It might be argued that dropping frames is part of the expected TCP flow: we don't dequeue those from the socket anyway, so we'll eventually retransmit them. But we don't need the receiver to tell us (by the way of duplicate or missing ACKs) that we couldn't send them: we already know as sendmsg() reports that. This seems to considerably increase throughput stability and throughput itself for TCP connections with default wmem_max values. Unfortunately, the 16 bits left as padding in the frame descriptors we use internally aren't enough to uniquely identify for which connection we should update sequence numbers: create a parallel array of pointers to sequence numbers and L4 lengths, of TCP_FRAMES_MEM size, and go through it after calling sendmsg(). Signed-off-by: Stefano Brivio <sbrivio@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
This commit is contained in:
parent
d3192f67c4
commit
a469fc393f
10
tap.c
10
tap.c
@ -413,13 +413,15 @@ static size_t tap_send_frames_passt(const struct ctx *c,
|
|||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @iov: Array of buffers, each containing one frame (with L2 headers)
|
* @iov: Array of buffers, each containing one frame (with L2 headers)
|
||||||
* @n: Number of buffers/frames in @iov
|
* @n: Number of buffers/frames in @iov
|
||||||
|
*
|
||||||
|
* Return: number of frames actually sent
|
||||||
*/
|
*/
|
||||||
void tap_send_frames(const struct ctx *c, const struct iovec *iov, size_t n)
|
size_t tap_send_frames(const struct ctx *c, const struct iovec *iov, size_t n)
|
||||||
{
|
{
|
||||||
size_t m;
|
size_t m;
|
||||||
|
|
||||||
if (!n)
|
if (!n)
|
||||||
return;
|
return 0;
|
||||||
|
|
||||||
if (c->mode == MODE_PASST)
|
if (c->mode == MODE_PASST)
|
||||||
m = tap_send_frames_passt(c, iov, n);
|
m = tap_send_frames_passt(c, iov, n);
|
||||||
@ -427,9 +429,11 @@ void tap_send_frames(const struct ctx *c, const struct iovec *iov, size_t n)
|
|||||||
m = tap_send_frames_pasta(c, iov, n);
|
m = tap_send_frames_pasta(c, iov, n);
|
||||||
|
|
||||||
if (m < n)
|
if (m < n)
|
||||||
debug("tap: dropped %lu frames of %lu due to short send", n - m, n);
|
debug("tap: failed to send %lu frames of %lu", n - m, n);
|
||||||
|
|
||||||
pcap_multiple(iov, m, c->mode == MODE_PASST ? sizeof(uint32_t) : 0);
|
pcap_multiple(iov, m, c->mode == MODE_PASST ? sizeof(uint32_t) : 0);
|
||||||
|
|
||||||
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
2
tap.h
2
tap.h
@ -73,7 +73,7 @@ void tap_icmp6_send(const struct ctx *c,
|
|||||||
const struct in6_addr *src, const struct in6_addr *dst,
|
const struct in6_addr *src, const struct in6_addr *dst,
|
||||||
const void *in, size_t len);
|
const void *in, size_t len);
|
||||||
int tap_send(const struct ctx *c, const void *data, size_t len);
|
int tap_send(const struct ctx *c, const void *data, size_t len);
|
||||||
void tap_send_frames(const struct ctx *c, const struct iovec *iov, size_t n);
|
size_t tap_send_frames(const struct ctx *c, const struct iovec *iov, size_t n);
|
||||||
void tap_update_mac(struct tap_hdr *taph,
|
void tap_update_mac(struct tap_hdr *taph,
|
||||||
const unsigned char *eth_d, const unsigned char *eth_s);
|
const unsigned char *eth_d, const unsigned char *eth_s);
|
||||||
void tap_listen_handler(struct ctx *c, uint32_t events);
|
void tap_listen_handler(struct ctx *c, uint32_t events);
|
||||||
|
40
tcp.c
40
tcp.c
@ -434,6 +434,16 @@ static int tcp_sock_ns [NUM_PORTS][IP_VERSIONS];
|
|||||||
*/
|
*/
|
||||||
static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
|
static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* tcp_buf_seq_update - Sequences to update with length of frames once sent
|
||||||
|
* @seq: Pointer to sequence number sent to tap-side, to be updated
|
||||||
|
* @len: TCP payload length
|
||||||
|
*/
|
||||||
|
struct tcp_buf_seq_update {
|
||||||
|
uint32_t *seq;
|
||||||
|
uint16_t len;
|
||||||
|
};
|
||||||
|
|
||||||
/* Static buffers */
|
/* Static buffers */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -462,6 +472,8 @@ static struct tcp4_l2_buf_t {
|
|||||||
#endif
|
#endif
|
||||||
tcp4_l2_buf[TCP_FRAMES_MEM];
|
tcp4_l2_buf[TCP_FRAMES_MEM];
|
||||||
|
|
||||||
|
static struct tcp_buf_seq_update tcp4_l2_buf_seq_update[TCP_FRAMES_MEM];
|
||||||
|
|
||||||
static unsigned int tcp4_l2_buf_used;
|
static unsigned int tcp4_l2_buf_used;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -490,6 +502,8 @@ struct tcp6_l2_buf_t {
|
|||||||
#endif
|
#endif
|
||||||
tcp6_l2_buf[TCP_FRAMES_MEM];
|
tcp6_l2_buf[TCP_FRAMES_MEM];
|
||||||
|
|
||||||
|
static struct tcp_buf_seq_update tcp6_l2_buf_seq_update[TCP_FRAMES_MEM];
|
||||||
|
|
||||||
static unsigned int tcp6_l2_buf_used;
|
static unsigned int tcp6_l2_buf_used;
|
||||||
|
|
||||||
/* recvmsg()/sendmsg() data for tap */
|
/* recvmsg()/sendmsg() data for tap */
|
||||||
@ -1364,10 +1378,17 @@ static void tcp_l2_flags_buf_flush(const struct ctx *c)
|
|||||||
*/
|
*/
|
||||||
static void tcp_l2_data_buf_flush(const struct ctx *c)
|
static void tcp_l2_data_buf_flush(const struct ctx *c)
|
||||||
{
|
{
|
||||||
tap_send_frames(c, tcp6_l2_iov, tcp6_l2_buf_used);
|
unsigned i;
|
||||||
|
size_t m;
|
||||||
|
|
||||||
|
m = tap_send_frames(c, tcp6_l2_iov, tcp6_l2_buf_used);
|
||||||
|
for (i = 0; i < m; i++)
|
||||||
|
*tcp6_l2_buf_seq_update[i].seq += tcp6_l2_buf_seq_update[i].len;
|
||||||
tcp6_l2_buf_used = 0;
|
tcp6_l2_buf_used = 0;
|
||||||
|
|
||||||
tap_send_frames(c, tcp4_l2_iov, tcp4_l2_buf_used);
|
m = tap_send_frames(c, tcp4_l2_iov, tcp4_l2_buf_used);
|
||||||
|
for (i = 0; i < m; i++)
|
||||||
|
*tcp4_l2_buf_seq_update[i].seq += tcp4_l2_buf_seq_update[i].len;
|
||||||
tcp4_l2_buf_used = 0;
|
tcp4_l2_buf_used = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2145,17 +2166,20 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
|
|||||||
* @plen: Payload length at L4
|
* @plen: Payload length at L4
|
||||||
* @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
|
* @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
|
||||||
* @seq: Sequence number to be sent
|
* @seq: Sequence number to be sent
|
||||||
* @now: Current timestamp
|
|
||||||
*/
|
*/
|
||||||
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
|
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||||
ssize_t plen, int no_csum, uint32_t seq)
|
ssize_t plen, int no_csum, uint32_t seq)
|
||||||
{
|
{
|
||||||
|
uint32_t *seq_update = &conn->seq_to_tap;
|
||||||
struct iovec *iov;
|
struct iovec *iov;
|
||||||
|
|
||||||
if (CONN_V4(conn)) {
|
if (CONN_V4(conn)) {
|
||||||
struct tcp4_l2_buf_t *b = &tcp4_l2_buf[tcp4_l2_buf_used];
|
struct tcp4_l2_buf_t *b = &tcp4_l2_buf[tcp4_l2_buf_used];
|
||||||
const uint16_t *check = no_csum ? &(b - 1)->iph.check : NULL;
|
const uint16_t *check = no_csum ? &(b - 1)->iph.check : NULL;
|
||||||
|
|
||||||
|
tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update;
|
||||||
|
tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen;
|
||||||
|
|
||||||
iov = tcp4_l2_iov + tcp4_l2_buf_used++;
|
iov = tcp4_l2_iov + tcp4_l2_buf_used++;
|
||||||
iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
|
iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
|
||||||
check, seq);
|
check, seq);
|
||||||
@ -2164,6 +2188,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
|
|||||||
} else if (CONN_V6(conn)) {
|
} else if (CONN_V6(conn)) {
|
||||||
struct tcp6_l2_buf_t *b = &tcp6_l2_buf[tcp6_l2_buf_used];
|
struct tcp6_l2_buf_t *b = &tcp6_l2_buf[tcp6_l2_buf_used];
|
||||||
|
|
||||||
|
tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update;
|
||||||
|
tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen;
|
||||||
|
|
||||||
iov = tcp6_l2_iov + tcp6_l2_buf_used++;
|
iov = tcp6_l2_iov + tcp6_l2_buf_used++;
|
||||||
iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
|
iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
|
||||||
NULL, seq);
|
NULL, seq);
|
||||||
@ -2189,7 +2216,7 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
|
|||||||
int s = conn->sock, i, ret = 0;
|
int s = conn->sock, i, ret = 0;
|
||||||
struct msghdr mh_sock = { 0 };
|
struct msghdr mh_sock = { 0 };
|
||||||
uint16_t mss = MSS_GET(conn);
|
uint16_t mss = MSS_GET(conn);
|
||||||
uint32_t already_sent;
|
uint32_t already_sent, seq;
|
||||||
struct iovec *iov;
|
struct iovec *iov;
|
||||||
|
|
||||||
already_sent = conn->seq_to_tap - conn->seq_ack_from_tap;
|
already_sent = conn->seq_to_tap - conn->seq_ack_from_tap;
|
||||||
@ -2278,14 +2305,15 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
|
|||||||
|
|
||||||
/* Finally, queue to tap */
|
/* Finally, queue to tap */
|
||||||
plen = mss;
|
plen = mss;
|
||||||
|
seq = conn->seq_to_tap;
|
||||||
for (i = 0; i < send_bufs; i++) {
|
for (i = 0; i < send_bufs; i++) {
|
||||||
int no_csum = i && i != send_bufs - 1 && tcp4_l2_buf_used;
|
int no_csum = i && i != send_bufs - 1 && tcp4_l2_buf_used;
|
||||||
|
|
||||||
if (i == send_bufs - 1)
|
if (i == send_bufs - 1)
|
||||||
plen = last_len;
|
plen = last_len;
|
||||||
|
|
||||||
tcp_data_to_tap(c, conn, plen, no_csum, conn->seq_to_tap);
|
tcp_data_to_tap(c, conn, plen, no_csum, seq);
|
||||||
conn->seq_to_tap += plen;
|
seq += plen;
|
||||||
}
|
}
|
||||||
|
|
||||||
conn_flag(c, conn, ACK_FROM_TAP_DUE);
|
conn_flag(c, conn, ACK_FROM_TAP_DUE);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user