mirror of
https://passt.top/passt
synced 2025-01-21 19:55:17 +00:00
tcp: Probe net.core.{r,w}mem_max, don't set SO_{RCV,SND}BUF if low
If the net.core.rmem_max and net.core.wmem_max sysctls have low values, we can get bigger buffers by not trying to set SO_RCVBUF and SO_SNDBUF to high values -- the kernel would clamp the request to those maximums and lock the buffer sizes to whatever we get. Instead, try to get bigger buffers by queueing as much as possible: if the maximum values in tcp_wmem and tcp_rmem are bigger than those limits, that will work. While at it, drop the QUICKACK option for non-spliced sockets, I set that earlier by mistake.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
e1a2e2780c
commit
683043e200
38
tcp.c
38
tcp.c
@ -807,15 +807,18 @@ static void tcp_get_sndbuf(struct tcp_tap_conn *conn)
|
||||
* tcp_sock_set_bufsize() - Set SO_RCVBUF and SO_SNDBUF to maximum values
|
||||
* @s: Socket, can be -1 to avoid check in the caller
|
||||
*/
|
||||
static void tcp_sock_set_bufsize(int s)
|
||||
static void tcp_sock_set_bufsize(struct ctx *c, int s)
|
||||
{
|
||||
int v = INT_MAX / 2; /* Kernel clamps and rounds, no need to check */
|
||||
|
||||
if (s == -1)
|
||||
return;
|
||||
|
||||
setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v));
|
||||
setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v));
|
||||
if (!c->tcp.low_rmem)
|
||||
setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v));
|
||||
|
||||
if (!c->tcp.low_wmem)
|
||||
setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1308,7 +1311,8 @@ static int tcp_send_to_tap(struct ctx *c, struct tcp_tap_conn *conn, int flags,
|
||||
else
|
||||
mss -= sizeof(struct ipv6hdr);
|
||||
|
||||
if (!conn->local && !tcp_rtt_dst_low(conn))
|
||||
if (c->tcp.low_wmem &&
|
||||
!conn->local && !tcp_rtt_dst_low(conn))
|
||||
mss = MIN(mss, PAGE_SIZE);
|
||||
else
|
||||
mss = ROUND_DOWN(mss, PAGE_SIZE);
|
||||
@ -1571,7 +1575,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
|
||||
if (s < 0)
|
||||
return;
|
||||
|
||||
tcp_sock_set_bufsize(s);
|
||||
tcp_sock_set_bufsize(c, s);
|
||||
|
||||
if (af == AF_INET && addr4.sin_addr.s_addr == c->gw4)
|
||||
addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
|
||||
@ -2560,13 +2564,15 @@ static int tcp_splice_connect(struct ctx *c, struct tcp_splice_conn *conn,
|
||||
.sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) },
|
||||
};
|
||||
const struct sockaddr *sa;
|
||||
int ret, one = 1;
|
||||
socklen_t sl;
|
||||
int ret;
|
||||
|
||||
conn->to = sock_conn;
|
||||
|
||||
if (s <= 0)
|
||||
tcp_sock_set_bufsize(sock_conn);
|
||||
tcp_sock_set_bufsize(c, sock_conn);
|
||||
|
||||
setsockopt(s, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
|
||||
|
||||
if (v6) {
|
||||
sa = (struct sockaddr *)&addr6;
|
||||
@ -3157,7 +3163,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port)
|
||||
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
|
||||
tref.u32);
|
||||
if (s > 0)
|
||||
tcp_sock_set_bufsize(s);
|
||||
tcp_sock_set_bufsize(c, s);
|
||||
else
|
||||
s = -1;
|
||||
|
||||
@ -3170,7 +3176,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port)
|
||||
s = sock_l4(c, AF_INET, IPPROTO_TCP, port,
|
||||
BIND_LOOPBACK, tref.u32);
|
||||
if (s > 0)
|
||||
tcp_sock_set_bufsize(s);
|
||||
tcp_sock_set_bufsize(c, s);
|
||||
else
|
||||
s = -1;
|
||||
|
||||
@ -3192,7 +3198,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port)
|
||||
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
|
||||
tref.u32);
|
||||
if (s > 0)
|
||||
tcp_sock_set_bufsize(s);
|
||||
tcp_sock_set_bufsize(c, s);
|
||||
else
|
||||
s = -1;
|
||||
|
||||
@ -3205,7 +3211,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port)
|
||||
s = sock_l4(c, AF_INET6, IPPROTO_TCP, port,
|
||||
BIND_LOOPBACK, tref.u32);
|
||||
if (s > 0)
|
||||
tcp_sock_set_bufsize(s);
|
||||
tcp_sock_set_bufsize(c, s);
|
||||
else
|
||||
s = -1;
|
||||
|
||||
@ -3287,7 +3293,7 @@ struct tcp_sock_refill_arg {
|
||||
static int tcp_sock_refill(void *arg)
|
||||
{
|
||||
struct tcp_sock_refill_arg *a = (struct tcp_sock_refill_arg *)arg;
|
||||
int i, *p4, *p6, one = 1;
|
||||
int i, *p4, *p6;
|
||||
|
||||
if (a->ns) {
|
||||
if (ns_enter(a->c->pasta_pid))
|
||||
@ -3304,8 +3310,7 @@ static int tcp_sock_refill(void *arg)
|
||||
break;
|
||||
}
|
||||
*p4 = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
|
||||
setsockopt(*p4, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
|
||||
tcp_sock_set_bufsize(*p4);
|
||||
tcp_sock_set_bufsize(a->c, *p4);
|
||||
}
|
||||
|
||||
for (i = 0; a->c->v6 && i < TCP_SOCK_POOL_SIZE; i++, p6++) {
|
||||
@ -3314,8 +3319,7 @@ static int tcp_sock_refill(void *arg)
|
||||
}
|
||||
*p6 = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK,
|
||||
IPPROTO_TCP);
|
||||
setsockopt(*p6, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
|
||||
tcp_sock_set_bufsize(*p6);
|
||||
tcp_sock_set_bufsize(a->c, *p6);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -3334,6 +3338,8 @@ int tcp_sock_init(struct ctx *c, struct timespec *now)
|
||||
|
||||
getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM);
|
||||
|
||||
tcp_probe_mem(c);
|
||||
|
||||
for (port = 0; port < USHRT_MAX; port++) {
|
||||
if (!bitmap_isset(c->tcp.port_to_tap, port))
|
||||
continue;
|
||||
|
4
tcp.h
4
tcp.h
@ -51,6 +51,8 @@ union tcp_epoll_ref {
|
||||
* @pipe_size: Size of pipes for spliced connections
|
||||
* @refill_ts: Time of last refill operation for pools of sockets/pipes
|
||||
* @port_detect_ts: Time of last TCP port detection/rebind, if enabled
|
||||
* @low_wmem: Low probed net.core.wmem_max
|
||||
* @low_rmem: Low probed net.core.rmem_max
|
||||
*/
|
||||
struct tcp_ctx {
|
||||
uint64_t hash_secret[2];
|
||||
@ -65,6 +67,8 @@ struct tcp_ctx {
|
||||
size_t pipe_size;
|
||||
struct timespec refill_ts;
|
||||
struct timespec port_detect_ts;
|
||||
int low_wmem;
|
||||
int low_rmem;
|
||||
};
|
||||
|
||||
#endif /* TCP_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user