diff --git a/icmp.c b/icmp.c index 94093d2..0eb5bfe 100644 --- a/icmp.c +++ b/icmp.c @@ -43,13 +43,13 @@ /** * struct icmp_id_sock - Tracking information for single ICMP echo identifier * @sock: Bound socket for identifier - * @ts: Last associated activity from tap, seconds * @seq: Last sequence number sent to tap, host order + * @ts: Last associated activity from tap, seconds */ struct icmp_id_sock { int sock; - time_t ts; uint16_t seq; + time_t ts; }; /* Indexed by ICMP echo identifier */ @@ -168,6 +168,10 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr, s = sock_l4(c, AF_INET, IPPROTO_ICMP, id, 0, iref.u32); if (s < 0) goto fail_sock; + if (s > SOCKET_MAX) { + close(s); + return 1; + } icmp_id_map[V4][id].sock = s; } @@ -201,6 +205,10 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr, iref.u32); if (s < 0) goto fail_sock; + if (s > SOCKET_MAX) { + close(s); + return 1; + } icmp_id_map[V6][id].sock = s; } diff --git a/passt.h b/passt.h index 7d40c6f..69e334d 100644 --- a/passt.h +++ b/passt.h @@ -63,7 +63,7 @@ union epoll_ref { }; #define TAP_BUF_BYTES \ - ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 256), PAGE_SIZE) + ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE) #define TAP_BUF_FILL (TAP_BUF_BYTES - ETH_MAX_MTU - sizeof(uint32_t)) #define TAP_MSGS \ DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t)) diff --git a/tap.c b/tap.c index 6333015..aca8c75 100644 --- a/tap.c +++ b/tap.c @@ -57,6 +57,8 @@ static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf); static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf); +#define TAP_SEQS 128 /* Different L4 tuples in one batch */ + /** * tap_send() - Send frame, with qemu socket header if needed * @c: Execution context @@ -225,7 +227,7 @@ static struct tap4_l4_t { uint32_t daddr; struct pool_l4_t p; -} tap4_l4[UIO_MAXIOV /* Arbitrary: TAP_MSGS in theory, so limit in users */]; +} tap4_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */]; /** * struct l4_seq6_t - Message sequence for one protocol handler call, IPv6 @@ -247,7 +249,7 @@ static struct tap6_l4_t { struct in6_addr daddr; struct pool_l4_t p; -} tap6_l4[UIO_MAXIOV /* Arbitrary: TAP_MSGS in theory, so limit in users */]; +} tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */]; /** * tap_packet_debug() - Print debug message for packet(s) from guest/tap @@ -401,12 +403,12 @@ resume: seq->daddr = iph->daddr; \ } while (0) - if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV) + if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < TAP_SEQS) goto append; for (seq = tap4_l4 + seq_count - 1; seq >= tap4_l4; seq--) { if (L4_MATCH(iph, uh, seq)) { - if (seq->p.count >= UIO_MAXIOV) + if (seq->p.count >= TAP_SEQS) seq = NULL; break; } @@ -424,7 +426,7 @@ resume: append: packet_add((struct pool *)&seq->p, l4_len, l4h); - if (seq_count == UIO_MAXIOV) + if (seq_count == TAP_SEQS) break; /* Resume after flushing if i < count */ } @@ -563,12 +565,12 @@ resume: } while (0) if (seq && L4_MATCH(ip6h, proto, uh, seq) && - seq->p.count < UIO_MAXIOV) + seq->p.count < TAP_SEQS) goto append; for (seq = tap6_l4 + seq_count - 1; seq >= tap6_l4; seq--) { if (L4_MATCH(ip6h, proto, uh, seq)) { - if (seq->p.count >= UIO_MAXIOV) + if (seq->p.count >= TAP_SEQS) seq = NULL; break; } @@ -586,7 +588,7 @@ resume: append: packet_add((struct pool *)&seq->p, l4_len, l4h); - if (seq_count == UIO_MAXIOV) + if (seq_count == TAP_SEQS) break; /* Resume after flushing if i < count */ } @@ -924,9 +926,9 @@ void tap_sock_init(struct ctx *c) pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS, pkt_buf, sz); pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS, pkt_buf, sz); - for (i = 0; i < UIO_MAXIOV; i++) { - tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, pkt_buf, sz); - tap6_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, pkt_buf, sz); + for (i = 0; i < TAP_SEQS; i++) { + tap4_l4[i].p = PACKET_INIT(pool_l4, TAP_SEQS, pkt_buf, sz); + tap6_l4[i].p = PACKET_INIT(pool_l4, TAP_SEQS, pkt_buf, sz); } if (c->fd_tap != -1) { diff --git a/tcp.c b/tcp.c index fe46ede..2194067 100644 --- a/tcp.c +++ b/tcp.c @@ -70,9 +70,9 @@ * * Data needs to linger on sockets as long as it's not acknowledged by the * guest, and is read using MSG_PEEK into preallocated static buffers sized - * to the maximum supported window, 64MiB ("discard" buffer, for already-sent + * to the maximum supported window, 16 MiB ("discard" buffer, for already-sent * data) plus a number of maximum-MSS-sized buffers. This imposes a practical - * limitation on window scaling, that is, the maximum factor is 1024. Larger + * limitation on window scaling, that is, the maximum factor is 256. Larger * factors will be accepted, but resulting, larger values are never advertised * to the other side, and not used while queueing data. * @@ -299,7 +299,7 @@ #include "conf.h" #include "tcp_splice.h" -#define TCP_FRAMES_MEM 256 +#define TCP_FRAMES_MEM 128 #define TCP_FRAMES \ (c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1) @@ -311,17 +311,48 @@ #define TCP_HASH_TABLE_SIZE (TCP_MAX_CONNS * 100 / \ TCP_HASH_TABLE_LOAD) -#define MAX_WS 10 +#define MAX_WS 8 #define MAX_WINDOW (1 << (16 + (MAX_WS))) /* MSS rounding: see SET_MSS() */ #define MSS_DEFAULT 536 -#define MSS4 ROUND_DOWN(USHRT_MAX - \ - sizeof(uint32_t) - sizeof(struct ethhdr) - \ - sizeof(struct iphdr) - sizeof(struct tcphdr), 4) -#define MSS6 ROUND_DOWN(USHRT_MAX - \ - sizeof(uint32_t) - sizeof(struct ethhdr) - \ - sizeof(struct ipv6hdr) - sizeof(struct tcphdr), 4) + +struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */ + uint32_t psum; + uint32_t tsum; +#ifdef __AVX2__ + uint8_t pad[18]; +#else + uint8_t pad[2]; +#endif + uint32_t vnet_len; + struct ethhdr eh; + struct iphdr iph; + struct tcphdr th; +#ifdef __AVX2__ +} __attribute__ ((packed, aligned(32))); +#else +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); +#endif + +struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */ +#ifdef __AVX2__ + uint8_t pad[14]; +#else + uint8_t pad[2]; +#endif + uint32_t vnet_len; + struct ethhdr eh; + struct ipv6hdr ip6h; + struct tcphdr th; +#ifdef __AVX2__ +} __attribute__ ((packed, aligned(32))); +#else +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); +#endif + +#define MSS4 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp4_l2_head), 4) +#define MSS6 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp6_l2_head), 4) #define WINDOW_DEFAULT 14600 /* RFC 6928 */ #ifdef HAS_SND_WND diff --git a/tcp_splice.c b/tcp_splice.c index 714571c..3f2ef2e 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -102,10 +102,10 @@ struct tcp_splice_conn { #define RCVLOWAT_ACT_B BIT(5) #define CLOSING BIT(6) - uint64_t a_read; - uint64_t a_written; - uint64_t b_read; - uint64_t b_written; + uint32_t a_read; + uint32_t a_written; + uint32_t b_read; + uint32_t b_written; }; #define CONN_V6(x) (x->flags & SOCK_V6) @@ -553,7 +553,7 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref, { uint8_t lowat_set_flag, lowat_act_flag; int from, to, *pipes, eof, never_read; - uint64_t *seq_read, *seq_write; + uint32_t *seq_read, *seq_write; struct tcp_splice_conn *conn; if (ref.r.p.tcp.tcp.listen) { diff --git a/udp.c b/udp.c index 2cdbe6c..1c0fdc6 100644 --- a/udp.c +++ b/udp.c @@ -117,8 +117,8 @@ #include "pcap.h" #define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ -#define UDP_SPLICE_FRAMES 128 -#define UDP_TAP_FRAMES_MEM 128 +#define UDP_SPLICE_FRAMES 32 +#define UDP_TAP_FRAMES_MEM 32 #define UDP_TAP_FRAMES (c->mode == MODE_PASST ? UDP_TAP_FRAMES_MEM : 1) /**