From 034fa8a58d87ad2ea5f8b56d267d17dbc75798de Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 17 Nov 2022 16:59:02 +1100 Subject: [PATCH] tcp: Remove v6 flag from tcp_epoll_ref This bit in the TCP specific epoll reference indicates whether the connection is IPv6 or IPv4. However, the sites which refer to it are already calling accept() which (optionally) returns an address for the remote end of the connection. We can use the sa_family field in that address to determine the connection type independent of the epoll reference. This does have a cost: for the spliced case, it means we now need to get that address from accept() which introduces an extra copy_to_user(). However, in future we want to allow handling IPv4 connections through IPv6 sockets, which means we won't be able to determine the IP version at the time we create the listening socket and epoll reference. So, at some point we'll have to pay this cost anyway. Signed-off-by: David Gibson Signed-off-by: Stefano Brivio --- tcp.c | 10 ++++------ tcp.h | 2 -- tcp_splice.c | 8 +++----- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/tcp.c b/tcp.c index 559e271..7d5ac6c 100644 --- a/tcp.c +++ b/tcp.c @@ -662,8 +662,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) { int m = conn->c.in_epoll ? 
EPOLL_CTL_MOD : EPOLL_CTL_ADD; union epoll_ref ref = { .r.proto = IPPROTO_TCP, .r.s = conn->sock, - .r.p.tcp.tcp.index = CONN_IDX(conn), - .r.p.tcp.tcp.v6 = CONN_V6(conn) }; + .r.p.tcp.tcp.index = CONN_IDX(conn) }; struct epoll_event ev = { .data.u64 = ref.u64 }; if (conn->events == CLOSED) { @@ -2745,7 +2744,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union epoll_ref ref, conn->ws_to_tap = conn->ws_from_tap = 0; conn_event(c, conn, SOCK_ACCEPTED); - if (ref.r.p.tcp.tcp.v6) { + if (sa->sa_family == AF_INET6) { struct sockaddr_in6 sa6; memcpy(&sa6, sa, sizeof(sa6)); @@ -3019,8 +3018,7 @@ static void tcp_sock_init6(const struct ctx *c, in_port_t port) { in_port_t idx = port + c->tcp.fwd_in.delta[port]; - union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.v6 = 1, - .tcp.index = idx }; + union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.index = idx }; int s; s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, tref.u32); @@ -3084,7 +3082,7 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port) { in_port_t idx = port + c->tcp.fwd_out.delta[port]; union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1, - .tcp.v6 = 1, .tcp.index = idx }; + .tcp.index = idx }; int s; assert(c->mode == MODE_PASTA); diff --git a/tcp.h b/tcp.h index a940682..739b451 100644 --- a/tcp.h +++ b/tcp.h @@ -33,7 +33,6 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s, * union tcp_epoll_ref - epoll reference portion for TCP connections * @listen: Set if this file descriptor is a listening socket * @outbound: Listening socket maps to outbound, spliced connection - * @v6: Set for IPv6 sockets or connections * @timer: Reference is a timerfd descriptor for connection * @index: Index of connection in table, or port for bound sockets * @u32: Opaque u32 value of reference @@ -42,7 +41,6 @@ union tcp_epoll_ref { struct { uint32_t listen:1, outbound:1, - v6:1, timer:1, index:20; } tcp; diff --git a/tcp_splice.c b/tcp_splice.c index 
2be9b77..1bbd085 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -167,11 +167,9 @@ static int tcp_splice_epoll_ctl(const struct ctx *c, { int m = conn->c.in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; union epoll_ref ref_a = { .r.proto = IPPROTO_TCP, .r.s = conn->a, - .r.p.tcp.tcp.index = CONN_IDX(conn), - .r.p.tcp.tcp.v6 = CONN_V6(conn) }; + .r.p.tcp.tcp.index = CONN_IDX(conn) }; union epoll_ref ref_b = { .r.proto = IPPROTO_TCP, .r.s = conn->b, - .r.p.tcp.tcp.index = CONN_IDX(conn), - .r.p.tcp.tcp.v6 = CONN_V6(conn) }; + .r.p.tcp.tcp.index = CONN_IDX(conn) }; struct epoll_event ev_a = { .data.u64 = ref_a.u64 }; struct epoll_event ev_b = { .data.u64 = ref_b.u64 }; uint32_t events_a, events_b; @@ -517,7 +515,7 @@ bool tcp_splice_conn_from_sock(struct ctx *c, union epoll_ref ref, { assert(c->mode == MODE_PASTA); - if (ref.r.p.tcp.tcp.v6) { + if (sa->sa_family == AF_INET6) { const struct sockaddr_in6 *sa6; sa6 = (const struct sockaddr_in6 *)sa;