diff --git a/passt.c b/passt.c index a700e41..5867d0e 100644 --- a/passt.c +++ b/passt.c @@ -56,7 +56,8 @@ char pkt_buf[PKT_BUF_BYTES] __attribute__ ((aligned(PAGE_SIZE))); char *epoll_type_str[EPOLL_TYPE_MAX + 1] = { - [EPOLL_TYPE_TCP] = "TCP socket", + [EPOLL_TYPE_TCP] = "connected TCP socket", + [EPOLL_TYPE_TCP_LISTEN] = "listening TCP socket", [EPOLL_TYPE_TCP_TIMER] = "TCP timer", [EPOLL_TYPE_UDP] = "UDP socket", [EPOLL_TYPE_ICMP] = "ICMP socket", @@ -323,7 +324,10 @@ loop: break; case EPOLL_TYPE_TCP: if (!c.no_tcp) - tcp_sock_handler(&c, ref, eventmask, &now); + tcp_sock_handler(&c, ref, eventmask); + break; + case EPOLL_TYPE_TCP_LISTEN: + tcp_listen_handler(&c, ref, &now); break; case EPOLL_TYPE_TCP_TIMER: tcp_timer_handler(&c, ref); diff --git a/passt.h b/passt.h index fc1efdb..a625d48 100644 --- a/passt.h +++ b/passt.h @@ -47,8 +47,10 @@ union epoll_ref; enum epoll_type { /* Special value to indicate an invalid type */ EPOLL_TYPE_NONE = 0, - /* TCP sockets */ + /* Connected TCP sockets */ EPOLL_TYPE_TCP, + /* Listening TCP sockets */ + EPOLL_TYPE_TCP_LISTEN, /* timerfds used for TCP timers */ EPOLL_TYPE_TCP_TIMER, /* UDP sockets */ @@ -69,7 +71,8 @@ enum epoll_type { * union epoll_ref - Breakdown of reference for epoll fd bookkeeping * @type: Type of fd (tells us what to do with events) * @fd: File descriptor number (implies < 2^24 total descriptors) - * @tcp: TCP-specific reference part + * @tcp: TCP-specific reference part (connected sockets) + * @tcp_listen: TCP-specific reference part (listening sockets) * @udp: UDP-specific reference part * @icmp: ICMP-specific reference part * @data: Data handled by protocol handlers @@ -83,6 +86,7 @@ union epoll_ref { int32_t fd:FD_REF_BITS; union { union tcp_epoll_ref tcp; + union tcp_listen_epoll_ref tcp_listen; union udp_epoll_ref udp; union icmp_epoll_ref icmp; uint32_t data; diff --git a/tcp.c b/tcp.c index 98761a2..0322842 100644 --- a/tcp.c +++ b/tcp.c @@ -2735,7 +2735,8 @@ static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr) * @sa: Peer socket address (from accept()) * @now: Current timestamp */ -static void tcp_tap_conn_from_sock(struct ctx *c, union epoll_ref ref, +static void tcp_tap_conn_from_sock(struct ctx *c, + union tcp_listen_epoll_ref ref, struct tcp_tap_conn *conn, int s, struct sockaddr *sa, const struct timespec *now) @@ -2747,7 +2748,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union epoll_ref ref, conn_event(c, conn, SOCK_ACCEPTED); inany_from_sockaddr(&conn->addr, &conn->sock_port, sa); - conn->tap_port = ref.tcp.index; + conn->tap_port = ref.port; tcp_snat_inbound(c, &conn->addr); @@ -2765,22 +2766,20 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union epoll_ref ref, } /** - * tcp_conn_from_sock() - Handle new connection request from listening socket + * tcp_listen_handler() - Handle new connection request from listening socket * @c: Execution context * @ref: epoll reference of listening socket * @now: Current timestamp */ -static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref, - const struct timespec *now) +void tcp_listen_handler(struct ctx *c, union epoll_ref ref, + const struct timespec *now) { struct sockaddr_storage sa; union tcp_conn *conn; socklen_t sl; int s; - ASSERT(ref.tcp.listen); - - if (c->tcp.conn_count >= TCP_MAX_CONNS) + if (c->no_tcp || c->tcp.conn_count >= TCP_MAX_CONNS) return; sl = sizeof(sa); @@ -2796,11 +2795,11 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref, conn = tc + c->tcp.conn_count++; if (c->mode == MODE_PASTA && - tcp_splice_conn_from_sock(c, ref, &conn->splice, + tcp_splice_conn_from_sock(c, ref.tcp_listen, &conn->splice, s, (struct sockaddr *)&sa)) return; - tcp_tap_conn_from_sock(c, ref, &conn->tap, s, + tcp_tap_conn_from_sock(c, ref.tcp_listen, &conn->tap, s, (struct sockaddr *)&sa, now); } @@ -2926,19 +2925,10 @@ static void tcp_tap_sock_handler(struct ctx *c, struct tcp_tap_conn *conn, * @c: Execution context * @ref: epoll reference * @events: epoll events bitmap - * @now: Current timestamp */ -void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, - const struct timespec *now) +void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events) { - union tcp_conn *conn; - - if (ref.tcp.listen) { - tcp_conn_from_sock(c, ref, now); - return; - } - - conn = tc + ref.tcp.index; + union tcp_conn *conn = tc + ref.tcp.index; if (conn->c.spliced) tcp_splice_sock_handler(c, &conn->splice, ref.fd, events); @@ -2959,8 +2949,9 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, static int tcp_sock_init_af(const struct ctx *c, int af, in_port_t port, const struct in_addr *addr, const char *ifname) { - in_port_t idx = port + c->tcp.fwd_in.delta[port]; - union tcp_epoll_ref tref = { .listen = 1, .index = idx }; + union tcp_listen_epoll_ref tref = { + .port = port + c->tcp.fwd_in.delta[port], + }; int s; s = sock_l4(c, af, IPPROTO_TCP, addr, ifname, port, tref.u32); @@ -3019,9 +3010,10 @@ int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, */ static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port) { - in_port_t idx = port + c->tcp.fwd_out.delta[port]; - union tcp_epoll_ref tref = { .listen = 1, .outbound = 1, - .index = idx }; + union tcp_listen_epoll_ref tref = { + .port = port + c->tcp.fwd_out.delta[port], + .ns = true, + }; struct in_addr loopback = { htonl(INADDR_LOOPBACK) }; int s; @@ -3044,9 +3036,10 @@ static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port) */ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port) { - in_port_t idx = port + c->tcp.fwd_out.delta[port]; - union tcp_epoll_ref tref = { .listen = 1, .outbound = 1, - .index = idx }; + union tcp_listen_epoll_ref tref = { + .port = port + c->tcp.fwd_out.delta[port], + .ns = true, + }; int s; ASSERT(c->mode == MODE_PASTA); diff --git a/tcp.h b/tcp.h index 8eb7782..be296ec 100644 --- a/tcp.h +++ b/tcp.h @@ -14,8 +14,9 @@ struct ctx; void tcp_timer_handler(struct ctx *c, union epoll_ref ref); -void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, - const struct timespec *now); +void tcp_listen_handler(struct ctx *c, union epoll_ref ref, + const struct timespec *now); +void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events); int tcp_tap_handler(struct ctx *c, int af, const void *addr, const struct pool *p, const struct timespec *now); int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, @@ -30,16 +31,24 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s, /** * union tcp_epoll_ref - epoll reference portion for TCP connections - * @listen: Set if this file descriptor is a listening socket - * @outbound: Listening socket maps to outbound, spliced connection - * @index: Index of connection in table, or port for bound sockets + * @index: Index of connection in table * @u32: Opaque u32 value of reference */ union tcp_epoll_ref { + uint32_t index:20; + uint32_t u32; +}; + +/** + * union tcp_listen_epoll_ref - epoll reference portion for TCP listening + * @port: Port number we're forwarding *to* (listening port plus delta) + * @ns: True if listening within the pasta namespace + * @u32: Opaque u32 value of reference + */ +union tcp_listen_epoll_ref { struct { - uint32_t listen:1, - outbound:1, - index:20; + in_port_t port; + bool ns; }; uint32_t u32; }; diff --git a/tcp_splice.c b/tcp_splice.c index 24995e2..64c1263 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -486,7 +486,7 @@ static void tcp_splice_dir(struct tcp_splice_conn *conn, int ref_sock, * Return: true if able to create a spliced connection, false otherwise * #syscalls:pasta setsockopt */ -bool tcp_splice_conn_from_sock(struct ctx *c, union epoll_ref ref, +bool tcp_splice_conn_from_sock(struct ctx *c, union tcp_listen_epoll_ref ref, struct tcp_splice_conn *conn, int s, const struct sockaddr *sa) { @@ -516,7 +516,7 @@ bool tcp_splice_conn_from_sock(struct ctx *c, union epoll_ref ref, c->tcp.splice_conn_count++; conn->a = s; - if (tcp_splice_new(c, conn, ref.tcp.index, ref.tcp.outbound)) + if (tcp_splice_new(c, conn, ref.port, ref.ns)) conn_flag(c, conn, CLOSING); return true; diff --git a/tcp_splice.h b/tcp_splice.h index 99dbac8..e7a583a 100644 --- a/tcp_splice.h +++ b/tcp_splice.h @@ -10,7 +10,7 @@ struct tcp_splice_conn; void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn, int s, uint32_t events); -bool tcp_splice_conn_from_sock(struct ctx *c, union epoll_ref ref, +bool tcp_splice_conn_from_sock(struct ctx *c, union tcp_listen_epoll_ref ref, struct tcp_splice_conn *conn, int s, const struct sockaddr *sa); void tcp_splice_init(struct ctx *c); diff --git a/util.c b/util.c index 2cac7ba..d965f48 100644 --- a/util.c +++ b/util.c @@ -120,7 +120,7 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, switch (proto) { case IPPROTO_TCP: - ref.type = EPOLL_TYPE_TCP; + ref.type = EPOLL_TYPE_TCP_LISTEN; break; case IPPROTO_UDP: ref.type = EPOLL_TYPE_UDP;