diff --git a/Makefile b/Makefile index bd504d2..b6329e3 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS) PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \ icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \ ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \ - tcp_buf.c tcp_splice.c udp.c util.c + tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c QRAP_SRCS = qrap.c SRCS = $(PASST_SRCS) $(QRAP_SRCS) diff --git a/udp.c b/udp.c index f27a00b..7731257 100644 --- a/udp.c +++ b/udp.c @@ -95,7 +95,6 @@ #include #include #include -#include #include #include @@ -111,7 +110,6 @@ #include "log.h" #include "flow_table.h" -#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ #define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */ /* "Spliced" sockets indexed by bound port (host order) */ @@ -276,199 +274,6 @@ static void udp_iov_init(const struct ctx *c) udp_iov_init_one(c, i); } -/** - * udp_at_sidx() - Get UDP specific flow at given sidx - * @sidx: Flow and side to retrieve - * - * Return: UDP specific flow at @sidx, or NULL of @sidx is invalid. Asserts if - * the flow at @sidx is not FLOW_UDP. 
- */ -struct udp_flow *udp_at_sidx(flow_sidx_t sidx) -{ - union flow *flow = flow_at_sidx(sidx); - - if (!flow) - return NULL; - - ASSERT(flow->f.type == FLOW_UDP); - return &flow->udp; -} - -/* - * udp_flow_close() - Close and clean up UDP flow - * @c: Execution context - * @uflow: UDP flow - */ -static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow) -{ - if (uflow->s[INISIDE] >= 0) { - /* The listening socket needs to stay in epoll */ - close(uflow->s[INISIDE]); - uflow->s[INISIDE] = -1; - } - - if (uflow->s[TGTSIDE] >= 0) { - /* But the flow specific one needs to be removed */ - epoll_ctl(c->epollfd, EPOLL_CTL_DEL, uflow->s[TGTSIDE], NULL); - close(uflow->s[TGTSIDE]); - uflow->s[TGTSIDE] = -1; - } - flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE)); - if (!pif_is_socket(uflow->f.pif[TGTSIDE])) - flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE)); -} - -/** - * udp_flow_new() - Common setup for a new UDP flow - * @c: Execution context - * @flow: Initiated flow - * @s_ini: Initiating socket (or -1) - * @now: Timestamp - * - * Return: UDP specific flow, if successful, NULL on failure - */ -static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow, - int s_ini, const struct timespec *now) -{ - const struct flowside *ini = &flow->f.side[INISIDE]; - struct udp_flow *uflow = NULL; - const struct flowside *tgt; - uint8_t tgtpif; - - if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0) { - flow_trace(flow, "Invalid endpoint to initiate UDP flow"); - goto cancel; - } - - if (!(tgt = flow_target(c, flow, IPPROTO_UDP))) - goto cancel; - tgtpif = flow->f.pif[TGTSIDE]; - - uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp); - uflow->ts = now->tv_sec; - uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1; - - if (s_ini >= 0) { - /* When using auto port-scanning the listening port could go - * away, so we need to duplicate the socket - */ - uflow->s[INISIDE] = fcntl(s_ini, F_DUPFD_CLOEXEC, 0); - if (uflow->s[INISIDE] < 0) { - flow_err(uflow, - "Couldn't duplicate 
listening socket: %s", - strerror(errno)); - goto cancel; - } - } - - if (pif_is_socket(tgtpif)) { - struct mmsghdr discard[UIO_MAXIOV] = { 0 }; - union { - flow_sidx_t sidx; - uint32_t data; - } fref = { - .sidx = FLOW_SIDX(flow, TGTSIDE), - }; - int rc; - - uflow->s[TGTSIDE] = flowside_sock_l4(c, EPOLL_TYPE_UDP_REPLY, - tgtpif, tgt, fref.data); - if (uflow->s[TGTSIDE] < 0) { - flow_dbg(uflow, - "Couldn't open socket for spliced flow: %s", - strerror(errno)); - goto cancel; - } - - if (flowside_connect(c, uflow->s[TGTSIDE], tgtpif, tgt) < 0) { - flow_dbg(uflow, - "Couldn't connect flow socket: %s", - strerror(errno)); - goto cancel; - } - - /* It's possible, if unlikely, that we could receive some - * unrelated packets in between the bind() and connect() of this - * socket. For now we just discard these. We could consider - * trying to redirect these to an appropriate handler, if we - * need to. - */ - rc = recvmmsg(uflow->s[TGTSIDE], discard, ARRAY_SIZE(discard), - MSG_DONTWAIT, NULL); - if (rc >= ARRAY_SIZE(discard)) { - flow_dbg(uflow, - "Too many (%d) spurious reply datagrams", rc); - goto cancel; - } else if (rc > 0) { - flow_trace(uflow, - "Discarded %d spurious reply datagrams", rc); - } else if (errno != EAGAIN) { - flow_err(uflow, - "Unexpected error discarding datagrams: %s", - strerror(errno)); - } - } - - flow_hash_insert(c, FLOW_SIDX(uflow, INISIDE)); - - /* If the target side is a socket, it will be a reply socket that knows - * its own flowside. But if it's tap, then we need to look it up by - * hash. 
- */ - if (!pif_is_socket(tgtpif)) - flow_hash_insert(c, FLOW_SIDX(uflow, TGTSIDE)); - FLOW_ACTIVATE(uflow); - - return FLOW_SIDX(uflow, TGTSIDE); - -cancel: - if (uflow) - udp_flow_close(c, uflow); - flow_alloc_cancel(flow); - return FLOW_SIDX_NONE; -} - -/** - * udp_flow_from_sock() - Find or create UDP flow for "listening" socket - * @c: Execution context - * @ref: epoll reference of the receiving socket - * @s_in: Source socket address, filled in by recvmmsg() - * @now: Timestamp - * - * #syscalls fcntl - * - * Return: sidx for the destination side of the flow for this packet, or - * FLOW_SIDX_NONE if we couldn't find or create a flow. - */ -static flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref, - const union sockaddr_inany *s_in, - const struct timespec *now) -{ - struct udp_flow *uflow; - union flow *flow; - flow_sidx_t sidx; - - ASSERT(ref.type == EPOLL_TYPE_UDP_LISTEN); - - sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, s_in, ref.udp.port); - if ((uflow = udp_at_sidx(sidx))) { - uflow->ts = now->tv_sec; - return flow_sidx_opposite(sidx); - } - - if (!(flow = flow_alloc())) { - char sastr[SOCKADDR_STRLEN]; - - debug("Couldn't allocate flow for UDP datagram from %s %s", - pif_name(ref.udp.pif), - sockaddr_ntop(s_in, sastr, sizeof(sastr))); - return FLOW_SIDX_NONE; - } - - flow_initiate_sa(flow, ref.udp.pif, s_in, ref.udp.port); - return udp_flow_new(c, flow, ref.fd, now); -} - /** * udp_splice_prepare() - Prepare one datagram for splicing * @mmh: Receiving mmsghdr array @@ -804,53 +609,6 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, } } -/** - * udp_flow_from_tap() - Find or create UDP flow for tap packets - * @c: Execution context - * @pif: pif on which the packet is arriving - * @af: Address family, AF_INET or AF_INET6 - * @saddr: Source address on guest side - * @daddr: Destination address guest side - * @srcport: Source port on guest side - * @dstport: Destination port on guest side - * - * Return: 
sidx for the destination side of the flow for this packet, or - * FLOW_SIDX_NONE if we couldn't find or create a flow. - */ -static flow_sidx_t udp_flow_from_tap(const struct ctx *c, - uint8_t pif, sa_family_t af, - const void *saddr, const void *daddr, - in_port_t srcport, in_port_t dstport, - const struct timespec *now) -{ - struct udp_flow *uflow; - union flow *flow; - flow_sidx_t sidx; - - ASSERT(pif == PIF_TAP); - - sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr, - srcport, dstport); - if ((uflow = udp_at_sidx(sidx))) { - uflow->ts = now->tv_sec; - return flow_sidx_opposite(sidx); - } - - if (!(flow = flow_alloc())) { - char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN]; - - debug("Couldn't allocate flow for UDP datagram from %s %s:%hu -> %s:%hu", - pif_name(pif), - inet_ntop(af, saddr, sstr, sizeof(sstr)), srcport, - inet_ntop(af, daddr, dstr, sizeof(dstr)), dstport); - return FLOW_SIDX_NONE; - } - - flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport); - - return udp_flow_new(c, flow, -1, now); -} - /** * udp_tap_handler() - Handle packets from tap * @c: Execution context @@ -1098,24 +856,6 @@ static int udp_port_rebind_outbound(void *arg) return 0; } -/** - * udp_flow_timer() - Handler for timed events related to a given flow - * @c: Execution context - * @uflow: UDP flow - * @now: Current timestamp - * - * Return: true if the flow is ready to free, false otherwise - */ -bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, - const struct timespec *now) -{ - if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT) - return false; - - udp_flow_close(c, uflow); - return true; -} - /** * udp_timer() - Scan activity bitmaps for ports with associated timed events * @c: Execution context diff --git a/udp_flow.c b/udp_flow.c new file mode 100644 index 0000000..8b25ad1 --- /dev/null +++ b/udp_flow.c @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson + * + * UDP flow tracking 
functions + */ + +#include +#include +#include + +#include "util.h" +#include "passt.h" +#include "flow_table.h" + +#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ + +/** + * udp_at_sidx() - Get UDP specific flow at given sidx + * @sidx: Flow and side to retrieve + * + * Return: UDP specific flow at @sidx, or NULL if @sidx is invalid. Asserts if + * the flow at @sidx is not FLOW_UDP. + */ +struct udp_flow *udp_at_sidx(flow_sidx_t sidx) +{ + union flow *flow = flow_at_sidx(sidx); + + if (!flow) + return NULL; + + ASSERT(flow->f.type == FLOW_UDP); + return &flow->udp; +} + +/** + * udp_flow_close() - Close and clean up UDP flow + * @c: Execution context + * @uflow: UDP flow + */ +static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow) +{ + if (uflow->s[INISIDE] >= 0) { + /* The listening socket needs to stay in epoll */ + close(uflow->s[INISIDE]); + uflow->s[INISIDE] = -1; + } + + if (uflow->s[TGTSIDE] >= 0) { + /* But the flow specific one needs to be removed */ + epoll_ctl(c->epollfd, EPOLL_CTL_DEL, uflow->s[TGTSIDE], NULL); + close(uflow->s[TGTSIDE]); + uflow->s[TGTSIDE] = -1; + } + flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE)); + if (!pif_is_socket(uflow->f.pif[TGTSIDE])) + flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE)); +} + +/** + * udp_flow_new() - Common setup for a new UDP flow + * @c: Execution context + * @flow: Initiated flow + * @s_ini: Initiating socket (or -1) + * @now: Timestamp + * + * Return: sidx for the target side of the new flow, FLOW_SIDX_NONE on failure + */ +static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow, + int s_ini, const struct timespec *now) +{ + const struct flowside *ini = &flow->f.side[INISIDE]; + struct udp_flow *uflow = NULL; + const struct flowside *tgt; + uint8_t tgtpif; + + if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0) { + flow_trace(flow, "Invalid endpoint to initiate UDP flow"); + goto cancel; + } + + if (!(tgt = flow_target(c, flow, IPPROTO_UDP))) + goto cancel; + tgtpif = 
flow->f.pif[TGTSIDE]; + + uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp); + uflow->ts = now->tv_sec; + uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1; + + if (s_ini >= 0) { + /* When using auto port-scanning the listening port could go + * away, so we need to duplicate the socket + */ + uflow->s[INISIDE] = fcntl(s_ini, F_DUPFD_CLOEXEC, 0); + if (uflow->s[INISIDE] < 0) { + flow_err(uflow, + "Couldn't duplicate listening socket: %s", + strerror(errno)); + goto cancel; + } + } + + if (pif_is_socket(tgtpif)) { + struct mmsghdr discard[UIO_MAXIOV] = { 0 }; + union { + flow_sidx_t sidx; + uint32_t data; + } fref = { + .sidx = FLOW_SIDX(flow, TGTSIDE), + }; + int rc; + + uflow->s[TGTSIDE] = flowside_sock_l4(c, EPOLL_TYPE_UDP_REPLY, + tgtpif, tgt, fref.data); + if (uflow->s[TGTSIDE] < 0) { + flow_dbg(uflow, + "Couldn't open socket for spliced flow: %s", + strerror(errno)); + goto cancel; + } + + if (flowside_connect(c, uflow->s[TGTSIDE], tgtpif, tgt) < 0) { + flow_dbg(uflow, + "Couldn't connect flow socket: %s", + strerror(errno)); + goto cancel; + } + + /* It's possible, if unlikely, that we could receive some + * unrelated packets in between the bind() and connect() of this + * socket. For now we just discard these. We could consider + * trying to redirect these to an appropriate handler, if we + * need to. + */ + rc = recvmmsg(uflow->s[TGTSIDE], discard, ARRAY_SIZE(discard), + MSG_DONTWAIT, NULL); + if (rc >= ARRAY_SIZE(discard)) { + flow_dbg(uflow, + "Too many (%d) spurious reply datagrams", rc); + goto cancel; + } else if (rc > 0) { + flow_trace(uflow, + "Discarded %d spurious reply datagrams", rc); + } else if (errno != EAGAIN) { + flow_err(uflow, + "Unexpected error discarding datagrams: %s", + strerror(errno)); + } + } + + flow_hash_insert(c, FLOW_SIDX(uflow, INISIDE)); + + /* If the target side is a socket, it will be a reply socket that knows + * its own flowside. But if it's tap, then we need to look it up by + * hash.
+ */ + if (!pif_is_socket(tgtpif)) + flow_hash_insert(c, FLOW_SIDX(uflow, TGTSIDE)); + FLOW_ACTIVATE(uflow); + + return FLOW_SIDX(uflow, TGTSIDE); + +cancel: + if (uflow) + udp_flow_close(c, uflow); + flow_alloc_cancel(flow); + return FLOW_SIDX_NONE; +} + +/** + * udp_flow_from_sock() - Find or create UDP flow for "listening" socket + * @c: Execution context + * @ref: epoll reference of the receiving socket + * @s_in: Source socket address, filled in by recvmmsg() + * @now: Timestamp + * + * #syscalls fcntl + * + * Return: sidx for the destination side of the flow for this packet, or + * FLOW_SIDX_NONE if we couldn't find or create a flow. + */ +flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref, + const union sockaddr_inany *s_in, + const struct timespec *now) +{ + struct udp_flow *uflow; + union flow *flow; + flow_sidx_t sidx; + + ASSERT(ref.type == EPOLL_TYPE_UDP_LISTEN); + + sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, s_in, ref.udp.port); + if ((uflow = udp_at_sidx(sidx))) { + uflow->ts = now->tv_sec; + return flow_sidx_opposite(sidx); + } + + if (!(flow = flow_alloc())) { + char sastr[SOCKADDR_STRLEN]; + + debug("Couldn't allocate flow for UDP datagram from %s %s", + pif_name(ref.udp.pif), + sockaddr_ntop(s_in, sastr, sizeof(sastr))); + return FLOW_SIDX_NONE; + } + + flow_initiate_sa(flow, ref.udp.pif, s_in, ref.udp.port); + return udp_flow_new(c, flow, ref.fd, now); +} + +/** + * udp_flow_from_tap() - Find or create UDP flow for tap packets + * @c: Execution context + * @pif: pif on which the packet is arriving + * @af: Address family, AF_INET or AF_INET6 + * @saddr: Source address on guest side + * @daddr: Destination address on guest side + * @srcport: Source port on guest side + * @dstport: Destination port on guest side + * + * Return: sidx for the destination side of the flow for this packet, or + * FLOW_SIDX_NONE if we couldn't find or create a flow.
+ */ +flow_sidx_t udp_flow_from_tap(const struct ctx *c, + uint8_t pif, sa_family_t af, + const void *saddr, const void *daddr, + in_port_t srcport, in_port_t dstport, + const struct timespec *now) +{ + struct udp_flow *uflow; + union flow *flow; + flow_sidx_t sidx; + + ASSERT(pif == PIF_TAP); + + sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr, + srcport, dstport); + if ((uflow = udp_at_sidx(sidx))) { + uflow->ts = now->tv_sec; + return flow_sidx_opposite(sidx); + } + + if (!(flow = flow_alloc())) { + char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN]; + + debug("Couldn't allocate flow for UDP datagram from %s %s:%hu -> %s:%hu", + pif_name(pif), + inet_ntop(af, saddr, sstr, sizeof(sstr)), srcport, + inet_ntop(af, daddr, dstr, sizeof(dstr)), dstport); + return FLOW_SIDX_NONE; + } + + flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport); + + return udp_flow_new(c, flow, -1, now); +} + +/** + * udp_flow_timer() - Handler for timed events related to a given flow + * @c: Execution context + * @uflow: UDP flow + * @now: Current timestamp + * + * Return: true if the flow is ready to free, false otherwise + */ +bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, + const struct timespec *now) +{ + if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT) + return false; + + udp_flow_close(c, uflow); + return true; +} diff --git a/udp_flow.h b/udp_flow.h index e0736f8..12ddf03 100644 --- a/udp_flow.h +++ b/udp_flow.h @@ -21,6 +21,15 @@ struct udp_flow { int s[SIDES]; }; +struct udp_flow *udp_at_sidx(flow_sidx_t sidx); +flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref, + const union sockaddr_inany *s_in, + const struct timespec *now); +flow_sidx_t udp_flow_from_tap(const struct ctx *c, + uint8_t pif, sa_family_t af, + const void *saddr, const void *daddr, + in_port_t srcport, in_port_t dstport, + const struct timespec *now); bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, const struct timespec *now);