From 4e2d36e83fb1ff86c2e7f6d0832827d59f829934 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 18 Jul 2024 15:26:28 +1000 Subject: [PATCH] flow: Common address information for target side Require the address and port information for the target (non initiating) side to be populated when a flow enters TGT state. Implement that for TCP and ICMP. For now this leaves some information redundantly recorded in both generic and type specific fields. We'll fix that in later patches. For TCP we now use the information from the flow to construct the destination socket address in both tcp_conn_from_tap() and tcp_splice_connect(). Signed-off-by: David Gibson Signed-off-by: Stefano Brivio --- flow.c | 38 ++++++++++++++++++------ flow_table.h | 5 +++- icmp.c | 3 +- inany.h | 1 - pif.c | 40 +++++++++++++++++++++++++ pif.h | 17 +++++++++++ tcp.c | 82 ++++++++++++++++++++++++++++------------------------ tcp_splice.c | 45 +++++++++++----------------- 8 files changed, 153 insertions(+), 78 deletions(-) diff --git a/flow.c b/flow.c index 223d059..71c44d2 100644 --- a/flow.c +++ b/flow.c @@ -165,8 +165,10 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...) */ static void flow_set_state(struct flow_common *f, enum flow_state state) { - char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN]; + char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN]; + char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN]; const struct flowside *ini = &f->side[INISIDE]; + const struct flowside *tgt = &f->side[TGTSIDE]; uint8_t oldstate = f->state; ASSERT(state < FLOW_NUM_STATES); @@ -177,19 +179,24 @@ static void flow_set_state(struct flow_common *f, enum flow_state state) FLOW_STATE(f)); if (MAX(state, oldstate) >= FLOW_STATE_TGT) - flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => %s", + flow_log_(f, LOG_DEBUG, + "%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu", pif_name(f->pif[INISIDE]), - inany_ntop(&ini->eaddr, estr, sizeof(estr)), + inany_ntop(&ini->eaddr, estr0, sizeof(estr0)), ini->eport, - inany_ntop(&ini->faddr, fstr, sizeof(fstr)), + inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)), ini->fport, - pif_name(f->pif[TGTSIDE])); + pif_name(f->pif[TGTSIDE]), + inany_ntop(&tgt->faddr, fstr1, sizeof(fstr1)), + tgt->fport, + inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)), + tgt->eport); else if (MAX(state, oldstate) >= FLOW_STATE_INI) flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => ?", pif_name(f->pif[INISIDE]), - inany_ntop(&ini->eaddr, estr, sizeof(estr)), + inany_ntop(&ini->eaddr, estr0, sizeof(estr0)), ini->eport, - inany_ntop(&ini->faddr, fstr, sizeof(fstr)), + inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)), ini->fport); } @@ -261,21 +268,34 @@ const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, } /** - * flow_target() - Move flow to TGT, setting TGTSIDE details + * flow_target_af() - Move flow to TGT, setting TGTSIDE details * @flow: Flow to change state * @pif: pif of the target side + * @af: Address family for @eaddr and @faddr + * @saddr: Source address (pointer to in_addr or in6_addr) + * @sport: Endpoint port + * @daddr: Destination address (pointer to in_addr or in6_addr) + * @dport: Destination port + * + * Return: pointer to the target flowside information */ -void flow_target(union flow *flow, uint8_t pif) +const struct flowside *flow_target_af(union flow *flow, uint8_t pif, + sa_family_t af, + const void *saddr, in_port_t sport, + const void *daddr, in_port_t dport) { struct flow_common *f = &flow->f; + struct flowside *tgt = &f->side[TGTSIDE]; ASSERT(pif != PIF_NONE); ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI); ASSERT(f->type == FLOW_TYPE_NONE); ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE); + flowside_from_af(tgt, af, daddr, dport, saddr, sport); f->pif[TGTSIDE] = pif; flow_set_state(f, FLOW_STATE_TGT); + return tgt; } /** diff --git a/flow_table.h b/flow_table.h index 586d857..aabdbb7 100644 --- a/flow_table.h +++ b/flow_table.h @@ -134,7 +134,10 @@ const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif, const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, const union sockaddr_inany *ssa, in_port_t dport); -void flow_target(union flow *flow, uint8_t pif); +const struct flowside *flow_target_af(union flow *flow, uint8_t pif, + sa_family_t af, + const void *saddr, in_port_t sport, + const void *daddr, in_port_t dport); union flow *flow_set_type(union flow *flow, enum flow_type type); #define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_) diff --git a/icmp.c b/icmp.c index 9b95be2..ebfa627 100644 --- a/icmp.c +++ b/icmp.c @@ -183,7 +183,8 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, return NULL; flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id); - flow_target(flow, PIF_HOST); + /* FIXME: Record outbound source address when known */ + flow_target_af(flow, PIF_HOST, af, NULL, 0, daddr, 0); pingf = FLOW_SET_TYPE(flow, flowtype, ping); pingf->seq = -1; diff --git a/inany.h b/inany.h index 47b66fa..8eaf533 100644 --- a/inany.h +++ b/inany.h @@ -187,7 +187,6 @@ static inline bool inany_is_unspecified(const union inany_addr *a) * * Return: true if @a is in fe80::/10 (IPv6 link local unicast) */ -/* cppcheck-suppress unusedFunction */ static inline bool inany_is_linklocal6(const union inany_addr *a) { return IN6_IS_ADDR_LINKLOCAL(&a->a6); diff --git a/pif.c b/pif.c index ebf01cc..a099e31 100644 --- a/pif.c +++ b/pif.c @@ -7,9 +7,14 @@ #include #include +#include #include "util.h" #include "pif.h" +#include "siphash.h" +#include "ip.h" +#include "inany.h" +#include "passt.h" const char *pif_type_str[] = { [PIF_NONE] = "", @@ -19,3 +24,38 @@ const char *pif_type_str[] = { }; static_assert(ARRAY_SIZE(pif_type_str) == PIF_NUM_TYPES, "pif_type_str[] doesn't match enum pif_type"); + + +/** pif_sockaddr() - Construct a socket address suitable for an interface + * @c: Execution context + * @sa: Pointer to sockaddr to fill in + * @sl: Updated to relevant length of initialised @sa + * @pif: Interface to create the socket address + * @addr: IPv[46] address + * @port: Port (host byte order) + */ +void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl, + uint8_t pif, const union inany_addr *addr, in_port_t port) +{ + const struct in_addr *v4 = inany_v4(addr); + + ASSERT(pif_is_socket(pif)); + + if (v4) { + sa->sa_family = AF_INET; + sa->sa4.sin_addr = *v4; + sa->sa4.sin_port = htons(port); + memset(&sa->sa4.sin_zero, 0, sizeof(sa->sa4.sin_zero)); + *sl = sizeof(sa->sa4); + } else { + sa->sa_family = AF_INET6; + sa->sa6.sin6_addr = addr->a6; + sa->sa6.sin6_port = htons(port); + if (pif == PIF_HOST && IN6_IS_ADDR_LINKLOCAL(&addr->a6)) + sa->sa6.sin6_scope_id = c->ifi6; + else + sa->sa6.sin6_scope_id = 0; + sa->sa6.sin6_flowinfo = 0; + *sl = sizeof(sa->sa6); + } +} diff --git a/pif.h b/pif.h index ca85b34..8777bb5 100644 --- a/pif.h +++ b/pif.h @@ -7,6 +7,9 @@ #ifndef PIF_H #define PIF_H +union inany_addr; +union sockaddr_inany; + /** * enum pif_type - Type of passt/pasta interface ("pif") * @@ -43,4 +46,18 @@ static inline const char *pif_name(uint8_t pif) return pif_type(pif); } +/** + * pif_is_socket() - Is interface implemented via L4 sockets? + * @pif: pif to check + * + * Return: true of @pif is an L4 socket based interface, otherwise false + */ +static inline bool pif_is_socket(uint8_t pif) +{ + return pif == PIF_HOST || pif == PIF_SPLICE; +} + +void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl, + uint8_t pif, const union inany_addr *addr, in_port_t port); + #endif /* PIF_H */ diff --git a/tcp.c b/tcp.c index 286a417..914a074 100644 --- a/tcp.c +++ b/tcp.c @@ -1647,18 +1647,10 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, { in_port_t srcport = ntohs(th->source); in_port_t dstport = ntohs(th->dest); - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(dstport), - .sin_addr = *(struct in_addr *)daddr, - }; - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(dstport), - .sin6_addr = *(struct in6_addr *)daddr, - }; - const struct sockaddr *sa; + const struct flowside *ini, *tgt; struct tcp_tap_conn *conn; + union inany_addr dstaddr; /* FIXME: Avoid bulky temporary */ + union sockaddr_inany sa; union flow *flow; int s = -1, mss; socklen_t sl; @@ -1666,9 +1658,22 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, if (!(flow = flow_alloc())) return; - flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport); + ini = flow_initiate_af(flow, PIF_TAP, + af, saddr, srcport, daddr, dstport); - flow_target(flow, PIF_HOST); + dstaddr = ini->faddr; + if (!c->no_map_gw) { + if (inany_equals4(&dstaddr, &c->ip4.gw)) + dstaddr = inany_loopback4; + else if (inany_equals6(&dstaddr, &c->ip6.gw)) + dstaddr = inany_loopback6; + + } + + /* FIXME: Record outbound source address when known */ + tgt = flow_target_af(flow, PIF_HOST, AF_INET6, + NULL, 0, /* Kernel decides source address */ + &dstaddr, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); if (af == AF_INET) { @@ -1687,9 +1692,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, dstport); goto cancel; } - - sa = (struct sockaddr *)&addr4; - sl = sizeof(addr4); } else if (af == AF_INET6) { if (IN6_IS_ADDR_UNSPECIFIED(saddr) || IN6_IS_ADDR_MULTICAST(saddr) || srcport == 0 || @@ -1704,9 +1706,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, dstport); goto cancel; } - - sa = (struct sockaddr *)&addr6; - sl = sizeof(addr6); } else { ASSERT(0); } @@ -1714,12 +1713,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, if ((s = tcp_conn_sock(c, af)) < 0) goto cancel; - if (!c->no_map_gw) { - if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw)) - addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - if (af == AF_INET6 && IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw)) - addr6.sin6_addr = in6addr_loopback; - } + pif_sockaddr(c, &sa, &sl, PIF_HOST, &tgt->eaddr, tgt->eport); /* Use bind() to check if the target address is local (EADDRINUSE or * similar) and already bound, and set the LOCAL flag in that case. @@ -1731,7 +1725,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, * * So, if bind() succeeds, close the socket, get a new one, and proceed. */ - if (bind(s, sa, sl)) { + if (bind(s, &sa.sa, sl)) { if (errno != EADDRNOTAVAIL && errno != EACCES) conn_flag(c, conn, LOCAL); } else { @@ -1741,7 +1735,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, goto cancel; } - if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)) { + if (inany_is_linklocal6(&tgt->eaddr)) { struct sockaddr_in6 addr6_ll = { .sin6_family = AF_INET6, .sin6_addr = c->ip6.addr_ll, @@ -1749,6 +1743,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, }; if (bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll))) goto cancel; + } else if (!inany_is_loopback(&tgt->eaddr)) { + tcp_bind_outbound(c, s, af); } conn->sock = s; @@ -1784,12 +1780,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, tcp_hash_insert(c, conn); - if ((af == AF_INET && !IN4_IS_ADDR_LOOPBACK(&addr4.sin_addr)) || - (af == AF_INET6 && !IN6_IS_ADDR_LOOPBACK(&addr6.sin6_addr) && - !IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr))) - tcp_bind_outbound(c, s, af); - - if (connect(s, sa, sl)) { + if (connect(s, &sa.sa, sl)) { if (errno != EINPROGRESS) { tcp_rst(c, conn); goto cancel; @@ -2297,9 +2288,25 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, const union sockaddr_inany *sa, const struct timespec *now) { + union inany_addr saddr, daddr; /* FIXME: avoid bulky temporaries */ struct tcp_tap_conn *conn; + in_port_t srcport; - flow_target(flow, PIF_TAP); + inany_from_sockaddr(&saddr, &srcport, sa); + tcp_snat_inbound(c, &saddr); + + if (inany_v4(&saddr)) { + daddr = inany_from_v4(c->ip4.addr_seen); + } else { + if (inany_is_linklocal6(&saddr)) + daddr.a6 = c->ip6.addr_ll_seen; + else + daddr.a6 = c->ip6.addr_seen; + } + dstport += c->tcp.fwd_in.delta[dstport]; + + flow_target_af(flow, PIF_TAP, AF_INET6, + &saddr, srcport, &daddr, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); conn->sock = s; @@ -2307,10 +2314,9 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, conn->ws_to_tap = conn->ws_from_tap = 0; conn_event(c, conn, SOCK_ACCEPTED); - inany_from_sockaddr(&conn->faddr, &conn->fport, sa); - conn->eport = dstport + c->tcp.fwd_in.delta[dstport]; - - tcp_snat_inbound(c, &conn->faddr); + conn->faddr = saddr; + conn->fport = srcport; + conn->eport = dstport; tcp_seq_init(c, conn, now); tcp_hash_insert(c, conn); diff --git a/tcp_splice.c b/tcp_splice.c index ffddc85..608e7f2 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -340,31 +340,20 @@ static int tcp_splice_connect_finish(const struct ctx *c, * tcp_splice_connect() - Create and connect socket for new spliced connection * @c: Execution context * @conn: Connection pointer - * @af: Address family - * @pif: pif on which to create socket - * @port: Destination port, host order * * Return: 0 for connect() succeeded or in progress, negative value on error */ -static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, - sa_family_t af, uint8_t pif, in_port_t port) +static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn) { - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(port), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(port), - .sin_addr = IN4ADDR_LOOPBACK_INIT, - }; - const struct sockaddr *sa; + const struct flowside *tgt = &conn->f.side[TGTSIDE]; + sa_family_t af = inany_v4(&tgt->eaddr) ? AF_INET : AF_INET6; + uint8_t tgtpif = conn->f.pif[TGTSIDE]; + union sockaddr_inany sa; socklen_t sl; - if (pif == PIF_HOST) + if (tgtpif == PIF_HOST) conn->s[1] = tcp_conn_sock(c, af); - else if (pif == PIF_SPLICE) + else if (tgtpif == PIF_SPLICE) conn->s[1] = tcp_conn_sock_ns(c, af); else ASSERT(0); @@ -378,15 +367,9 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, conn->s[1]); } - if (CONN_V6(conn)) { - sa = (struct sockaddr *)&addr6; - sl = sizeof(addr6); - } else { - sa = (struct sockaddr *)&addr4; - sl = sizeof(addr4); - } + pif_sockaddr(c, &sa, &sl, tgtpif, &tgt->eaddr, tgt->eport); - if (connect(conn->s[1], sa, sl)) { + if (connect(conn->s[1], &sa.sa, sl)) { if (errno != EINPROGRESS) { flow_trace(conn, "Couldn't connect socket for splice: %s", strerror(errno)); @@ -491,7 +474,13 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, return false; } - flow_target(flow, tgtpif); + /* FIXME: Record outbound source address when known */ + if (af == AF_INET) + flow_target_af(flow, tgtpif, AF_INET, + NULL, 0, &in4addr_loopback, dstport); + else + flow_target_af(flow, tgtpif, AF_INET6, + NULL, 0, &in6addr_loopback, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice); conn->flags = af == AF_INET ? 0 : SPLICE_V6; @@ -503,7 +492,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int))) flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0); - if (tcp_splice_connect(c, conn, af, tgtpif, dstport)) + if (tcp_splice_connect(c, conn)) conn_flag(c, conn, CLOSING); FLOW_ACTIVATE(conn);