diff --git a/flow.c b/flow.c index c4f1236..c1af136 100644 --- a/flow.c +++ b/flow.c @@ -400,6 +400,59 @@ const struct flowside *flow_target_af(union flow *flow, uint8_t pif, return tgt; } + +/** + * flow_target() - Determine where flow should forward to, and move to TGT + * @c: Execution context + * @flow: Flow to forward + * @proto: Protocol + * + * Return: pointer to the target flowside information + */ +const struct flowside *flow_target(const struct ctx *c, union flow *flow, + uint8_t proto) +{ + char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN]; + struct flow_common *f = &flow->f; + const struct flowside *ini = &f->side[INISIDE]; + struct flowside *tgt = &f->side[TGTSIDE]; + uint8_t tgtpif = PIF_NONE; + + ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI); + ASSERT(f->type == FLOW_TYPE_NONE); + ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE); + ASSERT(flow->f.state == FLOW_STATE_INI); + + switch (f->pif[INISIDE]) { + case PIF_TAP: + tgtpif = fwd_nat_from_tap(c, proto, ini, tgt); + break; + + case PIF_SPLICE: + tgtpif = fwd_nat_from_splice(c, proto, ini, tgt); + break; + + case PIF_HOST: + tgtpif = fwd_nat_from_host(c, proto, ini, tgt); + break; + + default: + flow_err(flow, "No rules to forward %s [%s]:%hu -> [%s]:%hu", + pif_name(f->pif[INISIDE]), + inany_ntop(&ini->eaddr, estr, sizeof(estr)), + ini->eport, + inany_ntop(&ini->faddr, fstr, sizeof(fstr)), + ini->fport); + } + + if (tgtpif == PIF_NONE) + return NULL; + + f->pif[TGTSIDE] = tgtpif; + flow_set_state(f, FLOW_STATE_TGT); + return tgt; +} + /** * flow_set_type() - Set type and move to TYPED * @flow: Flow to change state diff --git a/flow_table.h b/flow_table.h index aabdbb7..9d912c8 100644 --- a/flow_table.h +++ b/flow_table.h @@ -138,6 +138,8 @@ const struct flowside *flow_target_af(union flow *flow, uint8_t pif, sa_family_t af, const void *saddr, in_port_t sport, const void *daddr, in_port_t dport); +const struct flowside *flow_target(const struct ctx *c, union flow *flow, + uint8_t proto); union flow *flow_set_type(union flow *flow, enum flow_type type); #define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_) diff --git a/fwd.c b/fwd.c index d3f1798..3288b0d 100644 --- a/fwd.c +++ b/fwd.c @@ -25,6 +25,7 @@ #include "fwd.h" #include "passt.h" #include "lineread.h" +#include "flow_table.h" /* See enum in kernel's include/net/tcp_states.h */ #define UDP_LISTEN 0x07 @@ -154,3 +155,150 @@ void fwd_scan_ports_init(struct ctx *c) &c->tcp.fwd_out, &c->tcp.fwd_in); } } + +/** + * fwd_nat_from_tap() - Determine to forward a flow from the tap interface + * @c: Execution context + * @proto: Protocol (IP L4 protocol number) + * @ini: Flow address information of the initiating side + * @tgt: Flow address information on the target side (updated) + * + * Return: pif of the target interface to forward the flow to, PIF_NONE if the + * flow cannot or should not be forwarded at all. + */ +uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto, + const struct flowside *ini, struct flowside *tgt) +{ + (void)proto; + + tgt->eaddr = ini->faddr; + tgt->eport = ini->fport; + + if (!c->no_map_gw) { + if (inany_equals4(&tgt->eaddr, &c->ip4.gw)) + tgt->eaddr = inany_loopback4; + else if (inany_equals6(&tgt->eaddr, &c->ip6.gw)) + tgt->eaddr = inany_loopback6; + } + + /* The relevant addr_out controls the host side source address. This + * may be unspecified, which allows the kernel to pick an address. + */ + if (inany_v4(&tgt->eaddr)) + tgt->faddr = inany_from_v4(c->ip4.addr_out); + else + tgt->faddr.a6 = c->ip6.addr_out; + + /* Let the kernel pick a host side source port */ + tgt->fport = 0; + + return PIF_HOST; +} + +/** + * fwd_nat_from_splice() - Determine to forward a flow from the splice interface + * @c: Execution context + * @proto: Protocol (IP L4 protocol number) + * @ini: Flow address information of the initiating side + * @tgt: Flow address information on the target side (updated) + * + * Return: pif of the target interface to forward the flow to, PIF_NONE if the + * flow cannot or should not be forwarded at all. + */ +uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto, + const struct flowside *ini, struct flowside *tgt) +{ + if (!inany_is_loopback(&ini->eaddr) || + (!inany_is_loopback(&ini->faddr) && !inany_is_unspecified(&ini->faddr))) { + char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN]; + + debug("Non loopback address on %s: [%s]:%hu -> [%s]:%hu", + pif_name(PIF_SPLICE), + inany_ntop(&ini->eaddr, estr, sizeof(estr)), ini->eport, + inany_ntop(&ini->faddr, fstr, sizeof(fstr)), ini->fport); + return PIF_NONE; + } + + if (inany_v4(&ini->eaddr)) + tgt->eaddr = inany_loopback4; + else + tgt->eaddr = inany_loopback6; + + /* Preserve the specific loopback adddress used, but let the kernel pick + * a source port on the target side + */ + tgt->faddr = ini->eaddr; + tgt->fport = 0; + + tgt->eport = ini->fport; + if (proto == IPPROTO_TCP) + tgt->eport += c->tcp.fwd_out.delta[tgt->eport]; + + /* Let the kernel pick a host side source port */ + tgt->fport = 0; + + return PIF_HOST; +} + +/** + * fwd_nat_from_host() - Determine to forward a flow from the host interface + * @c: Execution context + * @proto: Protocol (IP L4 protocol number) + * @ini: Flow address information of the initiating side + * @tgt: Flow address information on the target side (updated) + * + * Return: pif of the target interface to forward the flow to, PIF_NONE if the + * flow cannot or should not be forwarded at all. + */ +uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto, + const struct flowside *ini, struct flowside *tgt) +{ + /* Common for spliced and non-spliced cases */ + tgt->eport = ini->fport; + if (proto == IPPROTO_TCP) + tgt->eport += c->tcp.fwd_in.delta[tgt->eport]; + + if (c->mode == MODE_PASTA && inany_is_loopback(&ini->eaddr) && + proto == IPPROTO_TCP) { + /* spliceable */ + + /* Preserve the specific loopback adddress used, but let the + * kernel pick a source port on the target side + */ + tgt->faddr = ini->eaddr; + tgt->fport = 0; + + if (inany_v4(&ini->eaddr)) + tgt->eaddr = inany_loopback4; + else + tgt->eaddr = inany_loopback6; + return PIF_SPLICE; + } + + tgt->faddr = ini->eaddr; + tgt->fport = ini->eport; + + if (inany_is_loopback4(&tgt->faddr) || + inany_is_unspecified4(&tgt->faddr) || + inany_equals4(&tgt->faddr, &c->ip4.addr_seen)) { + tgt->faddr = inany_from_v4(c->ip4.gw); + } else if (inany_is_loopback6(&tgt->faddr) || + inany_equals6(&tgt->faddr, &c->ip6.addr_seen) || + inany_equals6(&tgt->faddr, &c->ip6.addr)) { + if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw)) + tgt->faddr.a6 = c->ip6.gw; + else + tgt->faddr.a6 = c->ip6.addr_ll; + } + + if (inany_v4(&tgt->faddr)) { + tgt->eaddr = inany_from_v4(c->ip4.addr_seen); + } else { + if (inany_is_linklocal6(&tgt->faddr)) + tgt->eaddr.a6 = c->ip6.addr_ll_seen; + else + tgt->eaddr.a6 = c->ip6.addr_seen; + } + + return PIF_TAP; +} diff --git a/fwd.h b/fwd.h index 41645d7..b4aa8d5 100644 --- a/fwd.h +++ b/fwd.h @@ -7,6 +7,8 @@ #ifndef FWD_H #define FWD_H +struct flowside; + /* Number of ports for both TCP and UDP */ #define NUM_PORTS (1U << 16) @@ -42,4 +44,11 @@ void fwd_scan_ports_udp(struct fwd_ports *fwd, const struct fwd_ports *rev, const struct fwd_ports *tcp_rev); void fwd_scan_ports_init(struct ctx *c); +uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto, + const struct flowside *ini, struct flowside *tgt); +uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto, + const struct flowside *ini, struct flowside *tgt); +uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto, + const struct flowside *ini, struct flowside *tgt); + #endif /* FWD_H */ diff --git a/tcp.c b/tcp.c index b6eca5d..0c66ac8 100644 --- a/tcp.c +++ b/tcp.c @@ -1470,7 +1470,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, { in_port_t srcport = ntohs(th->source); in_port_t dstport = ntohs(th->dest); - union inany_addr srcaddr, dstaddr; /* FIXME: Avoid bulky temporaries */ const struct flowside *ini, *tgt; struct tcp_tap_conn *conn; union sockaddr_inany sa; @@ -1485,34 +1484,16 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport); - dstaddr = ini->faddr; - if (!c->no_map_gw) { - if (inany_equals4(&dstaddr, &c->ip4.gw)) - dstaddr = inany_loopback4; - else if (inany_equals6(&dstaddr, &c->ip6.gw)) - dstaddr = inany_loopback6; + if (!(tgt = flow_target(c, flow, IPPROTO_TCP))) + goto cancel; + if (flow->f.pif[TGTSIDE] != PIF_HOST) { + flow_err(flow, "No support for forwarding TCP from %s to %s", + pif_name(flow->f.pif[INISIDE]), + pif_name(flow->f.pif[TGTSIDE])); + goto cancel; } - if (inany_is_linklocal6(&dstaddr)) { - srcaddr.a6 = c->ip6.addr_ll; - } else if (inany_is_loopback(&dstaddr)) { - srcaddr = dstaddr; - } else if (inany_v4(&dstaddr)) { - if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_out)) - srcaddr = inany_from_v4(c->ip4.addr_out); - else - srcaddr = inany_any4; - } else { - if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_out)) - srcaddr.a6 = c->ip6.addr_out; - else - srcaddr = inany_any6; - } - - tgt = flow_target_af(flow, PIF_HOST, AF_INET6, - &srcaddr, 0, /* Kernel decides source port */ - &dstaddr, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0 || @@ -2060,63 +2041,20 @@ static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn) conn_flag(c, conn, ACK_FROM_TAP_DUE); } -/** - * tcp_snat_inbound() - Translate source address for inbound data if needed - * @c: Execution context - * @addr: Source address of inbound packet/connection - */ -static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr) -{ - if (inany_is_loopback4(addr) || - inany_is_unspecified4(addr) || - inany_equals4(addr, &c->ip4.addr_seen)) { - *addr = inany_from_v4(c->ip4.gw); - } else if (inany_is_loopback6(addr) || - inany_equals6(addr, &c->ip6.addr_seen) || - inany_equals6(addr, &c->ip6.addr)) { - if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw)) - addr->a6 = c->ip6.gw; - else - addr->a6 = c->ip6.addr_ll; - } -} - /** * tcp_tap_conn_from_sock() - Initialize state for non-spliced connection * @c: Execution context - * @dstport: Destination port for connection (host side) * @flow: flow to initialise * @s: Accepted socket * @sa: Peer socket address (from accept()) * @now: Current timestamp */ -static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, - union flow *flow, int s, - const union sockaddr_inany *sa, +static void tcp_tap_conn_from_sock(struct ctx *c, union flow *flow, int s, const struct timespec *now) { - union inany_addr saddr, daddr; /* FIXME: avoid bulky temporaries */ - struct tcp_tap_conn *conn; - in_port_t srcport; + struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); uint64_t hash; - inany_from_sockaddr(&saddr, &srcport, sa); - tcp_snat_inbound(c, &saddr); - - if (inany_v4(&saddr)) { - daddr = inany_from_v4(c->ip4.addr_seen); - } else { - if (inany_is_linklocal6(&saddr)) - daddr.a6 = c->ip6.addr_ll_seen; - else - daddr.a6 = c->ip6.addr_seen; - } - dstport += c->tcp.fwd_in.delta[dstport]; - - flow_target_af(flow, PIF_TAP, AF_INET6, - &saddr, srcport, &daddr, dstport); - conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); - conn->sock = s; conn->timer = -1; conn->ws_to_tap = conn->ws_from_tap = 0; @@ -2174,11 +2112,26 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref, goto cancel; } - if (tcp_splice_conn_from_sock(c, ref.tcp_listen.pif, - ref.tcp_listen.port, flow, s, &sa)) - return; + if (!flow_target(c, flow, IPPROTO_TCP)) + goto cancel; + + switch (flow->f.pif[TGTSIDE]) { + case PIF_SPLICE: + case PIF_HOST: + tcp_splice_conn_from_sock(c, flow, s); + break; + + case PIF_TAP: + tcp_tap_conn_from_sock(c, flow, s, now); + break; + + default: + flow_err(flow, "No support for forwarding TCP from %s to %s", + pif_name(flow->f.pif[INISIDE]), + pif_name(flow->f.pif[TGTSIDE])); + goto cancel; + } - tcp_tap_conn_from_sock(c, ref.tcp_listen.port, flow, s, &sa, now); return; cancel: diff --git a/tcp_splice.c b/tcp_splice.c index c81daee..473562b 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -414,72 +414,18 @@ static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af) /** * tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection * @c: Execution context - * @pif0: pif id of side 0 - * @dstport: Side 0 destination port of connection * @flow: flow to initialise * @s0: Accepted (side 0) socket * @sa: Peer address of connection * - * Return: true if able to create a spliced connection, false otherwise * #syscalls:pasta setsockopt */ -bool tcp_splice_conn_from_sock(const struct ctx *c, - uint8_t pif0, in_port_t dstport, - union flow *flow, int s0, - const union sockaddr_inany *sa) +void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0) { - struct tcp_splice_conn *conn; - union inany_addr src; - in_port_t srcport; - sa_family_t af; - uint8_t tgtpif; + struct tcp_splice_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, + tcp_splice); - if (c->mode != MODE_PASTA) - return false; - - inany_from_sockaddr(&src, &srcport, sa); - af = inany_v4(&src) ? AF_INET : AF_INET6; - - switch (pif0) { - case PIF_SPLICE: - if (!inany_is_loopback(&src)) { - char str[INANY_ADDRSTRLEN]; - - /* We can't use flow_err() etc. because we haven't set - * the flow type yet - */ - warn("Bad source address %s for splice, closing", - inany_ntop(&src, str, sizeof(str))); - - /* We *don't* want to fall back to tap */ - flow_alloc_cancel(flow); - return true; - } - - tgtpif = PIF_HOST; - dstport += c->tcp.fwd_out.delta[dstport]; - break; - - case PIF_HOST: - if (!inany_is_loopback(&src)) - return false; - - tgtpif = PIF_SPLICE; - dstport += c->tcp.fwd_in.delta[dstport]; - break; - - default: - return false; - } - - /* FIXME: Record outbound source address when known */ - if (af == AF_INET) - flow_target_af(flow, tgtpif, AF_INET, - NULL, 0, &in4addr_loopback, dstport); - else - flow_target_af(flow, tgtpif, AF_INET6, - NULL, 0, &in6addr_loopback, dstport); - conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice); + ASSERT(c->mode == MODE_PASTA); conn->s[0] = s0; conn->s[1] = -1; @@ -493,8 +439,6 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, conn_flag(c, conn, CLOSING); FLOW_ACTIVATE(conn); - - return true; } /** diff --git a/tcp_splice.h b/tcp_splice.h index ed8f0c5..a20f3e2 100644 --- a/tcp_splice.h +++ b/tcp_splice.h @@ -11,10 +11,7 @@ union sockaddr_inany; void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events); -bool tcp_splice_conn_from_sock(const struct ctx *c, - uint8_t pif0, in_port_t dstport, - union flow *flow, int s0, - const union sockaddr_inany *sa); +void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0); void tcp_splice_init(struct ctx *c); #endif /* TCP_SPLICE_H */