1
0
mirror of https://passt.top/passt synced 2024-09-08 00:34:50 +00:00

flow, tcp: Flow based NAT and port forwarding for TCP

Currently the code to translate host side addresses and ports to guest side
addresses and ports, and vice versa, is scattered across the TCP code.
This includes both port redirection as controlled by the -t and -T options,
and our special case NAT controlled by the --no-map-gw option.

Gather this logic into fwd_nat_from_*() functions in fwd.c, one for each
input interface, which take protocol and address information for the
initiating side and generate the pif and address information for the
forwarded side.  This performs any NAT or port forwarding needed.

We create a flow_target() helper which applies those forwarding functions
as needed to automatically move a flow from INI to TGT state.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2024-07-18 15:26:43 +10:00 committed by Stefano Brivio
parent 4cd753e65c
commit 060f24e310
7 changed files with 245 additions and 139 deletions

53
flow.c
View File

@ -400,6 +400,59 @@ const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
return tgt;
}
/**
 * flow_target() - Determine where flow should forward to, and move to TGT
 * @c:		Execution context
 * @flow:	Flow to forward
 * @proto:	Protocol
 *
 * The flow must be the entry currently being allocated, in INI state, with no
 * type and no target pif assigned yet (all of this is asserted).
 *
 * Return: pointer to the target flowside information, NULL if there are no
 *         forwarding rules for the initiating pif, or if the forwarding
 *         function for that pif returned PIF_NONE
 */
const struct flowside *flow_target(const struct ctx *c, union flow *flow,
				   uint8_t proto)
{
	char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
	struct flow_common *f = &flow->f;
	const struct flowside *ini = &f->side[INISIDE];
	struct flowside *tgt = &f->side[TGTSIDE];
	uint8_t tgtpif = PIF_NONE;

	ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI);
	ASSERT(f->type == FLOW_TYPE_NONE);
	ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE);

	/* Each originating interface has its own NAT / forwarding function,
	 * which fills in @tgt and tells us which interface to forward to
	 */
	switch (f->pif[INISIDE]) {
	case PIF_TAP:
		tgtpif = fwd_nat_from_tap(c, proto, ini, tgt);
		break;

	case PIF_SPLICE:
		tgtpif = fwd_nat_from_splice(c, proto, ini, tgt);
		break;

	case PIF_HOST:
		tgtpif = fwd_nat_from_host(c, proto, ini, tgt);
		break;

	default:
		/* tgtpif stays PIF_NONE, so we return NULL below */
		flow_err(flow, "No rules to forward %s [%s]:%hu -> [%s]:%hu",
			 pif_name(f->pif[INISIDE]),
			 inany_ntop(&ini->eaddr, estr, sizeof(estr)),
			 ini->eport,
			 inany_ntop(&ini->faddr, fstr, sizeof(fstr)),
			 ini->fport);
		break;
	}

	if (tgtpif == PIF_NONE)
		return NULL;

	f->pif[TGTSIDE] = tgtpif;
	flow_set_state(f, FLOW_STATE_TGT);
	return tgt;
}
/**
* flow_set_type() - Set type and move to TYPED
* @flow: Flow to change state

View File

@ -138,6 +138,8 @@ const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
sa_family_t af,
const void *saddr, in_port_t sport,
const void *daddr, in_port_t dport);
/* Determine where a flow should forward to, and move it to TGT state */
const struct flowside *flow_target(const struct ctx *c, union flow *flow,
uint8_t proto);
union flow *flow_set_type(union flow *flow, enum flow_type type);
#define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_)

148
fwd.c
View File

@ -25,6 +25,7 @@
#include "fwd.h"
#include "passt.h"
#include "lineread.h"
#include "flow_table.h"
/* See enum in kernel's include/net/tcp_states.h */
#define UDP_LISTEN 0x07
@ -154,3 +155,150 @@ void fwd_scan_ports_init(struct ctx *c)
&c->tcp.fwd_out, &c->tcp.fwd_in);
}
}
/**
 * fwd_nat_from_tap() - Determine where to forward a flow from the tap interface
 * @c:		Execution context
 * @proto:	Protocol (IP L4 protocol number), currently unused
 * @ini:	Flow address information of the initiating side
 * @tgt:	Flow address information on the target side (updated)
 *
 * Return: pif of the target interface to forward the flow to, PIF_NONE if the
 *         flow cannot or should not be forwarded at all.  This implementation
 *         always returns PIF_HOST.
 */
uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
			 const struct flowside *ini, struct flowside *tgt)
{
	(void)proto;

	/* Start from the destination the initiating side asked for */
	tgt->eport = ini->fport;
	tgt->eaddr = ini->faddr;

	/* Unless no_map_gw is set, a destination equal to the configured
	 * gateway address is replaced by the loopback address of that family
	 */
	if (!c->no_map_gw && inany_equals4(&tgt->eaddr, &c->ip4.gw))
		tgt->eaddr = inany_loopback4;
	else if (!c->no_map_gw && inany_equals6(&tgt->eaddr, &c->ip6.gw))
		tgt->eaddr = inany_loopback6;

	/* Let the kernel pick a host side source port */
	tgt->fport = 0;

	/* The host side source address comes from the addr_out matching the
	 * destination's family.  It may be unspecified, in which case the
	 * kernel picks an address.
	 */
	if (!inany_v4(&tgt->eaddr))
		tgt->faddr.a6 = c->ip6.addr_out;
	else
		tgt->faddr = inany_from_v4(c->ip4.addr_out);

	return PIF_HOST;
}
/**
 * fwd_nat_from_splice() - Determine where to forward a flow from the splice
 *                         interface
 * @c:		Execution context
 * @proto:	Protocol (IP L4 protocol number)
 * @ini:	Flow address information of the initiating side
 * @tgt:	Flow address information on the target side (updated)
 *
 * Return: pif of the target interface to forward the flow to, PIF_NONE if the
 *         flow cannot or should not be forwarded at all.  Here, that's
 *         PIF_HOST, or PIF_NONE if the initiating side's endpoint address
 *         isn't loopback, or its forwarding address is neither loopback nor
 *         unspecified.
 */
uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
			    const struct flowside *ini, struct flowside *tgt)
{
	if (!inany_is_loopback(&ini->eaddr) ||
	    (!inany_is_loopback(&ini->faddr) &&
	     !inany_is_unspecified(&ini->faddr))) {
		char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];

		debug("Non loopback address on %s: [%s]:%hu -> [%s]:%hu",
		      pif_name(PIF_SPLICE),
		      inany_ntop(&ini->eaddr, estr, sizeof(estr)), ini->eport,
		      inany_ntop(&ini->faddr, fstr, sizeof(fstr)), ini->fport);
		return PIF_NONE;
	}

	/* Target the loopback address of the same family as the initiator */
	if (inany_v4(&ini->eaddr))
		tgt->eaddr = inany_loopback4;
	else
		tgt->eaddr = inany_loopback6;

	/* Preserve the specific loopback address used, but let the kernel pick
	 * a source port on the target side
	 */
	tgt->faddr = ini->eaddr;
	tgt->fport = 0;

	/* Destination port, with the outbound TCP port delta applied */
	tgt->eport = ini->fport;
	if (proto == IPPROTO_TCP)
		tgt->eport += c->tcp.fwd_out.delta[tgt->eport];

	return PIF_HOST;
}
/**
 * fwd_nat_from_host() - Determine where to forward a flow from the host
 *                       interface
 * @c:		Execution context
 * @proto:	Protocol (IP L4 protocol number)
 * @ini:	Flow address information of the initiating side
 * @tgt:	Flow address information on the target side (updated)
 *
 * Return: pif of the target interface to forward the flow to, PIF_NONE if the
 *         flow cannot or should not be forwarded at all.  Here, that's
 *         PIF_SPLICE for TCP from a loopback address in pasta mode, PIF_TAP
 *         otherwise.
 */
uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
			  const struct flowside *ini, struct flowside *tgt)
{
	/* Destination port mapping is common to both paths below */
	tgt->eport = ini->fport;
	if (proto == IPPROTO_TCP)
		tgt->eport += c->tcp.fwd_in.delta[tgt->eport];

	/* Both paths start from the initiator's own address as source address
	 * on the target side; the non-spliced path may translate it below
	 */
	tgt->faddr = ini->eaddr;

	if (proto == IPPROTO_TCP && c->mode == MODE_PASTA &&
	    inany_is_loopback(&ini->eaddr)) {
		/* Spliceable: preserve the specific loopback address used,
		 * but let the kernel pick a source port on the target side
		 */
		tgt->fport = 0;

		if (inany_v4(&ini->eaddr))
			tgt->eaddr = inany_loopback4;
		else
			tgt->eaddr = inany_loopback6;

		return PIF_SPLICE;
	}

	tgt->fport = ini->eport;

	/* Translate source addresses matching the conditions below to the
	 * gateway address, or, for IPv6 with a gateway address that's not
	 * link-local, to our link-local address
	 */
	if (inany_is_loopback4(&tgt->faddr) ||
	    inany_is_unspecified4(&tgt->faddr) ||
	    inany_equals4(&tgt->faddr, &c->ip4.addr_seen)) {
		tgt->faddr = inany_from_v4(c->ip4.gw);
	} else if (inany_is_loopback6(&tgt->faddr) ||
		   inany_equals6(&tgt->faddr, &c->ip6.addr_seen) ||
		   inany_equals6(&tgt->faddr, &c->ip6.addr)) {
		tgt->faddr.a6 = IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw) ?
				c->ip6.gw : c->ip6.addr_ll;
	}

	/* Destination is the observed (*_seen) address matching the family,
	 * and for IPv6 the scope, of the resulting source address
	 */
	if (inany_v4(&tgt->faddr))
		tgt->eaddr = inany_from_v4(c->ip4.addr_seen);
	else if (inany_is_linklocal6(&tgt->faddr))
		tgt->eaddr.a6 = c->ip6.addr_ll_seen;
	else
		tgt->eaddr.a6 = c->ip6.addr_seen;

	return PIF_TAP;
}

9
fwd.h
View File

@ -7,6 +7,8 @@
#ifndef FWD_H
#define FWD_H
struct flowside;
/* Number of ports for both TCP and UDP */
#define NUM_PORTS (1U << 16)
@ -42,4 +44,11 @@ void fwd_scan_ports_udp(struct fwd_ports *fwd, const struct fwd_ports *rev,
const struct fwd_ports *tcp_rev);
void fwd_scan_ports_init(struct ctx *c);
/* NAT and port forwarding, one function per originating interface: each fills
 * in @tgt from @ini and returns the pif to forward to, or PIF_NONE if the flow
 * cannot or should not be forwarded
 */
uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt);
uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt);
uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt);
#endif /* FWD_H */

103
tcp.c
View File

@ -1470,7 +1470,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
{
in_port_t srcport = ntohs(th->source);
in_port_t dstport = ntohs(th->dest);
union inany_addr srcaddr, dstaddr; /* FIXME: Avoid bulky temporaries */
const struct flowside *ini, *tgt;
struct tcp_tap_conn *conn;
union sockaddr_inany sa;
@ -1485,34 +1484,16 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
ini = flow_initiate_af(flow, PIF_TAP,
af, saddr, srcport, daddr, dstport);
dstaddr = ini->faddr;
if (!c->no_map_gw) {
if (inany_equals4(&dstaddr, &c->ip4.gw))
dstaddr = inany_loopback4;
else if (inany_equals6(&dstaddr, &c->ip6.gw))
dstaddr = inany_loopback6;
if (!(tgt = flow_target(c, flow, IPPROTO_TCP)))
goto cancel;
if (flow->f.pif[TGTSIDE] != PIF_HOST) {
flow_err(flow, "No support for forwarding TCP from %s to %s",
pif_name(flow->f.pif[INISIDE]),
pif_name(flow->f.pif[TGTSIDE]));
goto cancel;
}
if (inany_is_linklocal6(&dstaddr)) {
srcaddr.a6 = c->ip6.addr_ll;
} else if (inany_is_loopback(&dstaddr)) {
srcaddr = dstaddr;
} else if (inany_v4(&dstaddr)) {
if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_out))
srcaddr = inany_from_v4(c->ip4.addr_out);
else
srcaddr = inany_any4;
} else {
if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_out))
srcaddr.a6 = c->ip6.addr_out;
else
srcaddr = inany_any6;
}
tgt = flow_target_af(flow, PIF_HOST, AF_INET6,
&srcaddr, 0, /* Kernel decides source port */
&dstaddr, dstport);
conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0 ||
@ -2060,63 +2041,20 @@ static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn)
conn_flag(c, conn, ACK_FROM_TAP_DUE);
}
/**
 * tcp_snat_inbound() - Translate source address for inbound data if needed
 * @c:		Execution context
 * @addr:	Source address of inbound packet/connection (updated in place)
 */
static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr)
{
	/* IPv4: loopback, unspecified, or the observed address map to the
	 * IPv4 gateway address
	 */
	if (inany_is_loopback4(addr) || inany_is_unspecified4(addr) ||
	    inany_equals4(addr, &c->ip4.addr_seen)) {
		*addr = inany_from_v4(c->ip4.gw);
		return;
	}

	/* Anything else that isn't one of the IPv6 cases is left untouched */
	if (!inany_is_loopback6(addr) &&
	    !inany_equals6(addr, &c->ip6.addr_seen) &&
	    !inany_equals6(addr, &c->ip6.addr))
		return;

	/* IPv6: use the gateway address if it's link-local, our link-local
	 * address otherwise
	 */
	addr->a6 = IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw) ? c->ip6.gw
						     : c->ip6.addr_ll;
}
/**
* tcp_tap_conn_from_sock() - Initialize state for non-spliced connection
* @c: Execution context
* @dstport: Destination port for connection (host side)
* @flow: flow to initialise
* @s: Accepted socket
* @sa: Peer socket address (from accept())
* @now: Current timestamp
*/
static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
union flow *flow, int s,
const union sockaddr_inany *sa,
static void tcp_tap_conn_from_sock(struct ctx *c, union flow *flow, int s,
const struct timespec *now)
{
union inany_addr saddr, daddr; /* FIXME: avoid bulky temporaries */
struct tcp_tap_conn *conn;
in_port_t srcport;
struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
uint64_t hash;
inany_from_sockaddr(&saddr, &srcport, sa);
tcp_snat_inbound(c, &saddr);
if (inany_v4(&saddr)) {
daddr = inany_from_v4(c->ip4.addr_seen);
} else {
if (inany_is_linklocal6(&saddr))
daddr.a6 = c->ip6.addr_ll_seen;
else
daddr.a6 = c->ip6.addr_seen;
}
dstport += c->tcp.fwd_in.delta[dstport];
flow_target_af(flow, PIF_TAP, AF_INET6,
&saddr, srcport, &daddr, dstport);
conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
conn->sock = s;
conn->timer = -1;
conn->ws_to_tap = conn->ws_from_tap = 0;
@ -2174,11 +2112,26 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
goto cancel;
}
if (tcp_splice_conn_from_sock(c, ref.tcp_listen.pif,
ref.tcp_listen.port, flow, s, &sa))
return;
if (!flow_target(c, flow, IPPROTO_TCP))
goto cancel;
switch (flow->f.pif[TGTSIDE]) {
case PIF_SPLICE:
case PIF_HOST:
tcp_splice_conn_from_sock(c, flow, s);
break;
case PIF_TAP:
tcp_tap_conn_from_sock(c, flow, s, now);
break;
default:
flow_err(flow, "No support for forwarding TCP from %s to %s",
pif_name(flow->f.pif[INISIDE]),
pif_name(flow->f.pif[TGTSIDE]));
goto cancel;
}
tcp_tap_conn_from_sock(c, ref.tcp_listen.port, flow, s, &sa, now);
return;
cancel:

View File

@ -414,72 +414,18 @@ static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af)
/**
* tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection
* @c: Execution context
* @pif0: pif id of side 0
* @dstport: Side 0 destination port of connection
* @flow: flow to initialise
* @s0: Accepted (side 0) socket
* @sa: Peer address of connection
*
* Return: true if able to create a spliced connection, false otherwise
* #syscalls:pasta setsockopt
*/
bool tcp_splice_conn_from_sock(const struct ctx *c,
uint8_t pif0, in_port_t dstport,
union flow *flow, int s0,
const union sockaddr_inany *sa)
void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0)
{
struct tcp_splice_conn *conn;
union inany_addr src;
in_port_t srcport;
sa_family_t af;
uint8_t tgtpif;
struct tcp_splice_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE,
tcp_splice);
if (c->mode != MODE_PASTA)
return false;
inany_from_sockaddr(&src, &srcport, sa);
af = inany_v4(&src) ? AF_INET : AF_INET6;
switch (pif0) {
case PIF_SPLICE:
if (!inany_is_loopback(&src)) {
char str[INANY_ADDRSTRLEN];
/* We can't use flow_err() etc. because we haven't set
* the flow type yet
*/
warn("Bad source address %s for splice, closing",
inany_ntop(&src, str, sizeof(str)));
/* We *don't* want to fall back to tap */
flow_alloc_cancel(flow);
return true;
}
tgtpif = PIF_HOST;
dstport += c->tcp.fwd_out.delta[dstport];
break;
case PIF_HOST:
if (!inany_is_loopback(&src))
return false;
tgtpif = PIF_SPLICE;
dstport += c->tcp.fwd_in.delta[dstport];
break;
default:
return false;
}
/* FIXME: Record outbound source address when known */
if (af == AF_INET)
flow_target_af(flow, tgtpif, AF_INET,
NULL, 0, &in4addr_loopback, dstport);
else
flow_target_af(flow, tgtpif, AF_INET6,
NULL, 0, &in6addr_loopback, dstport);
conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice);
ASSERT(c->mode == MODE_PASTA);
conn->s[0] = s0;
conn->s[1] = -1;
@ -493,8 +439,6 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
conn_flag(c, conn, CLOSING);
FLOW_ACTIVATE(conn);
return true;
}
/**

View File

@ -11,10 +11,7 @@ union sockaddr_inany;
void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
uint32_t events);
bool tcp_splice_conn_from_sock(const struct ctx *c,
uint8_t pif0, in_port_t dstport,
union flow *flow, int s0,
const union sockaddr_inany *sa);
void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0);
void tcp_splice_init(struct ctx *c);
#endif /* TCP_SPLICE_H */