mirror of
https://passt.top/passt
synced 2024-12-22 13:45:32 +00:00
Compare commits
No commits in common. "69303cafbef86ef070d67582169d455eb8da288c" and "0af928eaa020c1062fdc91598dfdc533966e2afe" have entirely different histories.
69303cafbe
...
0af928eaa0
12
icmp.c
12
icmp.c
@ -154,21 +154,17 @@ void icmpv6_sock_handler(const struct ctx *c, union epoll_ref ref)
|
||||
* icmp_tap_handler() - Handle packets from tap
|
||||
* @c: Execution context
|
||||
* @af: Address family, AF_INET or AF_INET6
|
||||
* @saddr: Source address
|
||||
* @daddr: Destination address
|
||||
* @addr: Destination address
|
||||
* @p: Packet pool, single packet with ICMP/ICMPv6 header
|
||||
* @now: Current timestamp
|
||||
*
|
||||
* Return: count of consumed packets (always 1, even if malformed)
|
||||
*/
|
||||
int icmp_tap_handler(const struct ctx *c, int af,
|
||||
const void *saddr, const void *daddr,
|
||||
int icmp_tap_handler(const struct ctx *c, int af, const void *addr,
|
||||
const struct pool *p, const struct timespec *now)
|
||||
{
|
||||
size_t plen;
|
||||
|
||||
(void)saddr;
|
||||
|
||||
if (af == AF_INET) {
|
||||
struct sockaddr_in sa = {
|
||||
.sin_family = AF_INET,
|
||||
@ -214,7 +210,7 @@ int icmp_tap_handler(const struct ctx *c, int af,
|
||||
icmp_id_map[V4][id].ts = now->tv_sec;
|
||||
bitmap_set(icmp_act[V4], id);
|
||||
|
||||
sa.sin_addr = *(struct in_addr *)daddr;
|
||||
sa.sin_addr = *(struct in_addr *)addr;
|
||||
if (sendto(s, ih, sizeof(*ih) + plen, MSG_NOSIGNAL,
|
||||
(struct sockaddr *)&sa, sizeof(sa)) < 0) {
|
||||
debug("ICMP: failed to relay request to socket");
|
||||
@ -268,7 +264,7 @@ int icmp_tap_handler(const struct ctx *c, int af,
|
||||
icmp_id_map[V6][id].ts = now->tv_sec;
|
||||
bitmap_set(icmp_act[V6], id);
|
||||
|
||||
sa.sin6_addr = *(struct in6_addr *)daddr;
|
||||
sa.sin6_addr = *(struct in6_addr *)addr;
|
||||
if (sendto(s, ih, sizeof(*ih) + plen, MSG_NOSIGNAL,
|
||||
(struct sockaddr *)&sa, sizeof(sa)) < 1) {
|
||||
debug("ICMPv6: failed to relay request to socket");
|
||||
|
3
icmp.h
3
icmp.h
@ -12,8 +12,7 @@ struct ctx;
|
||||
|
||||
void icmp_sock_handler(const struct ctx *c, union epoll_ref ref);
|
||||
void icmpv6_sock_handler(const struct ctx *c, union epoll_ref ref);
|
||||
int icmp_tap_handler(const struct ctx *c,
|
||||
int af, const void *saddr, const void *daddr,
|
||||
int icmp_tap_handler(const struct ctx *c, int af, const void *addr,
|
||||
const struct pool *p, const struct timespec *now);
|
||||
void icmp_timer(const struct ctx *c, const struct timespec *ts);
|
||||
void icmp_init(void);
|
||||
|
5
inany.h
5
inany.h
@ -6,9 +6,6 @@
|
||||
* IPv6 or IPv4 (encoded as IPv4-mapped IPv6 addresses)
|
||||
*/
|
||||
|
||||
#ifndef INANY_H
|
||||
#define INANY_H
|
||||
|
||||
/** union inany_addr - Represents either an IPv4 or IPv6 address
|
||||
* @a6: Address as an IPv6 address, may be IPv4-mapped
|
||||
* @v4mapped.zero: All zero-bits for an IPv4 address
|
||||
@ -93,5 +90,3 @@ static inline void inany_from_sockaddr(union inany_addr *aa, in_port_t *port,
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* INANY_H */
|
||||
|
10
passt.c
10
passt.c
@ -117,11 +117,13 @@ static void timer_init(struct ctx *c, const struct timespec *now)
|
||||
* proto_update_l2_buf() - Update scatter-gather L2 buffers in protocol handlers
|
||||
* @eth_d: Ethernet destination address, NULL if unchanged
|
||||
* @eth_s: Ethernet source address, NULL if unchanged
|
||||
* @ip_da: Pointer to IPv4 destination address, NULL if unchanged
|
||||
*/
|
||||
void proto_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
|
||||
void proto_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||
const struct in_addr *ip_da)
|
||||
{
|
||||
tcp_update_l2_buf(eth_d, eth_s);
|
||||
udp_update_l2_buf(eth_d, eth_s);
|
||||
tcp_update_l2_buf(eth_d, eth_s, ip_da);
|
||||
udp_update_l2_buf(eth_d, eth_s, ip_da);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -245,7 +247,7 @@ int main(int argc, char **argv)
|
||||
if (!c.no_icmp)
|
||||
icmp_init();
|
||||
|
||||
proto_update_l2_buf(c.mac_guest, c.mac);
|
||||
proto_update_l2_buf(c.mac_guest, c.mac, &c.ip4.addr);
|
||||
|
||||
if (c.ifi4 && !c.no_dhcp)
|
||||
dhcp_init();
|
||||
|
4
passt.h
4
passt.h
@ -303,7 +303,7 @@ struct ctx {
|
||||
int low_rmem;
|
||||
};
|
||||
|
||||
void proto_update_l2_buf(const unsigned char *eth_d,
|
||||
const unsigned char *eth_s);
|
||||
void proto_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||
const struct in_addr *ip_da);
|
||||
|
||||
#endif /* PASST_H */
|
||||
|
2
pasta.c
2
pasta.c
@ -353,7 +353,7 @@ void pasta_ns_conf(struct ctx *c)
|
||||
}
|
||||
}
|
||||
|
||||
proto_update_l2_buf(c->mac_guest, NULL);
|
||||
proto_update_l2_buf(c->mac_guest, NULL, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
|
29
tap.c
29
tap.c
@ -625,8 +625,10 @@ resume:
|
||||
|
||||
l4_len = l3_len - hlen;
|
||||
|
||||
if (iph->saddr && c->ip4.addr_seen.s_addr != iph->saddr)
|
||||
if (iph->saddr && c->ip4.addr_seen.s_addr != iph->saddr) {
|
||||
c->ip4.addr_seen.s_addr = iph->saddr;
|
||||
proto_update_l2_buf(NULL, NULL, &c->ip4.addr_seen);
|
||||
}
|
||||
|
||||
l4h = packet_get(in, i, sizeof(*eh) + hlen, l4_len, NULL);
|
||||
if (!l4h)
|
||||
@ -641,8 +643,7 @@ resume:
|
||||
tap_packet_debug(iph, NULL, NULL, 0, NULL, 1);
|
||||
|
||||
packet_add(pkt, l4_len, l4h);
|
||||
icmp_tap_handler(c, AF_INET, &iph->saddr, &iph->daddr,
|
||||
pkt, now);
|
||||
icmp_tap_handler(c, AF_INET, &iph->daddr, pkt, now);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -707,6 +708,7 @@ append:
|
||||
|
||||
for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) {
|
||||
struct pool *p = (struct pool *)&seq->p;
|
||||
struct in_addr *da = &seq->daddr;
|
||||
size_t n = p->count;
|
||||
|
||||
tap_packet_debug(NULL, NULL, seq, 0, NULL, n);
|
||||
@ -714,13 +716,11 @@ append:
|
||||
if (seq->protocol == IPPROTO_TCP) {
|
||||
if (c->no_tcp)
|
||||
continue;
|
||||
while ((n -= tcp_tap_handler(c, AF_INET, &seq->saddr,
|
||||
&seq->daddr, p, now)));
|
||||
while ((n -= tcp_tap_handler(c, AF_INET, da, p, now)));
|
||||
} else if (seq->protocol == IPPROTO_UDP) {
|
||||
if (c->no_udp)
|
||||
continue;
|
||||
while ((n -= udp_tap_handler(c, AF_INET, &seq->saddr,
|
||||
&seq->daddr, p, now)));
|
||||
while ((n -= udp_tap_handler(c, AF_INET, da, p, now)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -801,7 +801,7 @@ resume:
|
||||
tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1);
|
||||
|
||||
packet_add(pkt, l4_len, l4h);
|
||||
icmp_tap_handler(c, AF_INET6, saddr, daddr, pkt, now);
|
||||
icmp_tap_handler(c, AF_INET6, daddr, pkt, now);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -818,6 +818,8 @@ resume:
|
||||
continue;
|
||||
}
|
||||
|
||||
*saddr = c->ip6.addr;
|
||||
|
||||
if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
|
||||
tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1);
|
||||
continue;
|
||||
@ -868,6 +870,7 @@ append:
|
||||
|
||||
for (j = 0, seq = tap6_l4; j < seq_count; j++, seq++) {
|
||||
struct pool *p = (struct pool *)&seq->p;
|
||||
struct in6_addr *da = &seq->daddr;
|
||||
size_t n = p->count;
|
||||
|
||||
tap_packet_debug(NULL, NULL, NULL, seq->protocol, seq, n);
|
||||
@ -875,13 +878,11 @@ append:
|
||||
if (seq->protocol == IPPROTO_TCP) {
|
||||
if (c->no_tcp)
|
||||
continue;
|
||||
while ((n -= tcp_tap_handler(c, AF_INET6, &seq->saddr,
|
||||
&seq->daddr, p, now)));
|
||||
while ((n -= tcp_tap_handler(c, AF_INET6, da, p, now)));
|
||||
} else if (seq->protocol == IPPROTO_UDP) {
|
||||
if (c->no_udp)
|
||||
continue;
|
||||
while ((n -= udp_tap_handler(c, AF_INET6, &seq->saddr,
|
||||
&seq->daddr, p, now)));
|
||||
while ((n -= udp_tap_handler(c, AF_INET6, da, p, now)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -967,7 +968,7 @@ redo:
|
||||
|
||||
if (memcmp(c->mac_guest, eh->h_source, ETH_ALEN)) {
|
||||
memcpy(c->mac_guest, eh->h_source, ETH_ALEN);
|
||||
proto_update_l2_buf(c->mac_guest, NULL);
|
||||
proto_update_l2_buf(c->mac_guest, NULL, NULL);
|
||||
}
|
||||
|
||||
switch (ntohs(eh->h_proto)) {
|
||||
@ -1028,7 +1029,7 @@ restart:
|
||||
|
||||
if (memcmp(c->mac_guest, eh->h_source, ETH_ALEN)) {
|
||||
memcpy(c->mac_guest, eh->h_source, ETH_ALEN);
|
||||
proto_update_l2_buf(c->mac_guest, NULL);
|
||||
proto_update_l2_buf(c->mac_guest, NULL, NULL);
|
||||
}
|
||||
|
||||
switch (ntohs(eh->h_proto)) {
|
||||
|
203
tcp.c
203
tcp.c
@ -309,6 +309,9 @@
|
||||
#define TCP_FRAMES \
|
||||
(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
|
||||
|
||||
#define TCP_FILE_PRESSURE 30 /* % of c->nofile */
|
||||
#define TCP_CONN_PRESSURE 30 /* % of c->tcp.conn_count */
|
||||
|
||||
#define TCP_HASH_TABLE_LOAD 70 /* % */
|
||||
#define TCP_HASH_TABLE_SIZE (TCP_MAX_CONNS * 100 / \
|
||||
TCP_HASH_TABLE_LOAD)
|
||||
@ -320,8 +323,10 @@
|
||||
#define MSS_DEFAULT 536
|
||||
|
||||
struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */
|
||||
uint32_t psum;
|
||||
uint32_t tsum;
|
||||
#ifdef __AVX2__
|
||||
uint8_t pad[26];
|
||||
uint8_t pad[18];
|
||||
#else
|
||||
uint8_t pad[2];
|
||||
#endif
|
||||
@ -396,7 +401,7 @@ struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */
|
||||
#define OPT_SACK 5
|
||||
#define OPT_TS 8
|
||||
|
||||
#define CONN_V4(conn) (!!inany_v4(&(conn)->faddr))
|
||||
#define CONN_V4(conn) (!!inany_v4(&(conn)->addr))
|
||||
#define CONN_V6(conn) (!CONN_V4(conn))
|
||||
#define CONN_IS_CLOSING(conn) \
|
||||
((conn->events & ESTABLISHED) && \
|
||||
@ -429,15 +434,15 @@ static const char *tcp_flag_str[] __attribute((__unused__)) = {
|
||||
static int tcp_sock_init_ext [NUM_PORTS][IP_VERSIONS];
|
||||
static int tcp_sock_ns [NUM_PORTS][IP_VERSIONS];
|
||||
|
||||
/* Table of guest side forwarding addresses with very low RTT (assumed
|
||||
* to be local to the host), LRU
|
||||
*/
|
||||
/* Table of destinations with very low RTT (assumed to be local), LRU */
|
||||
static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
|
||||
|
||||
/* Static buffers */
|
||||
|
||||
/**
|
||||
* tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
|
||||
* @psum: Partial IP header checksum (excluding tot_len and saddr)
|
||||
* @tsum: Partial TCP header checksum (excluding length and saddr)
|
||||
* @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
|
||||
* @taph: Tap-level headers (partially pre-filled)
|
||||
* @iph: Pre-filled IP header (except for tot_len and saddr)
|
||||
@ -445,15 +450,17 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
|
||||
* @data: Storage for TCP payload
|
||||
*/
|
||||
static struct tcp4_l2_buf_t {
|
||||
uint32_t psum; /* 0 */
|
||||
uint32_t tsum; /* 4 */
|
||||
#ifdef __AVX2__
|
||||
uint8_t pad[26]; /* 0, align th to 32 bytes */
|
||||
uint8_t pad[18]; /* 8, align th to 32 bytes */
|
||||
#else
|
||||
uint8_t pad[2]; /* align iph to 4 bytes 0 */
|
||||
uint8_t pad[2]; /* align iph to 4 bytes 8 */
|
||||
#endif
|
||||
struct tap_hdr taph; /* 26 2 */
|
||||
struct iphdr iph; /* 44 20 */
|
||||
struct tcphdr th; /* 64 40 */
|
||||
uint8_t data[MSS4]; /* 84 60 */
|
||||
struct tap_hdr taph; /* 26 10 */
|
||||
struct iphdr iph; /* 44 28 */
|
||||
struct tcphdr th; /* 64 48 */
|
||||
uint8_t data[MSS4]; /* 84 68 */
|
||||
/* 65536 65532 */
|
||||
#ifdef __AVX2__
|
||||
} __attribute__ ((packed, aligned(32)))
|
||||
@ -508,6 +515,8 @@ static struct iovec tcp_iov [UIO_MAXIOV];
|
||||
|
||||
/**
|
||||
* tcp4_l2_flags_buf_t - IPv4 packet buffers for segments without data (flags)
|
||||
* @psum: Partial IP header checksum (excluding tot_len and saddr)
|
||||
* @tsum: Partial TCP header checksum (excluding length and saddr)
|
||||
* @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
|
||||
* @taph: Tap-level headers (partially pre-filled)
|
||||
* @iph: Pre-filled IP header (except for tot_len and saddr)
|
||||
@ -515,14 +524,16 @@ static struct iovec tcp_iov [UIO_MAXIOV];
|
||||
* @opts: Headroom for TCP options
|
||||
*/
|
||||
static struct tcp4_l2_flags_buf_t {
|
||||
uint32_t psum; /* 0 */
|
||||
uint32_t tsum; /* 4 */
|
||||
#ifdef __AVX2__
|
||||
uint8_t pad[26]; /* 0, align th to 32 bytes */
|
||||
uint8_t pad[18]; /* 8, align th to 32 bytes */
|
||||
#else
|
||||
uint8_t pad[2]; /* align iph to 4 bytes 0 */
|
||||
uint8_t pad[2]; /* align iph to 4 bytes 8 */
|
||||
#endif
|
||||
struct tap_hdr taph; /* 26 2 */
|
||||
struct iphdr iph; /* 44 20 */
|
||||
struct tcphdr th; /* 64 40 */
|
||||
struct tap_hdr taph; /* 26 10 */
|
||||
struct iphdr iph; /* 44 28 */
|
||||
struct tcphdr th; /* 64 48 */
|
||||
char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
|
||||
#ifdef __AVX2__
|
||||
} __attribute__ ((packed, aligned(32)))
|
||||
@ -631,13 +642,13 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
*/
|
||||
static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
{
|
||||
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
|
||||
int m = conn->c.in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
|
||||
union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock,
|
||||
.tcp.index = CONN_IDX(conn) };
|
||||
struct epoll_event ev = { .data.u64 = ref.u64 };
|
||||
|
||||
if (conn->events == CLOSED) {
|
||||
if (conn->in_epoll)
|
||||
if (conn->c.in_epoll)
|
||||
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, conn->sock, &ev);
|
||||
if (conn->timer != -1)
|
||||
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, conn->timer, &ev);
|
||||
@ -649,7 +660,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
if (epoll_ctl(c->epollfd, m, conn->sock, &ev))
|
||||
return -errno;
|
||||
|
||||
conn->in_epoll = true;
|
||||
conn->c.in_epoll = true;
|
||||
|
||||
if (conn->timer != -1) {
|
||||
union epoll_ref ref_t = { .type = EPOLL_TYPE_TCP_TIMER,
|
||||
@ -847,7 +858,7 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < LOW_RTT_TABLE_SIZE; i++)
|
||||
if (inany_equals(&conn->faddr, low_rtt_dst + i))
|
||||
if (inany_equals(&conn->addr, low_rtt_dst + i))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@ -869,7 +880,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
|
||||
return;
|
||||
|
||||
for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) {
|
||||
if (inany_equals(&conn->faddr, low_rtt_dst + i))
|
||||
if (inany_equals(&conn->addr, low_rtt_dst + i))
|
||||
return;
|
||||
if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i))
|
||||
hole = i;
|
||||
@ -881,7 +892,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
|
||||
if (hole == -1)
|
||||
return;
|
||||
|
||||
low_rtt_dst[hole++] = conn->faddr;
|
||||
low_rtt_dst[hole++] = conn->addr;
|
||||
if (hole == LOW_RTT_TABLE_SIZE)
|
||||
hole = 0;
|
||||
inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any);
|
||||
@ -940,13 +951,11 @@ void tcp_sock_set_bufsize(const struct ctx *c, int s)
|
||||
*/
|
||||
static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf)
|
||||
{
|
||||
uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_TCP);
|
||||
uint32_t sum = buf->psum;
|
||||
|
||||
sum += buf->iph.tot_len;
|
||||
sum += (buf->iph.saddr >> 16) & 0xffff;
|
||||
sum += buf->iph.saddr & 0xffff;
|
||||
sum += (buf->iph.daddr >> 16) & 0xffff;
|
||||
sum += buf->iph.daddr & 0xffff;
|
||||
|
||||
buf->iph.check = (uint16_t)~csum_fold(sum);
|
||||
}
|
||||
@ -958,12 +967,10 @@ static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf)
|
||||
static void tcp_update_check_tcp4(struct tcp4_l2_buf_t *buf)
|
||||
{
|
||||
uint16_t tlen = ntohs(buf->iph.tot_len) - 20;
|
||||
uint32_t sum = htons(IPPROTO_TCP);
|
||||
uint32_t sum = buf->tsum;
|
||||
|
||||
sum += (buf->iph.saddr >> 16) & 0xffff;
|
||||
sum += buf->iph.saddr & 0xffff;
|
||||
sum += (buf->iph.daddr >> 16) & 0xffff;
|
||||
sum += buf->iph.daddr & 0xffff;
|
||||
sum += htons(ntohs(buf->iph.tot_len) - 20);
|
||||
|
||||
buf->th.check = 0;
|
||||
@ -994,8 +1001,10 @@ static void tcp_update_check_tcp6(struct tcp6_l2_buf_t *buf)
|
||||
* tcp_update_l2_buf() - Update L2 buffers with Ethernet and IPv4 addresses
|
||||
* @eth_d: Ethernet destination address, NULL if unchanged
|
||||
* @eth_s: Ethernet source address, NULL if unchanged
|
||||
* @ip_da: Pointer to IPv4 destination address, NULL if unchanged
|
||||
*/
|
||||
void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
|
||||
void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||
const struct in_addr *ip_da)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -1009,6 +1018,24 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
|
||||
tap_update_mac(&b6->taph, eth_d, eth_s);
|
||||
tap_update_mac(&b4f->taph, eth_d, eth_s);
|
||||
tap_update_mac(&b6f->taph, eth_d, eth_s);
|
||||
|
||||
if (ip_da) {
|
||||
b4f->iph.daddr = b4->iph.daddr = ip_da->s_addr;
|
||||
if (!i) {
|
||||
b4f->iph.saddr = b4->iph.saddr = 0;
|
||||
b4f->iph.tot_len = b4->iph.tot_len = 0;
|
||||
b4f->iph.check = b4->iph.check = 0;
|
||||
b4f->psum = b4->psum = sum_16b(&b4->iph, 20);
|
||||
|
||||
b4->tsum = ((ip_da->s_addr >> 16) & 0xffff) +
|
||||
(ip_da->s_addr & 0xffff) +
|
||||
htons(IPPROTO_TCP);
|
||||
b4f->tsum = b4->tsum;
|
||||
} else {
|
||||
b4f->psum = b4->psum = tcp4_l2_buf[0].psum;
|
||||
b4f->tsum = b4->tsum = tcp4_l2_buf[0].tsum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1016,16 +1043,15 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
|
||||
* tcp_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets
|
||||
* @c: Execution context
|
||||
*/
|
||||
static void tcp_sock4_iov_init(struct ctx *c)
|
||||
static void tcp_sock4_iov_init(const struct ctx *c)
|
||||
{
|
||||
struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
|
||||
struct iovec *iov;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tcp4_l2_buf); i++) {
|
||||
tcp4_l2_buf[i] = (struct tcp4_l2_buf_t) {
|
||||
.taph = TAP_HDR_INIT(ETH_P_IP),
|
||||
.iph = iph,
|
||||
.iph = L2_BUF_IP4_INIT(IPPROTO_TCP),
|
||||
.th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 }
|
||||
};
|
||||
}
|
||||
@ -1136,18 +1162,18 @@ static int tcp_opt_get(const char *opts, size_t len, uint8_t type_find,
|
||||
/**
|
||||
* tcp_hash_match() - Check if a connection entry matches address and ports
|
||||
* @conn: Connection entry to match against
|
||||
* @faddr: Guest side forwarding address
|
||||
* @eport: Guest side endpoint port
|
||||
* @fport: Guest side forwarding port
|
||||
* @addr: Remote address
|
||||
* @tap_port: tap-facing port
|
||||
* @sock_port: Socket-facing port
|
||||
*
|
||||
* Return: 1 on match, 0 otherwise
|
||||
*/
|
||||
static int tcp_hash_match(const struct tcp_tap_conn *conn,
|
||||
const union inany_addr *faddr,
|
||||
in_port_t eport, in_port_t fport)
|
||||
const union inany_addr *addr,
|
||||
in_port_t tap_port, in_port_t sock_port)
|
||||
{
|
||||
if (inany_equals(&conn->faddr, faddr) &&
|
||||
conn->eport == eport && conn->fport == fport)
|
||||
if (inany_equals(&conn->addr, addr) &&
|
||||
conn->tap_port == tap_port && conn->sock_port == sock_port)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@ -1156,21 +1182,21 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn,
|
||||
/**
|
||||
* tcp_hash() - Calculate hash value for connection given address and ports
|
||||
* @c: Execution context
|
||||
* @faddr: Guest side forwarding address
|
||||
* @eport: Guest side endpoint port
|
||||
* @fport: Guest side forwarding port
|
||||
* @addr: Remote address
|
||||
* @tap_port: tap-facing port
|
||||
* @sock_port: Socket-facing port
|
||||
*
|
||||
* Return: hash value, already modulo size of the hash table
|
||||
*/
|
||||
static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *faddr,
|
||||
in_port_t eport, in_port_t fport)
|
||||
static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *addr,
|
||||
in_port_t tap_port, in_port_t sock_port)
|
||||
{
|
||||
struct {
|
||||
union inany_addr faddr;
|
||||
in_port_t eport;
|
||||
in_port_t fport;
|
||||
union inany_addr addr;
|
||||
in_port_t tap_port;
|
||||
in_port_t sock_port;
|
||||
} __attribute__((__packed__)) in = {
|
||||
*faddr, eport, fport
|
||||
*addr, tap_port, sock_port
|
||||
};
|
||||
uint64_t b = 0;
|
||||
|
||||
@ -1189,7 +1215,7 @@ static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *faddr,
|
||||
static unsigned int tcp_conn_hash(const struct ctx *c,
|
||||
const struct tcp_tap_conn *conn)
|
||||
{
|
||||
return tcp_hash(c, &conn->faddr, conn->eport, conn->fport);
|
||||
return tcp_hash(c, &conn->addr, conn->tap_port, conn->sock_port);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1201,7 +1227,7 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
{
|
||||
int b;
|
||||
|
||||
b = tcp_hash(c, &conn->faddr, conn->eport, conn->fport);
|
||||
b = tcp_hash(c, &conn->addr, conn->tap_port, conn->sock_port);
|
||||
conn->next_index = tc_hash[b] ? CONN_IDX(tc_hash[b]) : -1;
|
||||
tc_hash[b] = conn;
|
||||
|
||||
@ -1270,24 +1296,25 @@ static void tcp_tap_conn_update(struct ctx *c, struct tcp_tap_conn *old,
|
||||
* tcp_hash_lookup() - Look up connection given remote address and ports
|
||||
* @c: Execution context
|
||||
* @af: Address family, AF_INET or AF_INET6
|
||||
* @faddr: Guest side forwarding address (guest remote address)
|
||||
* @eport: Guest side endpoint port (guest local port)
|
||||
* @fport: Guest side forwarding port (guest remote port)
|
||||
* @addr: Remote address, pointer to in_addr or in6_addr
|
||||
* @tap_port: tap-facing port
|
||||
* @sock_port: Socket-facing port
|
||||
*
|
||||
* Return: connection pointer, if found, -ENOENT otherwise
|
||||
*/
|
||||
static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
|
||||
int af, const void *faddr,
|
||||
in_port_t eport, in_port_t fport)
|
||||
int af, const void *addr,
|
||||
in_port_t tap_port,
|
||||
in_port_t sock_port)
|
||||
{
|
||||
union inany_addr aany;
|
||||
struct tcp_tap_conn *conn;
|
||||
int b;
|
||||
|
||||
inany_from_af(&aany, af, faddr);
|
||||
b = tcp_hash(c, &aany, eport, fport);
|
||||
inany_from_af(&aany, af, addr);
|
||||
b = tcp_hash(c, &aany, tap_port, sock_port);
|
||||
for (conn = tc_hash[b]; conn; conn = conn_at_idx(conn->next_index)) {
|
||||
if (tcp_hash_match(conn, &aany, eport, fport))
|
||||
if (tcp_hash_match(conn, &aany, tap_port, sock_port))
|
||||
return conn;
|
||||
}
|
||||
|
||||
@ -1382,11 +1409,17 @@ static void tcp_l2_data_buf_flush(struct ctx *c)
|
||||
*/
|
||||
void tcp_defer_handler(struct ctx *c)
|
||||
{
|
||||
int max_conns = c->tcp.conn_count / 100 * TCP_CONN_PRESSURE;
|
||||
int max_files = c->nofile / 100 * TCP_FILE_PRESSURE;
|
||||
union tcp_conn *conn;
|
||||
|
||||
tcp_l2_flags_buf_flush(c);
|
||||
tcp_l2_data_buf_flush(c);
|
||||
|
||||
if ((c->tcp.conn_count < MIN(max_files, max_conns)) &&
|
||||
(c->tcp.splice_conn_count < MIN(max_files / 6, max_conns)))
|
||||
return;
|
||||
|
||||
for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) {
|
||||
if (conn->c.spliced) {
|
||||
if (conn->splice.flags & CLOSING)
|
||||
@ -1414,13 +1447,13 @@ static size_t tcp_l2_buf_fill_headers(const struct ctx *c,
|
||||
void *p, size_t plen,
|
||||
const uint16_t *check, uint32_t seq)
|
||||
{
|
||||
const struct in_addr *a4 = inany_v4(&conn->faddr);
|
||||
const struct in_addr *a4 = inany_v4(&conn->addr);
|
||||
size_t ip_len, tlen;
|
||||
|
||||
#define SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq) \
|
||||
do { \
|
||||
b->th.source = htons(conn->fport); \
|
||||
b->th.dest = htons(conn->eport); \
|
||||
b->th.source = htons(conn->sock_port); \
|
||||
b->th.dest = htons(conn->tap_port); \
|
||||
b->th.seq = htonl(seq); \
|
||||
b->th.ack_seq = htonl(conn->seq_ack_to_tap); \
|
||||
if (conn->events & ESTABLISHED) { \
|
||||
@ -1456,7 +1489,7 @@ do { \
|
||||
ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
|
||||
|
||||
b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr));
|
||||
b->ip6h.saddr = conn->faddr.a6;
|
||||
b->ip6h.saddr = conn->addr.a6;
|
||||
if (IN6_IS_ADDR_LINKLOCAL(&b->ip6h.saddr))
|
||||
b->ip6h.daddr = c->ip6.addr_ll_seen;
|
||||
else
|
||||
@ -1809,7 +1842,7 @@ static void tcp_clamp_window(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
/**
|
||||
* tcp_seq_init() - Calculate initial sequence number according to RFC 6528
|
||||
* @c: Execution context
|
||||
* @conn: TCP connection, with faddr, fport and eport populated
|
||||
* @conn: TCP connection, with addr, sock_port and tap_port populated
|
||||
* @now: Current timestamp
|
||||
*/
|
||||
static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
@ -1822,9 +1855,9 @@ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
union inany_addr dst;
|
||||
in_port_t dstport;
|
||||
} __attribute__((__packed__)) in = {
|
||||
.src = conn->faddr,
|
||||
.srcport = conn->fport,
|
||||
.dstport = conn->eport,
|
||||
.src = conn->addr,
|
||||
.srcport = conn->tap_port,
|
||||
.dstport = conn->sock_port,
|
||||
};
|
||||
uint32_t ns, seq = 0;
|
||||
|
||||
@ -1972,15 +2005,13 @@ static void tcp_bind_outbound(const struct ctx *c, int s, sa_family_t af)
|
||||
* tcp_conn_from_tap() - Handle connection request (SYN segment) from tap
|
||||
* @c: Execution context
|
||||
* @af: Address family, AF_INET or AF_INET6
|
||||
* @saddr: Source address, pointer to in_addr or in6_addr
|
||||
* @daddr: Destination address, pointer to in_addr or in6_addr
|
||||
* @addr: Remote address, pointer to in_addr or in6_addr
|
||||
* @th: TCP header from tap: caller MUST ensure it's there
|
||||
* @opts: Pointer to start of options
|
||||
* @optlen: Bytes in options: caller MUST ensure available length
|
||||
* @now: Current timestamp
|
||||
*/
|
||||
static void tcp_conn_from_tap(struct ctx *c,
|
||||
int af, const void *saddr, const void *daddr,
|
||||
static void tcp_conn_from_tap(struct ctx *c, int af, const void *addr,
|
||||
const struct tcphdr *th, const char *opts,
|
||||
size_t optlen, const struct timespec *now)
|
||||
{
|
||||
@ -1988,20 +2019,18 @@ static void tcp_conn_from_tap(struct ctx *c,
|
||||
struct sockaddr_in addr4 = {
|
||||
.sin_family = AF_INET,
|
||||
.sin_port = th->dest,
|
||||
.sin_addr = *(struct in_addr *)daddr,
|
||||
.sin_addr = *(struct in_addr *)addr,
|
||||
};
|
||||
struct sockaddr_in6 addr6 = {
|
||||
.sin6_family = AF_INET6,
|
||||
.sin6_port = th->dest,
|
||||
.sin6_addr = *(struct in6_addr *)daddr,
|
||||
.sin6_addr = *(struct in6_addr *)addr,
|
||||
};
|
||||
const struct sockaddr *sa;
|
||||
struct tcp_tap_conn *conn;
|
||||
socklen_t sl;
|
||||
int s, mss;
|
||||
|
||||
(void)saddr;
|
||||
|
||||
if (c->tcp.conn_count >= TCP_MAX_CONNS)
|
||||
return;
|
||||
|
||||
@ -2010,9 +2039,9 @@ static void tcp_conn_from_tap(struct ctx *c,
|
||||
return;
|
||||
|
||||
if (!c->no_map_gw) {
|
||||
if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw))
|
||||
if (af == AF_INET && IN4_ARE_ADDR_EQUAL(addr, &c->ip4.gw))
|
||||
addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
|
||||
if (af == AF_INET6 && IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw))
|
||||
if (af == AF_INET6 && IN6_ARE_ADDR_EQUAL(addr, &c->ip6.gw))
|
||||
addr6.sin6_addr = in6addr_loopback;
|
||||
}
|
||||
|
||||
@ -2049,7 +2078,7 @@ static void tcp_conn_from_tap(struct ctx *c,
|
||||
if (!(conn->wnd_from_tap = (htons(th->window) >> conn->ws_from_tap)))
|
||||
conn->wnd_from_tap = 1;
|
||||
|
||||
inany_from_af(&conn->faddr, af, daddr);
|
||||
inany_from_af(&conn->addr, af, addr);
|
||||
|
||||
if (af == AF_INET) {
|
||||
sa = (struct sockaddr *)&addr4;
|
||||
@ -2059,8 +2088,8 @@ static void tcp_conn_from_tap(struct ctx *c,
|
||||
sl = sizeof(addr6);
|
||||
}
|
||||
|
||||
conn->fport = ntohs(th->dest);
|
||||
conn->eport = ntohs(th->source);
|
||||
conn->sock_port = ntohs(th->dest);
|
||||
conn->tap_port = ntohs(th->source);
|
||||
|
||||
conn->seq_init_from_tap = ntohl(th->seq);
|
||||
conn->seq_from_tap = conn->seq_init_from_tap + 1;
|
||||
@ -2527,14 +2556,13 @@ static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
|
||||
* tcp_tap_handler() - Handle packets from tap and state transitions
|
||||
* @c: Execution context
|
||||
* @af: Address family, AF_INET or AF_INET6
|
||||
* @saddr: Source address
|
||||
* @daddr: Destination address
|
||||
* @addr: Destination address
|
||||
* @p: Pool of TCP packets, with TCP headers
|
||||
* @now: Current timestamp
|
||||
*
|
||||
* Return: count of consumed packets
|
||||
*/
|
||||
int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
|
||||
int tcp_tap_handler(struct ctx *c, int af, const void *addr,
|
||||
const struct pool *p, const struct timespec *now)
|
||||
{
|
||||
struct tcp_tap_conn *conn;
|
||||
@ -2555,13 +2583,12 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
|
||||
optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
|
||||
opts = packet_get(p, 0, sizeof(*th), optlen, NULL);
|
||||
|
||||
conn = tcp_hash_lookup(c, af, daddr, htons(th->source), htons(th->dest));
|
||||
conn = tcp_hash_lookup(c, af, addr, htons(th->source), htons(th->dest));
|
||||
|
||||
/* New connection from tap */
|
||||
if (!conn) {
|
||||
if (opts && th->syn && !th->ack)
|
||||
tcp_conn_from_tap(c, af, saddr, daddr, th,
|
||||
opts, optlen, now);
|
||||
tcp_conn_from_tap(c, af, addr, th, opts, optlen, now);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -2720,10 +2747,10 @@ static void tcp_tap_conn_from_sock(struct ctx *c,
|
||||
conn->ws_to_tap = conn->ws_from_tap = 0;
|
||||
conn_event(c, conn, SOCK_ACCEPTED);
|
||||
|
||||
inany_from_sockaddr(&conn->faddr, &conn->fport, sa);
|
||||
conn->eport = ref.port;
|
||||
inany_from_sockaddr(&conn->addr, &conn->sock_port, sa);
|
||||
conn->tap_port = ref.port;
|
||||
|
||||
tcp_snat_inbound(c, &conn->faddr);
|
||||
tcp_snat_inbound(c, &conn->addr);
|
||||
|
||||
tcp_seq_init(c, conn, now);
|
||||
tcp_hash_insert(c, conn);
|
||||
|
7
tcp.h
7
tcp.h
@ -17,7 +17,7 @@ void tcp_timer_handler(struct ctx *c, union epoll_ref ref);
|
||||
void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
|
||||
const struct timespec *now);
|
||||
void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events);
|
||||
int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
|
||||
int tcp_tap_handler(struct ctx *c, int af, const void *addr,
|
||||
const struct pool *p, const struct timespec *now);
|
||||
int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
|
||||
const char *ifname, in_port_t port);
|
||||
@ -26,7 +26,8 @@ void tcp_timer(struct ctx *c, const struct timespec *ts);
|
||||
void tcp_defer_handler(struct ctx *c);
|
||||
|
||||
void tcp_sock_set_bufsize(const struct ctx *c, int s);
|
||||
void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s);
|
||||
void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||
const struct in_addr *ip_da);
|
||||
|
||||
/**
|
||||
* union tcp_epoll_ref - epoll reference portion for TCP connections
|
||||
@ -56,6 +57,7 @@ union tcp_listen_epoll_ref {
|
||||
* struct tcp_ctx - Execution context for TCP routines
|
||||
* @hash_secret: 128-bit secret for hash functions, ISN and hash table
|
||||
* @conn_count: Count of total connections in connection table
|
||||
* @splice_conn_count: Count of spliced connections in connection table
|
||||
* @port_to_tap: Ports bound host-side, packets to tap or spliced
|
||||
* @fwd_in: Port forwarding configuration for inbound packets
|
||||
* @fwd_out: Port forwarding configuration for outbound packets
|
||||
@ -66,6 +68,7 @@ union tcp_listen_epoll_ref {
|
||||
struct tcp_ctx {
|
||||
uint64_t hash_secret[2];
|
||||
int conn_count;
|
||||
int splice_conn_count;
|
||||
struct port_fwd fwd_in;
|
||||
struct port_fwd fwd_out;
|
||||
struct timespec timer_run;
|
||||
|
18
tcp_conn.h
18
tcp_conn.h
@ -12,9 +12,11 @@
|
||||
/**
|
||||
* struct tcp_conn_common - Common fields for spliced and non-spliced
|
||||
* @spliced: Is this a spliced connection?
|
||||
* @in_epoll: Is the connection in the epoll set?
|
||||
*/
|
||||
struct tcp_conn_common {
|
||||
bool spliced :1;
|
||||
bool in_epoll :1;
|
||||
};
|
||||
|
||||
extern const char *tcp_common_flag_str[];
|
||||
@ -22,7 +24,6 @@ extern const char *tcp_common_flag_str[];
|
||||
/**
|
||||
* struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
|
||||
* @c: Fields common with tcp_splice_conn
|
||||
* @in_epoll: Is the connection in the epoll set?
|
||||
* @next_index: Connection index of next item in hash chain, -1 for none
|
||||
* @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
|
||||
* @sock: Socket descriptor number
|
||||
@ -34,9 +35,9 @@ extern const char *tcp_common_flag_str[];
|
||||
* @ws_to_tap: Window scaling factor advertised to tap/guest
|
||||
* @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
|
||||
* @seq_dup_ack_approx: Last duplicate ACK number sent to tap
|
||||
* @faddr: Guest side forwarding address (guest's remote address)
|
||||
* @eport: Guest side endpoint port (guest's local port)
|
||||
* @fport: Guest side forwarding port (guest's remote port)
|
||||
* @addr: Remote address (IPv4 or IPv6)
|
||||
* @tap_port: Guest-facing tap port
|
||||
* @sock_port: Remote, socket-facing port
|
||||
* @wnd_from_tap: Last window size from tap, unscaled (as received)
|
||||
* @wnd_to_tap: Sending window advertised to tap, unscaled (as sent)
|
||||
* @seq_to_tap: Next sequence for packets to tap
|
||||
@ -49,7 +50,6 @@ struct tcp_tap_conn {
|
||||
/* Must be first element to match tcp_splice_conn */
|
||||
struct tcp_conn_common c;
|
||||
|
||||
bool in_epoll :1;
|
||||
int next_index :TCP_CONN_INDEX_BITS + 2;
|
||||
|
||||
#define TCP_RETRANS_BITS 3
|
||||
@ -105,9 +105,9 @@ struct tcp_tap_conn {
|
||||
uint8_t seq_dup_ack_approx;
|
||||
|
||||
|
||||
union inany_addr faddr;
|
||||
in_port_t eport;
|
||||
in_port_t fport;
|
||||
union inany_addr addr;
|
||||
in_port_t tap_port;
|
||||
in_port_t sock_port;
|
||||
|
||||
uint16_t wnd_from_tap;
|
||||
uint16_t wnd_to_tap;
|
||||
@ -122,7 +122,6 @@ struct tcp_tap_conn {
|
||||
/**
|
||||
* struct tcp_splice_conn - Descriptor for a spliced TCP connection
|
||||
* @c: Fields common with tcp_tap_conn
|
||||
* @in_epoll: Is the connection in the epoll set?
|
||||
* @a: File descriptor number of socket for accepted connection
|
||||
* @pipe_a_b: Pipe ends for splice() from @a to @b
|
||||
* @b: File descriptor number of peer connected socket
|
||||
@ -138,7 +137,6 @@ struct tcp_splice_conn {
|
||||
/* Must be first element to match tcp_tap_conn */
|
||||
struct tcp_conn_common c;
|
||||
|
||||
bool in_epoll :1;
|
||||
int a;
|
||||
int pipe_a_b[2];
|
||||
int b;
|
||||
|
@ -172,7 +172,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
|
||||
static int tcp_splice_epoll_ctl(const struct ctx *c,
|
||||
struct tcp_splice_conn *conn)
|
||||
{
|
||||
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
|
||||
int m = conn->c.in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
|
||||
union epoll_ref ref_a = { .type = EPOLL_TYPE_TCP, .fd = conn->a,
|
||||
.tcp.index = CONN_IDX(conn) };
|
||||
union epoll_ref ref_b = { .type = EPOLL_TYPE_TCP, .fd = conn->b,
|
||||
@ -192,7 +192,7 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
|
||||
epoll_ctl(c->epollfd, m, conn->b, &ev_b))
|
||||
goto delete;
|
||||
|
||||
conn->in_epoll = true;
|
||||
conn->c.in_epoll = true;
|
||||
|
||||
return 0;
|
||||
|
||||
@ -295,6 +295,7 @@ void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union)
|
||||
conn->flags = 0;
|
||||
debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn));
|
||||
|
||||
c->tcp.splice_conn_count--;
|
||||
tcp_table_compact(c, conn_union);
|
||||
}
|
||||
|
||||
@ -512,6 +513,7 @@ bool tcp_splice_conn_from_sock(struct ctx *c, union tcp_listen_epoll_ref ref,
|
||||
trace("TCP (spliced): failed to set TCP_QUICKACK on %i", s);
|
||||
|
||||
conn->c.spliced = true;
|
||||
c->tcp.splice_conn_count++;
|
||||
conn->a = s;
|
||||
|
||||
if (tcp_splice_new(c, conn, ref.port, ref.ns))
|
||||
|
37
udp.c
37
udp.c
@ -168,6 +168,7 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8)
|
||||
/**
|
||||
* udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
|
||||
* @s_in: Source socket address, filled in by recvmmsg()
|
||||
* @psum: Partial IP header checksum (excluding tot_len and saddr)
|
||||
* @taph: Tap-level headers (partially pre-filled)
|
||||
* @iph: Pre-filled IP header (except for tot_len and saddr)
|
||||
* @uh: Headroom for UDP header
|
||||
@ -175,6 +176,7 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8)
|
||||
*/
|
||||
static struct udp4_l2_buf_t {
|
||||
struct sockaddr_in s_in;
|
||||
uint32_t psum;
|
||||
|
||||
struct tap_hdr taph;
|
||||
struct iphdr iph;
|
||||
@ -261,13 +263,11 @@ static void udp_invert_portmap(struct udp_port_fwd *fwd)
|
||||
*/
|
||||
static void udp_update_check4(struct udp4_l2_buf_t *buf)
|
||||
{
|
||||
uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_UDP);
|
||||
uint32_t sum = buf->psum;
|
||||
|
||||
sum += buf->iph.tot_len;
|
||||
sum += (buf->iph.saddr >> 16) & 0xffff;
|
||||
sum += buf->iph.saddr & 0xffff;
|
||||
sum += (buf->iph.daddr >> 16) & 0xffff;
|
||||
sum += buf->iph.daddr & 0xffff;
|
||||
|
||||
buf->iph.check = (uint16_t)~csum_fold(sum);
|
||||
}
|
||||
@ -276,8 +276,10 @@ static void udp_update_check4(struct udp4_l2_buf_t *buf)
|
||||
* udp_update_l2_buf() - Update L2 buffers with Ethernet and IPv4 addresses
|
||||
* @eth_d: Ethernet destination address, NULL if unchanged
|
||||
* @eth_s: Ethernet source address, NULL if unchanged
|
||||
* @ip_da: Pointer to IPv4 destination address, NULL if unchanged
|
||||
*/
|
||||
void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
|
||||
void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||
const struct in_addr *ip_da)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -287,6 +289,18 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
|
||||
|
||||
tap_update_mac(&b4->taph, eth_d, eth_s);
|
||||
tap_update_mac(&b6->taph, eth_d, eth_s);
|
||||
|
||||
if (ip_da) {
|
||||
b4->iph.daddr = ip_da->s_addr;
|
||||
if (!i) {
|
||||
b4->iph.saddr = 0;
|
||||
b4->iph.tot_len = 0;
|
||||
b4->iph.check = 0;
|
||||
b4->psum = sum_16b(&b4->iph, 20);
|
||||
} else {
|
||||
b4->psum = udp4_l2_buf[0].psum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -572,7 +586,6 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport,
|
||||
ip_len = udp4_l2_mh_sock[n].msg_len + sizeof(b->iph) + sizeof(b->uh);
|
||||
|
||||
b->iph.tot_len = htons(ip_len);
|
||||
b->iph.daddr = c->ip4.addr_seen.s_addr;
|
||||
|
||||
src_port = ntohs(b->s_in.sin_port);
|
||||
|
||||
@ -786,8 +799,7 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
|
||||
* udp_tap_handler() - Handle packets from tap
|
||||
* @c: Execution context
|
||||
* @af: Address family, AF_INET or AF_INET6
|
||||
* @saddr: Source address
|
||||
* @daddr: Destination address
|
||||
* @addr: Destination address
|
||||
* @p: Pool of UDP packets, with UDP headers
|
||||
* @now: Current timestamp
|
||||
*
|
||||
@ -795,7 +807,7 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
|
||||
*
|
||||
* #syscalls sendmmsg
|
||||
*/
|
||||
int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
|
||||
int udp_tap_handler(struct ctx *c, int af, const void *addr,
|
||||
const struct pool *p, const struct timespec *now)
|
||||
{
|
||||
struct mmsghdr mm[UIO_MAXIOV];
|
||||
@ -809,7 +821,6 @@ int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
|
||||
socklen_t sl;
|
||||
|
||||
(void)c;
|
||||
(void)saddr;
|
||||
|
||||
uh = packet_get(p, 0, 0, sizeof(*uh), NULL);
|
||||
if (!uh)
|
||||
@ -825,7 +836,7 @@ int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
|
||||
s_in = (struct sockaddr_in) {
|
||||
.sin_family = AF_INET,
|
||||
.sin_port = uh->dest,
|
||||
.sin_addr = *(struct in_addr *)daddr,
|
||||
.sin_addr = *(struct in_addr *)addr,
|
||||
};
|
||||
|
||||
sa = (struct sockaddr *)&s_in;
|
||||
@ -870,17 +881,17 @@ int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
|
||||
s_in6 = (struct sockaddr_in6) {
|
||||
.sin6_family = AF_INET6,
|
||||
.sin6_port = uh->dest,
|
||||
.sin6_addr = *(struct in6_addr *)daddr,
|
||||
.sin6_addr = *(struct in6_addr *)addr,
|
||||
};
|
||||
const struct in6_addr *bind_addr = &in6addr_any;
|
||||
|
||||
sa = (struct sockaddr *)&s_in6;
|
||||
sl = sizeof(s_in6);
|
||||
|
||||
if (IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.dns_match) &&
|
||||
if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.dns_match) &&
|
||||
ntohs(s_in6.sin6_port) == 53) {
|
||||
s_in6.sin6_addr = c->ip6.dns_host;
|
||||
} else if (IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw) &&
|
||||
} else if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.gw) &&
|
||||
!c->no_map_gw) {
|
||||
if (!(udp_tap_map[V6][dst].flags & PORT_LOCAL) ||
|
||||
(udp_tap_map[V6][dst].flags & PORT_LOOPBACK))
|
||||
|
5
udp.h
5
udp.h
@ -10,13 +10,14 @@
|
||||
|
||||
void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
|
||||
const struct timespec *now);
|
||||
int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
|
||||
int udp_tap_handler(struct ctx *c, int af, const void *addr,
|
||||
const struct pool *p, const struct timespec *now);
|
||||
int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
|
||||
const void *addr, const char *ifname, in_port_t port);
|
||||
int udp_init(struct ctx *c);
|
||||
void udp_timer(struct ctx *c, const struct timespec *ts);
|
||||
void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s);
|
||||
void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||
const struct in_addr *ip_da);
|
||||
|
||||
/**
|
||||
* union udp_epoll_ref - epoll reference portion for TCP connections
|
||||
|
4
util.h
4
util.h
@ -141,13 +141,11 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
|
||||
.tot_len = 0, \
|
||||
.id = 0, \
|
||||
.frag_off = 0, \
|
||||
.ttl = 0xff, \
|
||||
.ttl = 255, \
|
||||
.protocol = (proto), \
|
||||
.saddr = 0, \
|
||||
.daddr = 0, \
|
||||
}
|
||||
#define L2_BUF_IP4_PSUM(proto) ((uint32_t)htons_constant(0x4500) + \
|
||||
(uint32_t)htons_constant(0xff00 | (proto)))
|
||||
|
||||
#define L2_BUF_IP6_INIT(proto) \
|
||||
{ \
|
||||
|
Loading…
Reference in New Issue
Block a user