mirror of
https://passt.top/passt
synced 2024-12-21 21:25:23 +00:00
vhost-user: add vhost-user
add virtio and vhost-user functions to connect with QEMU. $ ./passt --vhost-user and # qemu-system-x86_64 ... -m 4G \ -object memory-backend-memfd,id=memfd0,share=on,size=4G \ -numa node,memdev=memfd0 \ -chardev socket,id=chr0,path=/tmp/passt_1.socket \ -netdev vhost-user,id=netdev0,chardev=chr0 \ -device virtio-net,mac=9a:2b:2c:2d:2e:2f,netdev=netdev0 \ ... Signed-off-by: Laurent Vivier <lvivier@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> [sbrivio: as suggested by lvivier, include <netinet/if_ether.h> before including <linux/if_ether.h> as C libraries such as musl __UAPI_DEF_ETHHDR in <netinet/if_ether.h> if they already have a definition of struct ethhdr] Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
b2e62f7e85
commit
28997fcb29
6
Makefile
6
Makefile
@ -37,7 +37,8 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
|
||||
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
|
||||
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
|
||||
ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
|
||||
tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c vhost_user.c virtio.c
|
||||
tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c udp_vu.c util.c \
|
||||
vhost_user.c virtio.c vu_common.c
|
||||
QRAP_SRCS = qrap.c
|
||||
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
|
||||
|
||||
@ -47,7 +48,8 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
|
||||
flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
|
||||
lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
|
||||
siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
|
||||
udp.h udp_flow.h util.h vhost_user.h virtio.h
|
||||
tcp_vu.h udp.h udp_flow.h udp_internal.h udp_vu.h util.h vhost_user.h \
|
||||
virtio.h vu_common.h
|
||||
HEADERS = $(PASST_HEADERS) seccomp.h
|
||||
|
||||
C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);}
|
||||
|
19
conf.c
19
conf.c
@ -45,6 +45,7 @@
|
||||
#include "lineread.h"
|
||||
#include "isolation.h"
|
||||
#include "log.h"
|
||||
#include "vhost_user.h"
|
||||
|
||||
#define NETNS_RUN_DIR "/run/netns"
|
||||
|
||||
@ -807,9 +808,14 @@ static void usage(const char *name, FILE *f, int status)
|
||||
" default: same interface name as external one\n");
|
||||
} else {
|
||||
FPRINTF(f,
|
||||
" -s, --socket PATH UNIX domain socket path\n"
|
||||
" -s, --socket, --socket-path PATH UNIX domain socket path\n"
|
||||
" default: probe free path starting from "
|
||||
UNIX_SOCK_PATH "\n", 1);
|
||||
FPRINTF(f,
|
||||
" --vhost-user Enable vhost-user mode\n"
|
||||
" UNIX domain socket is provided by -s option\n"
|
||||
" --print-capabilities print back-end capabilities in JSON format,\n"
|
||||
" only meaningful for vhost-user mode\n");
|
||||
}
|
||||
|
||||
FPRINTF(f,
|
||||
@ -1345,6 +1351,10 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
{"map-guest-addr", required_argument, NULL, 22 },
|
||||
{"host-lo-to-ns-lo", no_argument, NULL, 23 },
|
||||
{"dns-host", required_argument, NULL, 24 },
|
||||
{"vhost-user", no_argument, NULL, 25 },
|
||||
/* vhost-user backend program convention */
|
||||
{"print-capabilities", no_argument, NULL, 26 },
|
||||
{"socket-path", required_argument, NULL, 's' },
|
||||
{ 0 },
|
||||
};
|
||||
const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt";
|
||||
@ -1538,6 +1548,13 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
break;
|
||||
|
||||
die("Invalid host nameserver address: %s", optarg);
|
||||
case 25:
|
||||
if (c->mode == MODE_PASTA)
|
||||
die("--vhost-user is for passt mode only");
|
||||
c->mode = MODE_VU;
|
||||
break;
|
||||
case 26:
|
||||
vu_print_capabilities();
|
||||
break;
|
||||
case 'd':
|
||||
c->debug = 1;
|
||||
|
@ -36,6 +36,10 @@ enum epoll_type {
|
||||
EPOLL_TYPE_TAP_PASST,
|
||||
/* socket listening for qemu socket connections */
|
||||
EPOLL_TYPE_TAP_LISTEN,
|
||||
/* vhost-user command socket */
|
||||
EPOLL_TYPE_VHOST_CMD,
|
||||
/* vhost-user kick event socket */
|
||||
EPOLL_TYPE_VHOST_KICK,
|
||||
|
||||
EPOLL_NUM_TYPES,
|
||||
};
|
||||
|
1
iov.c
1
iov.c
@ -68,7 +68,6 @@ size_t iov_skip_bytes(const struct iovec *iov, size_t n,
|
||||
*
|
||||
* Returns: The number of bytes successfully copied.
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
|
||||
size_t offset, const void *buf, size_t bytes)
|
||||
{
|
||||
|
17
isolation.c
17
isolation.c
@ -379,12 +379,21 @@ void isolate_postfork(const struct ctx *c)
|
||||
|
||||
prctl(PR_SET_DUMPABLE, 0);
|
||||
|
||||
if (c->mode == MODE_PASTA) {
|
||||
prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
|
||||
prog.filter = filter_pasta;
|
||||
} else {
|
||||
switch (c->mode) {
|
||||
case MODE_PASST:
|
||||
prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
|
||||
prog.filter = filter_passt;
|
||||
break;
|
||||
case MODE_PASTA:
|
||||
prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
|
||||
prog.filter = filter_pasta;
|
||||
break;
|
||||
case MODE_VU:
|
||||
prog.len = (unsigned short)ARRAY_SIZE(filter_vu);
|
||||
prog.filter = filter_vu;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
|
||||
|
11
packet.c
11
packet.c
@ -36,6 +36,17 @@
|
||||
static int packet_check_range(const struct pool *p, size_t offset, size_t len,
|
||||
const char *start, const char *func, int line)
|
||||
{
|
||||
if (p->buf_size == 0) {
|
||||
int ret;
|
||||
|
||||
ret = vu_packet_check_range((void *)p->buf, offset, len, start);
|
||||
|
||||
if (ret == -1)
|
||||
trace("cannot find region, %s:%i", func, line);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (start < p->buf) {
|
||||
trace("packet start %p before buffer start %p, "
|
||||
"%s:%i", (void *)start, (void *)p->buf, func, line);
|
||||
|
8
packet.h
8
packet.h
@ -8,8 +8,10 @@
|
||||
|
||||
/**
|
||||
* struct pool - Generic pool of packets stored in a buffer
|
||||
* @buf: Buffer storing packet descriptors
|
||||
* @buf_size: Total size of buffer
|
||||
* @buf: Buffer storing packet descriptors,
|
||||
* a struct vu_dev_region array for passt vhost-user mode
|
||||
* @buf_size: Total size of buffer,
|
||||
* 0 for passt vhost-user mode
|
||||
* @size: Number of usable descriptors for the pool
|
||||
* @count: Number of used descriptors for the pool
|
||||
* @pkt: Descriptors: see macros below
|
||||
@ -22,6 +24,8 @@ struct pool {
|
||||
struct iovec pkt[1];
|
||||
};
|
||||
|
||||
int vu_packet_check_range(void *buf, size_t offset, size_t len,
|
||||
const char *start);
|
||||
void packet_add_do(struct pool *p, size_t len, const char *start,
|
||||
const char *func, int line);
|
||||
void *packet_get_do(const struct pool *p, const size_t idx,
|
||||
|
10
passt.1
10
passt.1
@ -404,12 +404,20 @@ interface address are configured on a given host interface.
|
||||
.SS \fBpasst\fR-only options
|
||||
|
||||
.TP
|
||||
.BR \-s ", " \-\-socket " " \fIpath
|
||||
.BR \-s ", " \-\-socket-path ", " \-\-socket " " \fIpath
|
||||
Path for UNIX domain socket used by \fBqemu\fR(1) or \fBqrap\fR(1) to connect to
|
||||
\fBpasst\fR.
|
||||
Default is to probe a free socket, not accepting connections, starting from
|
||||
\fI/tmp/passt_1.socket\fR to \fI/tmp/passt_64.socket\fR.
|
||||
|
||||
.TP
|
||||
.BR \-\-vhost-user
|
||||
Enable vhost-user. The vhost-user command socket is provided by \fB--socket\fR.
|
||||
|
||||
.TP
|
||||
.BR \-\-print-capabilities
|
||||
Print back-end capabilities in JSON format, only meaningful for vhost-user mode.
|
||||
|
||||
.TP
|
||||
.BR \-F ", " \-\-fd " " \fIFD
|
||||
Pass a pre-opened, connected socket to \fBpasst\fR. Usually the socket is opened
|
||||
|
9
passt.c
9
passt.c
@ -50,6 +50,7 @@
|
||||
#include "log.h"
|
||||
#include "tcp_splice.h"
|
||||
#include "ndp.h"
|
||||
#include "vu_common.h"
|
||||
|
||||
#define EPOLL_EVENTS 8
|
||||
|
||||
@ -72,6 +73,8 @@ char *epoll_type_str[] = {
|
||||
[EPOLL_TYPE_TAP_PASTA] = "/dev/net/tun device",
|
||||
[EPOLL_TYPE_TAP_PASST] = "connected qemu socket",
|
||||
[EPOLL_TYPE_TAP_LISTEN] = "listening qemu socket",
|
||||
[EPOLL_TYPE_VHOST_CMD] = "vhost-user command socket",
|
||||
[EPOLL_TYPE_VHOST_KICK] = "vhost-user kick socket",
|
||||
};
|
||||
static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
|
||||
"epoll_type_str[] doesn't match enum epoll_type");
|
||||
@ -347,6 +350,12 @@ loop:
|
||||
case EPOLL_TYPE_PING:
|
||||
icmp_sock_handler(&c, ref);
|
||||
break;
|
||||
case EPOLL_TYPE_VHOST_CMD:
|
||||
vu_control_handler(c.vdev, c.fd_tap, eventmask);
|
||||
break;
|
||||
case EPOLL_TYPE_VHOST_KICK:
|
||||
vu_kick_cb(c.vdev, ref, &now);
|
||||
break;
|
||||
default:
|
||||
/* Can't happen */
|
||||
ASSERT(0);
|
||||
|
7
passt.h
7
passt.h
@ -25,6 +25,7 @@ union epoll_ref;
|
||||
#include "fwd.h"
|
||||
#include "tcp.h"
|
||||
#include "udp.h"
|
||||
#include "vhost_user.h"
|
||||
|
||||
/* Default address for our end on the tap interface. Bit 0 of byte 0 must be 0
|
||||
* (unicast) and bit 1 of byte 1 must be 1 (locally administered). Otherwise
|
||||
@ -43,6 +44,7 @@ union epoll_ref;
|
||||
* @icmp: ICMP-specific reference part
|
||||
* @data: Data handled by protocol handlers
|
||||
* @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone
|
||||
* @queue: vhost-user queue index for this fd
|
||||
* @u64: Opaque reference for epoll_ctl() and epoll_wait()
|
||||
*/
|
||||
union epoll_ref {
|
||||
@ -58,6 +60,7 @@ union epoll_ref {
|
||||
union udp_listen_epoll_ref udp;
|
||||
uint32_t data;
|
||||
int nsdir_fd;
|
||||
int queue;
|
||||
};
|
||||
};
|
||||
uint64_t u64;
|
||||
@ -94,6 +97,7 @@ struct fqdn {
|
||||
enum passt_modes {
|
||||
MODE_PASST,
|
||||
MODE_PASTA,
|
||||
MODE_VU,
|
||||
};
|
||||
|
||||
/**
|
||||
@ -229,6 +233,7 @@ struct ip6_ctx {
|
||||
* @freebind: Allow binding of non-local addresses for forwarding
|
||||
* @low_wmem: Low probed net.core.wmem_max
|
||||
* @low_rmem: Low probed net.core.rmem_max
|
||||
* @vdev: vhost-user device
|
||||
*/
|
||||
struct ctx {
|
||||
enum passt_modes mode;
|
||||
@ -291,6 +296,8 @@ struct ctx {
|
||||
|
||||
int low_wmem;
|
||||
int low_rmem;
|
||||
|
||||
struct vu_dev *vdev;
|
||||
};
|
||||
|
||||
void proto_update_l2_buf(const unsigned char *eth_d,
|
||||
|
1
pcap.c
1
pcap.c
@ -143,7 +143,6 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
|
||||
* @iovcnt: Number of buffers (@iov entries)
|
||||
* @offset: Offset of the L2 frame within the full data length
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset)
|
||||
{
|
||||
struct timespec now = { 0 };
|
||||
|
77
tap.c
77
tap.c
@ -58,6 +58,8 @@
|
||||
#include "packet.h"
|
||||
#include "tap.h"
|
||||
#include "log.h"
|
||||
#include "vhost_user.h"
|
||||
#include "vu_common.h"
|
||||
|
||||
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
|
||||
static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
|
||||
@ -78,16 +80,22 @@ void tap_send_single(const struct ctx *c, const void *data, size_t l2len)
|
||||
struct iovec iov[2];
|
||||
size_t iovcnt = 0;
|
||||
|
||||
if (c->mode == MODE_PASST) {
|
||||
switch (c->mode) {
|
||||
case MODE_PASST:
|
||||
iov[iovcnt] = IOV_OF_LVALUE(vnet_len);
|
||||
iovcnt++;
|
||||
/* fall through */
|
||||
case MODE_PASTA:
|
||||
iov[iovcnt].iov_base = (void *)data;
|
||||
iov[iovcnt].iov_len = l2len;
|
||||
iovcnt++;
|
||||
|
||||
tap_send_frames(c, iov, iovcnt, 1);
|
||||
break;
|
||||
case MODE_VU:
|
||||
vu_send_single(c, data, l2len);
|
||||
break;
|
||||
}
|
||||
|
||||
iov[iovcnt].iov_base = (void *)data;
|
||||
iov[iovcnt].iov_len = l2len;
|
||||
iovcnt++;
|
||||
|
||||
tap_send_frames(c, iov, iovcnt, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -414,10 +422,18 @@ size_t tap_send_frames(const struct ctx *c, const struct iovec *iov,
|
||||
if (!nframes)
|
||||
return 0;
|
||||
|
||||
if (c->mode == MODE_PASTA)
|
||||
switch (c->mode) {
|
||||
case MODE_PASTA:
|
||||
m = tap_send_frames_pasta(c, iov, bufs_per_frame, nframes);
|
||||
else
|
||||
break;
|
||||
case MODE_PASST:
|
||||
m = tap_send_frames_passt(c, iov, bufs_per_frame, nframes);
|
||||
break;
|
||||
case MODE_VU:
|
||||
/* fall through */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
if (m < nframes)
|
||||
debug("tap: failed to send %zu frames of %zu",
|
||||
@ -979,7 +995,7 @@ void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
|
||||
* tap_sock_reset() - Handle closing or failure of connect AF_UNIX socket
|
||||
* @c: Execution context
|
||||
*/
|
||||
static void tap_sock_reset(struct ctx *c)
|
||||
void tap_sock_reset(struct ctx *c)
|
||||
{
|
||||
info("Client connection closed%s", c->one_off ? ", exiting" : "");
|
||||
|
||||
@ -990,6 +1006,8 @@ static void tap_sock_reset(struct ctx *c)
|
||||
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_tap, NULL);
|
||||
close(c->fd_tap);
|
||||
c->fd_tap = -1;
|
||||
if (c->mode == MODE_VU)
|
||||
vu_cleanup(c->vdev);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1210,6 +1228,11 @@ static void tap_backend_show_hints(struct ctx *c)
|
||||
info("or qrap, for earlier qemu versions:");
|
||||
info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
|
||||
break;
|
||||
case MODE_VU:
|
||||
info("You can start qemu with:");
|
||||
info(" kvm ... -chardev socket,id=chr0,path=%s -netdev vhost-user,id=netdev0,chardev=chr0 -device virtio-net,netdev=netdev0 -object memory-backend-memfd,id=memfd0,share=on,size=$RAMSIZE -numa node,memdev=memfd0\n",
|
||||
c->sock_path);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1237,8 +1260,8 @@ static void tap_sock_unix_init(const struct ctx *c)
|
||||
*/
|
||||
void tap_listen_handler(struct ctx *c, uint32_t events)
|
||||
{
|
||||
union epoll_ref ref = { .type = EPOLL_TYPE_TAP_PASST };
|
||||
struct epoll_event ev = { 0 };
|
||||
union epoll_ref ref = { 0 };
|
||||
int v = INT_MAX / 2;
|
||||
struct ucred ucred;
|
||||
socklen_t len;
|
||||
@ -1278,6 +1301,10 @@ void tap_listen_handler(struct ctx *c, uint32_t events)
|
||||
trace("tap: failed to set SO_SNDBUF to %i", v);
|
||||
|
||||
ref.fd = c->fd_tap;
|
||||
if (c->mode == MODE_VU)
|
||||
ref.type = EPOLL_TYPE_VHOST_CMD;
|
||||
else
|
||||
ref.type = EPOLL_TYPE_TAP_PASST;
|
||||
ev.events = EPOLLIN | EPOLLRDHUP;
|
||||
ev.data.u64 = ref.u64;
|
||||
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
|
||||
@ -1344,7 +1371,7 @@ static void tap_sock_tun_init(struct ctx *c)
|
||||
* @base: Buffer base
|
||||
* @size Buffer size
|
||||
*/
|
||||
static void tap_sock_update_pool(void *base, size_t size)
|
||||
void tap_sock_update_pool(void *base, size_t size)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -1364,7 +1391,10 @@ static void tap_sock_update_pool(void *base, size_t size)
|
||||
*/
|
||||
void tap_backend_init(struct ctx *c)
|
||||
{
|
||||
tap_sock_update_pool(pkt_buf, sizeof(pkt_buf));
|
||||
if (c->mode == MODE_VU)
|
||||
tap_sock_update_pool(NULL, 0);
|
||||
else
|
||||
tap_sock_update_pool(pkt_buf, sizeof(pkt_buf));
|
||||
|
||||
if (c->fd_tap != -1) { /* Passed as --fd */
|
||||
struct epoll_event ev = { 0 };
|
||||
@ -1372,10 +1402,17 @@ void tap_backend_init(struct ctx *c)
|
||||
|
||||
ASSERT(c->one_off);
|
||||
ref.fd = c->fd_tap;
|
||||
if (c->mode == MODE_PASST)
|
||||
switch (c->mode) {
|
||||
case MODE_PASST:
|
||||
ref.type = EPOLL_TYPE_TAP_PASST;
|
||||
else
|
||||
break;
|
||||
case MODE_PASTA:
|
||||
ref.type = EPOLL_TYPE_TAP_PASTA;
|
||||
break;
|
||||
case MODE_VU:
|
||||
ref.type = EPOLL_TYPE_VHOST_CMD;
|
||||
break;
|
||||
}
|
||||
|
||||
ev.events = EPOLLIN | EPOLLRDHUP;
|
||||
ev.data.u64 = ref.u64;
|
||||
@ -1383,9 +1420,14 @@ void tap_backend_init(struct ctx *c)
|
||||
return;
|
||||
}
|
||||
|
||||
if (c->mode == MODE_PASTA) {
|
||||
switch (c->mode) {
|
||||
case MODE_PASTA:
|
||||
tap_sock_tun_init(c);
|
||||
} else {
|
||||
break;
|
||||
case MODE_VU:
|
||||
vu_init(c);
|
||||
/* fall through */
|
||||
case MODE_PASST:
|
||||
tap_sock_unix_init(c);
|
||||
|
||||
/* In passt mode, we don't know the guest's MAC address until it
|
||||
@ -1393,6 +1435,7 @@ void tap_backend_init(struct ctx *c)
|
||||
* first packets will reach it.
|
||||
*/
|
||||
memset(&c->guest_mac, 0xff, sizeof(c->guest_mac));
|
||||
break;
|
||||
}
|
||||
|
||||
tap_backend_show_hints(c);
|
||||
|
5
tap.h
5
tap.h
@ -40,7 +40,8 @@ static inline struct iovec tap_hdr_iov(const struct ctx *c,
|
||||
*/
|
||||
static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
|
||||
{
|
||||
thdr->vnet_len = htonl(l2len);
|
||||
if (thdr)
|
||||
thdr->vnet_len = htonl(l2len);
|
||||
}
|
||||
|
||||
void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
|
||||
@ -68,6 +69,8 @@ void tap_handler_pasta(struct ctx *c, uint32_t events,
|
||||
void tap_handler_passt(struct ctx *c, uint32_t events,
|
||||
const struct timespec *now);
|
||||
int tap_sock_unix_open(char *sock_path);
|
||||
void tap_sock_reset(struct ctx *c);
|
||||
void tap_sock_update_pool(void *base, size_t size);
|
||||
void tap_backend_init(struct ctx *c);
|
||||
void tap_flush_pools(void);
|
||||
void tap_handler(struct ctx *c, const struct timespec *now);
|
||||
|
7
tcp.c
7
tcp.c
@ -304,6 +304,7 @@
|
||||
#include "flow_table.h"
|
||||
#include "tcp_internal.h"
|
||||
#include "tcp_buf.h"
|
||||
#include "tcp_vu.h"
|
||||
|
||||
/* MSS rounding: see SET_MSS() */
|
||||
#define MSS_DEFAULT 536
|
||||
@ -1314,6 +1315,9 @@ int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
static int tcp_send_flag(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
int flags)
|
||||
{
|
||||
if (c->mode == MODE_VU)
|
||||
return tcp_vu_send_flag(c, conn, flags);
|
||||
|
||||
return tcp_buf_send_flag(c, conn, flags);
|
||||
}
|
||||
|
||||
@ -1707,6 +1711,9 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
|
||||
*/
|
||||
static int tcp_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
{
|
||||
if (c->mode == MODE_VU)
|
||||
return tcp_vu_data_from_sock(c, conn);
|
||||
|
||||
return tcp_buf_data_from_sock(c, conn);
|
||||
}
|
||||
|
||||
|
498
tcp_vu.c
Normal file
498
tcp_vu.c
Normal file
@ -0,0 +1,498 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/* tcp_vu.c - TCP L2 vhost-user management functions
|
||||
*
|
||||
* Copyright Red Hat
|
||||
* Author: Laurent Vivier <lvivier@redhat.com>
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/tcp.h>
|
||||
|
||||
#include <sys/socket.h>
|
||||
|
||||
#include <netinet/if_ether.h>
|
||||
#include <linux/virtio_net.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "ip.h"
|
||||
#include "passt.h"
|
||||
#include "siphash.h"
|
||||
#include "inany.h"
|
||||
#include "vhost_user.h"
|
||||
#include "tcp.h"
|
||||
#include "pcap.h"
|
||||
#include "flow.h"
|
||||
#include "tcp_conn.h"
|
||||
#include "flow_table.h"
|
||||
#include "tcp_vu.h"
|
||||
#include "tap.h"
|
||||
#include "tcp_internal.h"
|
||||
#include "checksum.h"
|
||||
#include "vu_common.h"
|
||||
#include <time.h>
|
||||
|
||||
static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + 1];
|
||||
static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
|
||||
static int head[VIRTQUEUE_MAX_SIZE + 1];
|
||||
static int head_cnt;
|
||||
|
||||
/**
|
||||
* tcp_vu_hdrlen() - return the size of the header in level 2 frame (TCP)
|
||||
* @v6: Set for IPv6 packet
|
||||
*
|
||||
* Return: Return the size of the header
|
||||
*/
|
||||
static size_t tcp_vu_hdrlen(bool v6)
|
||||
{
|
||||
size_t hdrlen;
|
||||
|
||||
hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
|
||||
sizeof(struct ethhdr) + sizeof(struct tcphdr);
|
||||
|
||||
if (v6)
|
||||
hdrlen += sizeof(struct ipv6hdr);
|
||||
else
|
||||
hdrlen += sizeof(struct iphdr);
|
||||
|
||||
return hdrlen;
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_vu_update_check() - Calculate TCP checksum
|
||||
* @tapside: Address information for one side of the flow
|
||||
* @iov: Pointer to the array of IO vectors
|
||||
* @iov_cnt: Length of the array
|
||||
*/
|
||||
static void tcp_vu_update_check(const struct flowside *tapside,
|
||||
struct iovec *iov, int iov_cnt)
|
||||
{
|
||||
char *base = iov[0].iov_base;
|
||||
|
||||
if (inany_v4(&tapside->oaddr)) {
|
||||
const struct iphdr *iph = vu_ip(base);
|
||||
|
||||
tcp_update_check_tcp4(iph, iov, iov_cnt,
|
||||
(char *)vu_payloadv4(base) - base);
|
||||
} else {
|
||||
const struct ipv6hdr *ip6h = vu_ip(base);
|
||||
|
||||
tcp_update_check_tcp6(ip6h, iov, iov_cnt,
|
||||
(char *)vu_payloadv6(base) - base);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_vu_send_flag() - Send segment with flags to vhost-user (no payload)
|
||||
* @c: Execution context
|
||||
* @conn: Connection pointer
|
||||
* @flags: TCP flags: if not set, send segment only if ACK is due
|
||||
*
|
||||
* Return: negative error code on connection reset, 0 otherwise
|
||||
*/
|
||||
int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
|
||||
{
|
||||
struct vu_dev *vdev = c->vdev;
|
||||
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
|
||||
const struct flowside *tapside = TAPFLOW(conn);
|
||||
size_t l2len, l4len, optlen, hdrlen;
|
||||
struct vu_virtq_element flags_elem[2];
|
||||
struct tcp_payload_t *payload;
|
||||
struct ipv6hdr *ip6h = NULL;
|
||||
struct iovec flags_iov[2];
|
||||
struct iphdr *iph = NULL;
|
||||
struct ethhdr *eh;
|
||||
uint32_t seq;
|
||||
int elem_cnt;
|
||||
int nb_ack;
|
||||
int ret;
|
||||
|
||||
hdrlen = tcp_vu_hdrlen(CONN_V6(conn));
|
||||
|
||||
vu_set_element(&flags_elem[0], NULL, &flags_iov[0]);
|
||||
|
||||
elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
|
||||
hdrlen + sizeof(struct tcp_syn_opts), NULL);
|
||||
if (elem_cnt != 1)
|
||||
return -1;
|
||||
|
||||
ASSERT(flags_elem[0].in_sg[0].iov_len >=
|
||||
hdrlen + sizeof(struct tcp_syn_opts));
|
||||
|
||||
vu_set_vnethdr(vdev, flags_elem[0].in_sg[0].iov_base, 1);
|
||||
|
||||
eh = vu_eth(flags_elem[0].in_sg[0].iov_base);
|
||||
|
||||
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
|
||||
memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
|
||||
|
||||
if (CONN_V4(conn)) {
|
||||
eh->h_proto = htons(ETH_P_IP);
|
||||
|
||||
iph = vu_ip(flags_elem[0].in_sg[0].iov_base);
|
||||
*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
|
||||
|
||||
payload = vu_payloadv4(flags_elem[0].in_sg[0].iov_base);
|
||||
} else {
|
||||
eh->h_proto = htons(ETH_P_IPV6);
|
||||
|
||||
ip6h = vu_ip(flags_elem[0].in_sg[0].iov_base);
|
||||
*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
|
||||
payload = vu_payloadv6(flags_elem[0].in_sg[0].iov_base);
|
||||
}
|
||||
|
||||
memset(&payload->th, 0, sizeof(payload->th));
|
||||
payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
|
||||
payload->th.ack = 1;
|
||||
|
||||
seq = conn->seq_to_tap;
|
||||
ret = tcp_prepare_flags(c, conn, flags, &payload->th,
|
||||
(struct tcp_syn_opts *)payload->data,
|
||||
&optlen);
|
||||
if (ret <= 0) {
|
||||
vu_queue_rewind(vq, 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (CONN_V4(conn)) {
|
||||
l4len = tcp_fill_headers4(conn, NULL, iph, payload, optlen,
|
||||
NULL, seq, true);
|
||||
l2len = sizeof(*iph);
|
||||
} else {
|
||||
l4len = tcp_fill_headers6(conn, NULL, ip6h, payload, optlen,
|
||||
seq, true);
|
||||
l2len = sizeof(*ip6h);
|
||||
}
|
||||
l2len += l4len + sizeof(struct ethhdr);
|
||||
|
||||
flags_elem[0].in_sg[0].iov_len = l2len +
|
||||
sizeof(struct virtio_net_hdr_mrg_rxbuf);
|
||||
if (*c->pcap) {
|
||||
tcp_vu_update_check(tapside, &flags_elem[0].in_sg[0], 1);
|
||||
pcap_iov(&flags_elem[0].in_sg[0], 1,
|
||||
sizeof(struct virtio_net_hdr_mrg_rxbuf));
|
||||
}
|
||||
nb_ack = 1;
|
||||
|
||||
if (flags & DUP_ACK) {
|
||||
vu_set_element(&flags_elem[1], NULL, &flags_iov[1]);
|
||||
|
||||
elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
|
||||
flags_elem[0].in_sg[0].iov_len, NULL);
|
||||
if (elem_cnt == 1 &&
|
||||
flags_elem[1].in_sg[0].iov_len >=
|
||||
flags_elem[0].in_sg[0].iov_len) {
|
||||
memcpy(flags_elem[1].in_sg[0].iov_base,
|
||||
flags_elem[0].in_sg[0].iov_base,
|
||||
flags_elem[0].in_sg[0].iov_len);
|
||||
nb_ack++;
|
||||
|
||||
if (*c->pcap) {
|
||||
pcap_iov(&flags_elem[1].in_sg[0], 1,
|
||||
sizeof(struct virtio_net_hdr_mrg_rxbuf));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vu_flush(vdev, vq, flags_elem, nb_ack);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** tcp_vu_sock_recv() - Receive datastream from socket into vhost-user buffers
|
||||
* @c: Execution context
|
||||
* @conn: Connection pointer
|
||||
* @v6: Set for IPv6 connections
|
||||
* @already_sent: Number of bytes already sent
|
||||
* @fillsize: Maximum bytes to fill in guest-side receiving window
|
||||
* @iov_cnt: number of iov (output)
|
||||
*
|
||||
* Return: Number of iov entries used to store the data or negative error code
|
||||
*/
|
||||
static ssize_t tcp_vu_sock_recv(const struct ctx *c,
|
||||
const struct tcp_tap_conn *conn, bool v6,
|
||||
uint32_t already_sent, size_t fillsize,
|
||||
int *iov_cnt)
|
||||
{
|
||||
struct vu_dev *vdev = c->vdev;
|
||||
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
|
||||
struct msghdr mh_sock = { 0 };
|
||||
uint16_t mss = MSS_GET(conn);
|
||||
int s = conn->sock;
|
||||
ssize_t ret, len;
|
||||
size_t hdrlen;
|
||||
int elem_cnt;
|
||||
int i;
|
||||
|
||||
*iov_cnt = 0;
|
||||
|
||||
hdrlen = tcp_vu_hdrlen(v6);
|
||||
|
||||
vu_init_elem(elem, &iov_vu[1], VIRTQUEUE_MAX_SIZE);
|
||||
|
||||
elem_cnt = 0;
|
||||
head_cnt = 0;
|
||||
while (fillsize > 0 && elem_cnt < VIRTQUEUE_MAX_SIZE) {
|
||||
struct iovec *iov;
|
||||
size_t frame_size, dlen;
|
||||
int cnt;
|
||||
|
||||
cnt = vu_collect(vdev, vq, &elem[elem_cnt],
|
||||
VIRTQUEUE_MAX_SIZE - elem_cnt,
|
||||
MIN(mss, fillsize) + hdrlen, &frame_size);
|
||||
if (cnt == 0)
|
||||
break;
|
||||
|
||||
dlen = frame_size - hdrlen;
|
||||
|
||||
/* reserve space for headers in iov */
|
||||
iov = &elem[elem_cnt].in_sg[0];
|
||||
ASSERT(iov->iov_len >= hdrlen);
|
||||
iov->iov_base = (char *)iov->iov_base + hdrlen;
|
||||
iov->iov_len -= hdrlen;
|
||||
head[head_cnt++] = elem_cnt;
|
||||
|
||||
fillsize -= dlen;
|
||||
elem_cnt += cnt;
|
||||
}
|
||||
|
||||
if (peek_offset_cap) {
|
||||
mh_sock.msg_iov = iov_vu + 1;
|
||||
mh_sock.msg_iovlen = elem_cnt;
|
||||
} else {
|
||||
iov_vu[0].iov_base = tcp_buf_discard;
|
||||
iov_vu[0].iov_len = already_sent;
|
||||
|
||||
mh_sock.msg_iov = iov_vu;
|
||||
mh_sock.msg_iovlen = elem_cnt + 1;
|
||||
}
|
||||
|
||||
do
|
||||
ret = recvmsg(s, &mh_sock, MSG_PEEK);
|
||||
while (ret < 0 && errno == EINTR);
|
||||
|
||||
if (ret < 0) {
|
||||
vu_queue_rewind(vq, elem_cnt);
|
||||
return -errno;
|
||||
}
|
||||
|
||||
if (!peek_offset_cap)
|
||||
ret -= already_sent;
|
||||
|
||||
/* adjust iov number and length of the last iov */
|
||||
len = ret;
|
||||
for (i = 0; len && i < elem_cnt; i++) {
|
||||
struct iovec *iov = &elem[i].in_sg[0];
|
||||
|
||||
if (iov->iov_len > (size_t)len)
|
||||
iov->iov_len = len;
|
||||
|
||||
len -= iov->iov_len;
|
||||
}
|
||||
/* adjust head count */
|
||||
while (head_cnt > 0 && head[head_cnt - 1] > i)
|
||||
head_cnt--;
|
||||
/* mark end of array */
|
||||
head[head_cnt] = i;
|
||||
*iov_cnt = i;
|
||||
|
||||
/* release unused buffers */
|
||||
vu_queue_rewind(vq, elem_cnt - i);
|
||||
|
||||
/* restore space for headers in iov */
|
||||
for (i = 0; i < head_cnt; i++) {
|
||||
struct iovec *iov = &elem[head[i]].in_sg[0];
|
||||
|
||||
iov->iov_base = (char *)iov->iov_base - hdrlen;
|
||||
iov->iov_len += hdrlen;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_vu_prepare() - Prepare the frame header
|
||||
* @c: Execution context
|
||||
* @conn: Connection pointer
|
||||
* @first: Pointer to the array of IO vectors
|
||||
* @dlen: Packet data length
|
||||
* @check: Checksum, if already known
|
||||
*/
|
||||
static void tcp_vu_prepare(const struct ctx *c,
|
||||
struct tcp_tap_conn *conn, char *base,
|
||||
size_t dlen, const uint16_t **check)
|
||||
{
|
||||
const struct flowside *toside = TAPFLOW(conn);
|
||||
struct tcp_payload_t *payload;
|
||||
struct ipv6hdr *ip6h = NULL;
|
||||
struct iphdr *iph = NULL;
|
||||
struct ethhdr *eh;
|
||||
|
||||
/* we guess the first iovec provided by the guest can embed
|
||||
* all the headers needed by L2 frame
|
||||
*/
|
||||
|
||||
eh = vu_eth(base);
|
||||
|
||||
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
|
||||
memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
|
||||
|
||||
/* initialize header */
|
||||
|
||||
if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
|
||||
eh->h_proto = htons(ETH_P_IP);
|
||||
|
||||
iph = vu_ip(base);
|
||||
*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);
|
||||
payload = vu_payloadv4(base);
|
||||
} else {
|
||||
eh->h_proto = htons(ETH_P_IPV6);
|
||||
|
||||
ip6h = vu_ip(base);
|
||||
*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);
|
||||
|
||||
payload = vu_payloadv6(base);
|
||||
}
|
||||
|
||||
memset(&payload->th, 0, sizeof(payload->th));
|
||||
payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
|
||||
payload->th.ack = 1;
|
||||
|
||||
if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
|
||||
tcp_fill_headers4(conn, NULL, iph, payload, dlen,
|
||||
*check, conn->seq_to_tap, true);
|
||||
*check = &iph->check;
|
||||
} else {
|
||||
tcp_fill_headers6(conn, NULL, ip6h, payload, dlen,
|
||||
conn->seq_to_tap, true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_vu_data_from_sock() - Handle new data from socket, queue to vhost-user,
|
||||
* in window
|
||||
* @c: Execution context
|
||||
* @conn: Connection pointer
|
||||
*
|
||||
* Return: Negative on connection reset, 0 otherwise
|
||||
*/
|
||||
int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
{
|
||||
uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
|
||||
struct vu_dev *vdev = c->vdev;
|
||||
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
|
||||
const struct flowside *tapside = TAPFLOW(conn);
|
||||
size_t fillsize, hdrlen;
|
||||
int v6 = CONN_V6(conn);
|
||||
uint32_t already_sent;
|
||||
const uint16_t *check;
|
||||
int i, iov_cnt;
|
||||
ssize_t len;
|
||||
|
||||
if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
|
||||
debug("Got packet, but RX virtqueue not usable yet");
|
||||
return 0;
|
||||
}
|
||||
|
||||
already_sent = conn->seq_to_tap - conn->seq_ack_from_tap;
|
||||
|
||||
if (SEQ_LT(already_sent, 0)) {
|
||||
/* RFC 761, section 2.1. */
|
||||
flow_trace(conn, "ACK sequence gap: ACK for %u, sent: %u",
|
||||
conn->seq_ack_from_tap, conn->seq_to_tap);
|
||||
conn->seq_to_tap = conn->seq_ack_from_tap;
|
||||
already_sent = 0;
|
||||
if (tcp_set_peek_offset(conn->sock, 0)) {
|
||||
tcp_rst(c, conn);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!wnd_scaled || already_sent >= wnd_scaled) {
|
||||
conn_flag(c, conn, STALLED);
|
||||
conn_flag(c, conn, ACK_FROM_TAP_DUE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set up buffer descriptors we'll fill completely and partially. */
|
||||
|
||||
fillsize = wnd_scaled - already_sent;
|
||||
|
||||
/* collect the buffers from vhost-user and fill them with the
|
||||
* data from the socket
|
||||
*/
|
||||
len = tcp_vu_sock_recv(c, conn, v6, already_sent, fillsize, &iov_cnt);
|
||||
if (len < 0) {
|
||||
if (len != -EAGAIN && len != -EWOULDBLOCK) {
|
||||
tcp_rst(c, conn);
|
||||
return len;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!len) {
|
||||
if (already_sent) {
|
||||
conn_flag(c, conn, STALLED);
|
||||
} else if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) ==
|
||||
SOCK_FIN_RCVD) {
|
||||
int ret = tcp_vu_send_flag(c, conn, FIN | ACK);
|
||||
if (ret) {
|
||||
tcp_rst(c, conn);
|
||||
return ret;
|
||||
}
|
||||
|
||||
conn_event(c, conn, TAP_FIN_SENT);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
conn_flag(c, conn, ~STALLED);
|
||||
|
||||
/* Likely, some new data was acked too. */
|
||||
tcp_update_seqack_wnd(c, conn, false, NULL);
|
||||
|
||||
/* initialize headers */
|
||||
/* iov_vu is an array of buffers and the buffer size can be
|
||||
* smaller than the frame size we want to use but with
|
||||
* num_buffer we can merge several virtio iov buffers in one packet
|
||||
* we need only to set the packet headers in the first iov and
|
||||
* num_buffer to the number of iov entries
|
||||
*/
|
||||
|
||||
hdrlen = tcp_vu_hdrlen(v6);
|
||||
for (i = 0, check = NULL; i < head_cnt; i++) {
|
||||
struct iovec *iov = &elem[head[i]].in_sg[0];
|
||||
int buf_cnt = head[i + 1] - head[i];
|
||||
int dlen = iov_size(iov, buf_cnt) - hdrlen;
|
||||
|
||||
vu_set_vnethdr(vdev, iov->iov_base, buf_cnt);
|
||||
|
||||
/* we compute IPv4 header checksum only for the
|
||||
* first and the last, all other checksums are the
|
||||
* same as the first one
|
||||
*/
|
||||
if (i + 1 == head_cnt)
|
||||
check = NULL;
|
||||
|
||||
tcp_vu_prepare(c, conn, iov->iov_base, dlen, &check);
|
||||
|
||||
if (*c->pcap) {
|
||||
tcp_vu_update_check(tapside, iov, buf_cnt);
|
||||
pcap_iov(iov, buf_cnt,
|
||||
sizeof(struct virtio_net_hdr_mrg_rxbuf));
|
||||
}
|
||||
|
||||
conn->seq_to_tap += dlen;
|
||||
}
|
||||
|
||||
/* send packets */
|
||||
vu_flush(vdev, vq, elem, iov_cnt);
|
||||
|
||||
conn_flag(c, conn, ACK_FROM_TAP_DUE);
|
||||
|
||||
return 0;
|
||||
}
|
12
tcp_vu.h
Normal file
12
tcp_vu.h
Normal file
@ -0,0 +1,12 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/* Copyright Red Hat
|
||||
* Author: Laurent Vivier <lvivier@redhat.com>
|
||||
*/
|
||||
|
||||
#ifndef TCP_VU_H
|
||||
#define TCP_VU_H
|
||||
|
||||
int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags);
|
||||
int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn);
|
||||
|
||||
#endif /*TCP_VU_H */
|
11
udp.c
11
udp.c
@ -110,6 +110,7 @@
|
||||
#include "log.h"
|
||||
#include "flow_table.h"
|
||||
#include "udp_internal.h"
|
||||
#include "udp_vu.h"
|
||||
|
||||
/* "Spliced" sockets indexed by bound port (host order) */
|
||||
static int udp_splice_ns [IP_VERSIONS][NUM_PORTS];
|
||||
@ -628,6 +629,11 @@ void udp_listen_sock_handler(const struct ctx *c,
|
||||
union epoll_ref ref, uint32_t events,
|
||||
const struct timespec *now)
|
||||
{
|
||||
if (c->mode == MODE_VU) {
|
||||
udp_vu_listen_sock_handler(c, ref, events, now);
|
||||
return;
|
||||
}
|
||||
|
||||
udp_buf_listen_sock_handler(c, ref, events, now);
|
||||
}
|
||||
|
||||
@ -698,6 +704,11 @@ static void udp_buf_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||
void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events, const struct timespec *now)
|
||||
{
|
||||
if (c->mode == MODE_VU) {
|
||||
udp_vu_reply_sock_handler(c, ref, events, now);
|
||||
return;
|
||||
}
|
||||
|
||||
udp_buf_reply_sock_handler(c, ref, events, now);
|
||||
}
|
||||
|
||||
|
343
udp_vu.c
Normal file
343
udp_vu.c
Normal file
@ -0,0 +1,343 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/* udp_vu.c - UDP L2 vhost-user management functions
|
||||
*
|
||||
* Copyright Red Hat
|
||||
* Author: Laurent Vivier <lvivier@redhat.com>
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <net/if.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/udp.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <sys/uio.h>
|
||||
#include <linux/virtio_net.h>
|
||||
|
||||
#include "checksum.h"
|
||||
#include "util.h"
|
||||
#include "ip.h"
|
||||
#include "siphash.h"
|
||||
#include "inany.h"
|
||||
#include "passt.h"
|
||||
#include "pcap.h"
|
||||
#include "log.h"
|
||||
#include "vhost_user.h"
|
||||
#include "udp_internal.h"
|
||||
#include "flow.h"
|
||||
#include "flow_table.h"
|
||||
#include "udp_flow.h"
|
||||
#include "udp_vu.h"
|
||||
#include "vu_common.h"
|
||||
|
||||
static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
|
||||
static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE];
|
||||
|
||||
/**
|
||||
* udp_vu_hdrlen() - return the size of the header in level 2 frame (UDP)
|
||||
* @v6: Set for IPv6 packet
|
||||
*
|
||||
* Return: Return the size of the header
|
||||
*/
|
||||
static size_t udp_vu_hdrlen(bool v6)
|
||||
{
|
||||
size_t hdrlen;
|
||||
|
||||
hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
|
||||
sizeof(struct ethhdr) + sizeof(struct udphdr);
|
||||
|
||||
if (v6)
|
||||
hdrlen += sizeof(struct ipv6hdr);
|
||||
else
|
||||
hdrlen += sizeof(struct iphdr);
|
||||
|
||||
return hdrlen;
|
||||
}
|
||||
|
||||
/**
|
||||
* udp_vu_sock_info() - get socket information
|
||||
* @s: Socket to get information from
|
||||
* @s_in: Socket address (output)
|
||||
*
|
||||
* Return: 0 if socket address can be read, -1 otherwise
|
||||
*/
|
||||
static int udp_vu_sock_info(int s, union sockaddr_inany *s_in)
|
||||
{
|
||||
struct msghdr msg = {
|
||||
.msg_name = s_in,
|
||||
.msg_namelen = sizeof(union sockaddr_inany),
|
||||
};
|
||||
|
||||
return recvmsg(s, &msg, MSG_PEEK | MSG_DONTWAIT);
|
||||
}
|
||||
|
||||
/**
|
||||
* udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
|
||||
* @c: Execution context
|
||||
* @s: Socket to receive from
|
||||
* @events: epoll events bitmap
|
||||
* @v6: Set for IPv6 connections
|
||||
* @dlen: Size of received data (output)
|
||||
*
|
||||
* Return: Number of iov entries used to store the datagram
|
||||
*/
|
||||
static int udp_vu_sock_recv(const struct ctx *c, int s, uint32_t events,
|
||||
bool v6, ssize_t *dlen)
|
||||
{
|
||||
struct vu_dev *vdev = c->vdev;
|
||||
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
|
||||
int iov_cnt, idx, iov_used;
|
||||
struct msghdr msg = { 0 };
|
||||
size_t off, hdrlen;
|
||||
|
||||
ASSERT(!c->no_udp);
|
||||
|
||||
if (!(events & EPOLLIN))
|
||||
return 0;
|
||||
|
||||
/* compute L2 header length */
|
||||
hdrlen = udp_vu_hdrlen(v6);
|
||||
|
||||
vu_init_elem(elem, iov_vu, VIRTQUEUE_MAX_SIZE);
|
||||
|
||||
iov_cnt = vu_collect(vdev, vq, elem, VIRTQUEUE_MAX_SIZE,
|
||||
IP_MAX_MTU - sizeof(struct udphdr) + hdrlen,
|
||||
NULL);
|
||||
if (iov_cnt == 0)
|
||||
return 0;
|
||||
|
||||
/* reserve space for the headers */
|
||||
ASSERT(iov_vu[0].iov_len >= hdrlen);
|
||||
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
|
||||
iov_vu[0].iov_len -= hdrlen;
|
||||
|
||||
/* read data from the socket */
|
||||
msg.msg_iov = iov_vu;
|
||||
msg.msg_iovlen = iov_cnt;
|
||||
|
||||
*dlen = recvmsg(s, &msg, 0);
|
||||
if (*dlen < 0) {
|
||||
vu_queue_rewind(vq, iov_cnt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* restore the pointer to the headers address */
|
||||
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
|
||||
iov_vu[0].iov_len += hdrlen;
|
||||
|
||||
/* count the numbers of buffer filled by recvmsg() */
|
||||
idx = iov_skip_bytes(iov_vu, iov_cnt, *dlen + hdrlen, &off);
|
||||
|
||||
/* adjust last iov length */
|
||||
if (idx < iov_cnt)
|
||||
iov_vu[idx].iov_len = off;
|
||||
iov_used = idx + !!off;
|
||||
|
||||
vu_set_vnethdr(vdev, iov_vu[0].iov_base, iov_used);
|
||||
|
||||
/* release unused buffers */
|
||||
vu_queue_rewind(vq, iov_cnt - iov_used);
|
||||
|
||||
return iov_used;
|
||||
}
|
||||
|
||||
/**
|
||||
* udp_vu_prepare() - Prepare the packet header
|
||||
* @c: Execution context
|
||||
* @toside: Address information for one side of the flow
|
||||
* @dlen: Packet data length
|
||||
*
|
||||
* Return: Layer-4 length
|
||||
*/
|
||||
static size_t udp_vu_prepare(const struct ctx *c,
|
||||
const struct flowside *toside, ssize_t dlen)
|
||||
{
|
||||
struct ethhdr *eh;
|
||||
size_t l4len;
|
||||
|
||||
/* ethernet header */
|
||||
eh = vu_eth(iov_vu[0].iov_base);
|
||||
|
||||
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
|
||||
memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
|
||||
|
||||
/* initialize header */
|
||||
if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
|
||||
struct iphdr *iph = vu_ip(iov_vu[0].iov_base);
|
||||
struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base);
|
||||
|
||||
eh->h_proto = htons(ETH_P_IP);
|
||||
|
||||
*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
|
||||
|
||||
l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
|
||||
} else {
|
||||
struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base);
|
||||
struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base);
|
||||
|
||||
eh->h_proto = htons(ETH_P_IPV6);
|
||||
|
||||
*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
|
||||
|
||||
l4len = udp_update_hdr6(ip6h, bp, toside, dlen, true);
|
||||
}
|
||||
|
||||
return l4len;
|
||||
}
|
||||
|
||||
/**
|
||||
* udp_vu_csum() - Calculate and set checksum for a UDP packet
|
||||
* @toside: Address information for one side of the flow
|
||||
* @iov_used: Number of used iov_vu items
|
||||
*/
|
||||
static void udp_vu_csum(const struct flowside *toside, int iov_used)
|
||||
{
|
||||
const struct in_addr *src4 = inany_v4(&toside->oaddr);
|
||||
const struct in_addr *dst4 = inany_v4(&toside->eaddr);
|
||||
char *base = iov_vu[0].iov_base;
|
||||
struct udp_payload_t *bp;
|
||||
|
||||
if (src4 && dst4) {
|
||||
bp = vu_payloadv4(base);
|
||||
csum_udp4(&bp->uh, *src4, *dst4, iov_vu, iov_used,
|
||||
(char *)&bp->data - base);
|
||||
} else {
|
||||
bp = vu_payloadv6(base);
|
||||
csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6,
|
||||
iov_vu, iov_used, (char *)&bp->data - base);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* udp_vu_listen_sock_handler() - Handle new data from socket
|
||||
* @c: Execution context
|
||||
* @ref: epoll reference
|
||||
* @events: epoll events bitmap
|
||||
* @now: Current timestamp
|
||||
*/
|
||||
void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events, const struct timespec *now)
|
||||
{
|
||||
struct vu_dev *vdev = c->vdev;
|
||||
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
|
||||
int i;
|
||||
|
||||
if (udp_sock_errs(c, ref.fd, events) < 0) {
|
||||
err("UDP: Unrecoverable error on listening socket:"
|
||||
" (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < UDP_MAX_FRAMES; i++) {
|
||||
const struct flowside *toside;
|
||||
union sockaddr_inany s_in;
|
||||
flow_sidx_t sidx;
|
||||
uint8_t pif;
|
||||
ssize_t dlen;
|
||||
int iov_used;
|
||||
bool v6;
|
||||
|
||||
if (udp_vu_sock_info(ref.fd, &s_in) < 0)
|
||||
break;
|
||||
|
||||
sidx = udp_flow_from_sock(c, ref, &s_in, now);
|
||||
pif = pif_at_sidx(sidx);
|
||||
|
||||
if (pif != PIF_TAP) {
|
||||
if (flow_sidx_valid(sidx)) {
|
||||
flow_sidx_t fromsidx = flow_sidx_opposite(sidx);
|
||||
struct udp_flow *uflow = udp_at_sidx(sidx);
|
||||
|
||||
flow_err(uflow,
|
||||
"No support for forwarding UDP from %s to %s",
|
||||
pif_name(pif_at_sidx(fromsidx)),
|
||||
pif_name(pif));
|
||||
} else {
|
||||
debug("Discarding 1 datagram without flow");
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
toside = flowside_at_sidx(sidx);
|
||||
|
||||
v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
|
||||
|
||||
iov_used = udp_vu_sock_recv(c, ref.fd, events, v6, &dlen);
|
||||
if (iov_used <= 0)
|
||||
break;
|
||||
|
||||
udp_vu_prepare(c, toside, dlen);
|
||||
if (*c->pcap) {
|
||||
udp_vu_csum(toside, iov_used);
|
||||
pcap_iov(iov_vu, iov_used,
|
||||
sizeof(struct virtio_net_hdr_mrg_rxbuf));
|
||||
}
|
||||
vu_flush(vdev, vq, elem, iov_used);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* udp_vu_reply_sock_handler() - Handle new data from flow specific socket
|
||||
* @c: Execution context
|
||||
* @ref: epoll reference
|
||||
* @events: epoll events bitmap
|
||||
* @now: Current timestamp
|
||||
*/
|
||||
void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events, const struct timespec *now)
|
||||
{
|
||||
flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside);
|
||||
const struct flowside *toside = flowside_at_sidx(tosidx);
|
||||
struct udp_flow *uflow = udp_at_sidx(ref.flowside);
|
||||
int from_s = uflow->s[ref.flowside.sidei];
|
||||
struct vu_dev *vdev = c->vdev;
|
||||
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
|
||||
int i;
|
||||
|
||||
ASSERT(!c->no_udp);
|
||||
|
||||
if (udp_sock_errs(c, from_s, events) < 0) {
|
||||
flow_err(uflow, "Unrecoverable error on reply socket");
|
||||
flow_err_details(uflow);
|
||||
udp_flow_close(c, uflow);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < UDP_MAX_FRAMES; i++) {
|
||||
uint8_t topif = pif_at_sidx(tosidx);
|
||||
ssize_t dlen;
|
||||
int iov_used;
|
||||
bool v6;
|
||||
|
||||
ASSERT(uflow);
|
||||
|
||||
if (topif != PIF_TAP) {
|
||||
uint8_t frompif = pif_at_sidx(ref.flowside);
|
||||
|
||||
flow_err(uflow,
|
||||
"No support for forwarding UDP from %s to %s",
|
||||
pif_name(frompif), pif_name(topif));
|
||||
continue;
|
||||
}
|
||||
|
||||
v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
|
||||
|
||||
iov_used = udp_vu_sock_recv(c, from_s, events, v6, &dlen);
|
||||
if (iov_used <= 0)
|
||||
break;
|
||||
flow_trace(uflow, "Received 1 datagram on reply socket");
|
||||
uflow->ts = now->tv_sec;
|
||||
|
||||
udp_vu_prepare(c, toside, dlen);
|
||||
if (*c->pcap) {
|
||||
udp_vu_csum(toside, iov_used);
|
||||
pcap_iov(iov_vu, iov_used,
|
||||
sizeof(struct virtio_net_hdr_mrg_rxbuf));
|
||||
}
|
||||
vu_flush(vdev, vq, elem, iov_used);
|
||||
}
|
||||
}
|
13
udp_vu.h
Normal file
13
udp_vu.h
Normal file
@ -0,0 +1,13 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/* Copyright Red Hat
|
||||
* Author: Laurent Vivier <lvivier@redhat.com>
|
||||
*/
|
||||
|
||||
#ifndef UDP_VU_H
|
||||
#define UDP_VU_H
|
||||
|
||||
void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events, const struct timespec *now);
|
||||
void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events, const struct timespec *now);
|
||||
#endif /* UDP_VU_H */
|
41
vhost_user.c
41
vhost_user.c
@ -48,12 +48,13 @@
|
||||
/* vhost-user version we are compatible with */
|
||||
#define VHOST_USER_VERSION 1
|
||||
|
||||
static struct vu_dev vdev_storage;
|
||||
|
||||
/**
|
||||
* vu_print_capabilities() - print vhost-user capabilities
|
||||
* this is part of the vhost-user backend
|
||||
* convention.
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
void vu_print_capabilities(void)
|
||||
{
|
||||
info("{");
|
||||
@ -163,9 +164,7 @@ static void vmsg_close_fds(const struct vhost_user_msg *vmsg)
|
||||
*/
|
||||
static void vu_remove_watch(const struct vu_dev *vdev, int fd)
|
||||
{
|
||||
/* Placeholder to add passt related code */
|
||||
(void)vdev;
|
||||
(void)fd;
|
||||
epoll_ctl(vdev->context->epollfd, EPOLL_CTL_DEL, fd, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -487,6 +486,14 @@ static bool vu_set_mem_table_exec(struct vu_dev *vdev,
|
||||
}
|
||||
}
|
||||
|
||||
/* As vu_packet_check_range() has no access to the number of
|
||||
* memory regions, mark the end of the array with mmap_addr = 0
|
||||
*/
|
||||
ASSERT(vdev->nregions < VHOST_USER_MAX_RAM_SLOTS - 1);
|
||||
vdev->regions[vdev->nregions].mmap_addr = 0;
|
||||
|
||||
tap_sock_update_pool(vdev->regions, 0);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -615,9 +622,16 @@ static bool vu_get_vring_base_exec(struct vu_dev *vdev,
|
||||
*/
|
||||
static void vu_set_watch(const struct vu_dev *vdev, int idx)
|
||||
{
|
||||
/* Placeholder to add passt related code */
|
||||
(void)vdev;
|
||||
(void)idx;
|
||||
union epoll_ref ref = {
|
||||
.type = EPOLL_TYPE_VHOST_KICK,
|
||||
.fd = vdev->vq[idx].kick_fd,
|
||||
.queue = idx
|
||||
};
|
||||
struct epoll_event ev = { 0 };
|
||||
|
||||
ev.data.u64 = ref.u64;
|
||||
ev.events = EPOLLIN;
|
||||
epoll_ctl(vdev->context->epollfd, EPOLL_CTL_ADD, ref.fd, &ev);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -829,14 +843,14 @@ static bool vu_set_vring_enable_exec(struct vu_dev *vdev,
|
||||
* @c: execution context
|
||||
* @vdev: vhost-user device
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
void vu_init(struct ctx *c, struct vu_dev *vdev)
|
||||
void vu_init(struct ctx *c)
|
||||
{
|
||||
int i;
|
||||
|
||||
vdev->context = c;
|
||||
c->vdev = &vdev_storage;
|
||||
c->vdev->context = c;
|
||||
for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) {
|
||||
vdev->vq[i] = (struct vu_virtq){
|
||||
c->vdev->vq[i] = (struct vu_virtq){
|
||||
.call_fd = -1,
|
||||
.kick_fd = -1,
|
||||
.err_fd = -1,
|
||||
@ -849,7 +863,6 @@ void vu_init(struct ctx *c, struct vu_dev *vdev)
|
||||
* vu_cleanup() - Reset vhost-user device
|
||||
* @vdev: vhost-user device
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
void vu_cleanup(struct vu_dev *vdev)
|
||||
{
|
||||
unsigned int i;
|
||||
@ -896,8 +909,7 @@ void vu_cleanup(struct vu_dev *vdev)
|
||||
*/
|
||||
static void vu_sock_reset(struct vu_dev *vdev)
|
||||
{
|
||||
/* Placeholder to add passt related code */
|
||||
(void)vdev;
|
||||
tap_sock_reset(vdev->context);
|
||||
}
|
||||
|
||||
static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
|
||||
@ -925,7 +937,6 @@ static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
|
||||
* @fd: vhost-user message socket
|
||||
* @events: epoll events
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events)
|
||||
{
|
||||
struct vhost_user_msg msg = { 0 };
|
||||
|
@ -183,7 +183,6 @@ struct vhost_user_msg {
|
||||
*
|
||||
* Return: true if the virqueue is enabled, false otherwise
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
static inline bool vu_queue_enabled(const struct vu_virtq *vq)
|
||||
{
|
||||
return vq->enable;
|
||||
@ -195,14 +194,13 @@ static inline bool vu_queue_enabled(const struct vu_virtq *vq)
|
||||
*
|
||||
* Return: true if the virqueue is started, false otherwise
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
static inline bool vu_queue_started(const struct vu_virtq *vq)
|
||||
{
|
||||
return vq->started;
|
||||
}
|
||||
|
||||
void vu_print_capabilities(void);
|
||||
void vu_init(struct ctx *c, struct vu_dev *vdev);
|
||||
void vu_init(struct ctx *c);
|
||||
void vu_cleanup(struct vu_dev *vdev);
|
||||
void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events);
|
||||
#endif /* VHOST_USER_H */
|
||||
|
5
virtio.c
5
virtio.c
@ -325,7 +325,6 @@ static bool vring_can_notify(const struct vu_dev *dev, struct vu_virtq *vq)
|
||||
* @dev: Vhost-user device
|
||||
* @vq: Virtqueue
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq)
|
||||
{
|
||||
if (!vring_can_notify(dev, vq)) {
|
||||
@ -498,7 +497,6 @@ static int vu_queue_map_desc(struct vu_dev *dev, struct vu_virtq *vq, unsigned i
|
||||
*
|
||||
* Return: -1 if there is an error, 0 otherwise
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq, struct vu_virtq_element *elem)
|
||||
{
|
||||
unsigned int head;
|
||||
@ -556,7 +554,6 @@ void vu_queue_unpop(struct vu_virtq *vq)
|
||||
* @vq: Virtqueue
|
||||
* @num: Number of element to unpop
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
bool vu_queue_rewind(struct vu_virtq *vq, unsigned int num)
|
||||
{
|
||||
if (num > vq->inuse)
|
||||
@ -609,7 +606,6 @@ void vu_queue_fill_by_index(struct vu_virtq *vq, unsigned int index,
|
||||
* @len: Size of the element
|
||||
* @idx: Used ring entry index
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
void vu_queue_fill(struct vu_virtq *vq, const struct vu_virtq_element *elem,
|
||||
unsigned int len, unsigned int idx)
|
||||
{
|
||||
@ -633,7 +629,6 @@ static inline void vring_used_idx_set(struct vu_virtq *vq, uint16_t val)
|
||||
* @vq: Virtqueue
|
||||
* @count: Number of entry to flush
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
void vu_queue_flush(struct vu_virtq *vq, unsigned int count)
|
||||
{
|
||||
uint16_t old, new;
|
||||
|
283
vu_common.c
Normal file
283
vu_common.c
Normal file
@ -0,0 +1,283 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/* Copyright Red Hat
|
||||
* Author: Laurent Vivier <lvivier@redhat.com>
|
||||
*
|
||||
* common_vu.c - vhost-user common UDP and TCP functions
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <netinet/if_ether.h>
|
||||
#include <linux/virtio_net.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "passt.h"
|
||||
#include "tap.h"
|
||||
#include "vhost_user.h"
|
||||
#include "pcap.h"
|
||||
#include "vu_common.h"
|
||||
|
||||
/**
|
||||
* vu_packet_check_range() - Check if a given memory zone is contained in
|
||||
* a mapped guest memory region
|
||||
* @buf: Array of the available memory regions
|
||||
* @offset: Offset of data range in packet descriptor
|
||||
* @size: Length of desired data range
|
||||
* @start: Start of the packet descriptor
|
||||
*
|
||||
* Return: 0 if the zone is in a mapped memory region, -1 otherwise
|
||||
*/
|
||||
int vu_packet_check_range(void *buf, size_t offset, size_t len,
|
||||
const char *start)
|
||||
{
|
||||
struct vu_dev_region *dev_region;
|
||||
|
||||
for (dev_region = buf; dev_region->mmap_addr; dev_region++) {
|
||||
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
|
||||
char *m = (char *)dev_region->mmap_addr;
|
||||
|
||||
if (m <= start &&
|
||||
start + offset + len <= m + dev_region->mmap_offset +
|
||||
dev_region->size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* vu_init_elem() - initialize an array of virtqueue elements with 1 iov in each
|
||||
* @elem: Array of virtqueue elements to initialize
|
||||
* @iov: Array of iovec to assign to virtqueue element
|
||||
* @elem_cnt: Number of virtqueue element
|
||||
*/
|
||||
void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov, int elem_cnt)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < elem_cnt; i++)
|
||||
vu_set_element(&elem[i], NULL, &iov[i]);
|
||||
}
|
||||
|
||||
/**
|
||||
* vu_collect() - collect virtio buffers from a given virtqueue
|
||||
* @vdev: vhost-user device
|
||||
* @vq: virtqueue to collect from
|
||||
* @elem: Array of virtqueue element
|
||||
* each element must be initialized with one iovec entry
|
||||
* in the in_sg array.
|
||||
* @max_elem: Number of virtqueue elements in the array
|
||||
* @size: Maximum size of the data in the frame
|
||||
* @frame_size: The total size of the buffers (output)
|
||||
*
|
||||
* Return: number of elements used to contain the frame
|
||||
*/
|
||||
int vu_collect(struct vu_dev *vdev, struct vu_virtq *vq,
|
||||
struct vu_virtq_element *elem, int max_elem,
|
||||
size_t size, size_t *frame_size)
|
||||
{
|
||||
size_t current_size = 0;
|
||||
int elem_cnt = 0;
|
||||
|
||||
while (current_size < size && elem_cnt < max_elem) {
|
||||
struct iovec *iov;
|
||||
int ret;
|
||||
|
||||
ret = vu_queue_pop(vdev, vq, &elem[elem_cnt]);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (elem[elem_cnt].in_num < 1) {
|
||||
warn("virtio-net receive queue contains no in buffers");
|
||||
vu_queue_detach_element(vq);
|
||||
break;
|
||||
}
|
||||
|
||||
iov = &elem[elem_cnt].in_sg[0];
|
||||
|
||||
if (iov->iov_len > size - current_size)
|
||||
iov->iov_len = size - current_size;
|
||||
|
||||
current_size += iov->iov_len;
|
||||
elem_cnt++;
|
||||
|
||||
if (!vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
|
||||
break;
|
||||
}
|
||||
|
||||
if (frame_size)
|
||||
*frame_size = current_size;
|
||||
|
||||
return elem_cnt;
|
||||
}
|
||||
|
||||
/**
|
||||
* vu_set_vnethdr() - set virtio-net headers
|
||||
* @vdev: vhost-user device
|
||||
* @vnethdr: Address of the header to set
|
||||
* @num_buffers: Number of guest buffers of the frame
|
||||
*/
|
||||
void vu_set_vnethdr(const struct vu_dev *vdev,
|
||||
struct virtio_net_hdr_mrg_rxbuf *vnethdr,
|
||||
int num_buffers)
|
||||
{
|
||||
vnethdr->hdr = VU_HEADER;
|
||||
if (vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
|
||||
vnethdr->num_buffers = htole16(num_buffers);
|
||||
}
|
||||
|
||||
/**
|
||||
* vu_flush() - flush all the collected buffers to the vhost-user interface
|
||||
* @vdev: vhost-user device
|
||||
* @vq: vhost-user virtqueue
|
||||
* @elem: virtqueue elements array to send back to the virtqueue
|
||||
* @elem_cnt: Length of the array
|
||||
*/
|
||||
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
|
||||
struct vu_virtq_element *elem, int elem_cnt)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < elem_cnt; i++)
|
||||
vu_queue_fill(vq, &elem[i], elem[i].in_sg[0].iov_len, i);
|
||||
|
||||
vu_queue_flush(vq, elem_cnt);
|
||||
vu_queue_notify(vdev, vq);
|
||||
}
|
||||
|
||||
/**
|
||||
* vu_handle_tx() - Receive data from the TX virtqueue
|
||||
* @vdev: vhost-user device
|
||||
* @index: index of the virtqueue
|
||||
* @now: Current timestamp
|
||||
*/
|
||||
static void vu_handle_tx(struct vu_dev *vdev, int index,
|
||||
const struct timespec *now)
|
||||
{
|
||||
struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
|
||||
struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
|
||||
struct vu_virtq *vq = &vdev->vq[index];
|
||||
int hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
|
||||
int out_sg_count;
|
||||
int count;
|
||||
|
||||
ASSERT(VHOST_USER_IS_QUEUE_TX(index));
|
||||
|
||||
tap_flush_pools();
|
||||
|
||||
count = 0;
|
||||
out_sg_count = 0;
|
||||
while (count < VIRTQUEUE_MAX_SIZE) {
|
||||
int ret;
|
||||
|
||||
vu_set_element(&elem[count], &out_sg[out_sg_count], NULL);
|
||||
ret = vu_queue_pop(vdev, vq, &elem[count]);
|
||||
if (ret < 0)
|
||||
break;
|
||||
out_sg_count += elem[count].out_num;
|
||||
|
||||
if (elem[count].out_num < 1) {
|
||||
warn("virtio-net transmit queue contains no out buffers");
|
||||
break;
|
||||
}
|
||||
ASSERT(elem[count].out_num == 1);
|
||||
|
||||
tap_add_packet(vdev->context,
|
||||
elem[count].out_sg[0].iov_len - hdrlen,
|
||||
(char *)elem[count].out_sg[0].iov_base + hdrlen);
|
||||
count++;
|
||||
}
|
||||
tap_handler(vdev->context, now);
|
||||
|
||||
if (count) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
vu_queue_fill(vq, &elem[i], 0, i);
|
||||
vu_queue_flush(vq, count);
|
||||
vu_queue_notify(vdev, vq);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* vu_kick_cb() - Called on a kick event to start to receive data
|
||||
* @vdev: vhost-user device
|
||||
* @ref: epoll reference information
|
||||
* @now: Current timestamp
|
||||
*/
|
||||
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
|
||||
const struct timespec *now)
|
||||
{
|
||||
eventfd_t kick_data;
|
||||
ssize_t rc;
|
||||
|
||||
rc = eventfd_read(ref.fd, &kick_data);
|
||||
if (rc == -1)
|
||||
die_perror("vhost-user kick eventfd_read()");
|
||||
|
||||
debug("vhost-user: got kick_data: %016"PRIx64" idx: %d",
|
||||
kick_data, ref.queue);
|
||||
if (VHOST_USER_IS_QUEUE_TX(ref.queue))
|
||||
vu_handle_tx(vdev, ref.queue, now);
|
||||
}
|
||||
|
||||
/**
|
||||
* vu_send_single() - Send a buffer to the front-end using the RX virtqueue
|
||||
* @c: execution context
|
||||
* @buf: address of the buffer
|
||||
* @size: size of the buffer
|
||||
*
|
||||
* Return: number of bytes sent, -1 if there is an error
|
||||
*/
|
||||
int vu_send_single(const struct ctx *c, const void *buf, size_t size)
|
||||
{
|
||||
struct vu_dev *vdev = c->vdev;
|
||||
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
|
||||
struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
|
||||
struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
|
||||
size_t total;
|
||||
int elem_cnt;
|
||||
int i;
|
||||
|
||||
debug("vu_send_single size %zu", size);
|
||||
|
||||
if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
|
||||
debug("Got packet, but RX virtqueue not usable yet");
|
||||
return -1;
|
||||
}
|
||||
|
||||
vu_init_elem(elem, in_sg, VIRTQUEUE_MAX_SIZE);
|
||||
|
||||
size += sizeof(struct virtio_net_hdr_mrg_rxbuf);
|
||||
elem_cnt = vu_collect(vdev, vq, elem, VIRTQUEUE_MAX_SIZE, size, &total);
|
||||
if (total < size) {
|
||||
debug("vu_send_single: no space to send the data "
|
||||
"elem_cnt %d size %zd", elem_cnt, total);
|
||||
goto err;
|
||||
}
|
||||
|
||||
vu_set_vnethdr(vdev, in_sg[0].iov_base, elem_cnt);
|
||||
|
||||
total -= sizeof(struct virtio_net_hdr_mrg_rxbuf);
|
||||
|
||||
/* copy data from the buffer to the iovec */
|
||||
iov_from_buf(in_sg, elem_cnt, sizeof(struct virtio_net_hdr_mrg_rxbuf),
|
||||
buf, total);
|
||||
|
||||
if (*c->pcap) {
|
||||
pcap_iov(in_sg, elem_cnt,
|
||||
sizeof(struct virtio_net_hdr_mrg_rxbuf));
|
||||
}
|
||||
|
||||
vu_flush(vdev, vq, elem, elem_cnt);
|
||||
|
||||
debug("vhost-user sent %zu", total);
|
||||
|
||||
return total;
|
||||
err:
|
||||
for (i = 0; i < elem_cnt; i++)
|
||||
vu_queue_detach_element(vq);
|
||||
|
||||
return -1;
|
||||
}
|
60
vu_common.h
Normal file
60
vu_common.h
Normal file
@ -0,0 +1,60 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
* Copyright Red Hat
|
||||
* Author: Laurent Vivier <lvivier@redhat.com>
|
||||
*
|
||||
* vhost-user common UDP and TCP functions
|
||||
*/
|
||||
|
||||
#ifndef VU_COMMON_H
|
||||
#define VU_COMMON_H
|
||||
#include <linux/virtio_net.h>
|
||||
|
||||
static inline void *vu_eth(void *base)
|
||||
{
|
||||
return ((char *)base + sizeof(struct virtio_net_hdr_mrg_rxbuf));
|
||||
}
|
||||
|
||||
static inline void *vu_ip(void *base)
|
||||
{
|
||||
return (struct ethhdr *)vu_eth(base) + 1;
|
||||
}
|
||||
|
||||
static inline void *vu_payloadv4(void *base)
|
||||
{
|
||||
return (struct iphdr *)vu_ip(base) + 1;
|
||||
}
|
||||
|
||||
static inline void *vu_payloadv6(void *base)
|
||||
{
|
||||
return (struct ipv6hdr *)vu_ip(base) + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* vu_set_element() - Initialize a vu_virtq_element
|
||||
* @elem: Element to initialize
|
||||
* @out_sg: One out iovec entry to set in elem
|
||||
* @in_sg: One in iovec entry to set in elem
|
||||
*/
|
||||
static inline void vu_set_element(struct vu_virtq_element *elem,
|
||||
struct iovec *out_sg, struct iovec *in_sg)
|
||||
{
|
||||
elem->out_num = !!out_sg;
|
||||
elem->out_sg = out_sg;
|
||||
elem->in_num = !!in_sg;
|
||||
elem->in_sg = in_sg;
|
||||
}
|
||||
|
||||
void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov,
|
||||
int elem_cnt);
|
||||
int vu_collect(struct vu_dev *vdev, struct vu_virtq *vq,
|
||||
struct vu_virtq_element *elem, int max_elem, size_t size,
|
||||
size_t *frame_size);
|
||||
void vu_set_vnethdr(const struct vu_dev *vdev,
|
||||
struct virtio_net_hdr_mrg_rxbuf *vnethdr,
|
||||
int num_buffers);
|
||||
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
|
||||
struct vu_virtq_element *elem, int elem_cnt);
|
||||
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
|
||||
const struct timespec *now);
|
||||
int vu_send_single(const struct ctx *c, const void *buf, size_t size);
|
||||
#endif /* VU_COMMON_H */
|
Loading…
Reference in New Issue
Block a user