1
0
mirror of https://passt.top/passt synced 2025-01-05 04:15:25 +00:00

vhost-user: add vhost-user

Add virtio and vhost-user functions to connect with QEMU.

  $ ./passt --vhost-user

and

  # qemu-system-x86_64 ... -m 4G \
        -object memory-backend-memfd,id=memfd0,share=on,size=4G \
        -numa node,memdev=memfd0 \
        -chardev socket,id=chr0,path=/tmp/passt_1.socket \
        -netdev vhost-user,id=netdev0,chardev=chr0 \
        -device virtio-net,mac=9a:2b:2c:2d:2e:2f,netdev=netdev0 \
        ...

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
This commit is contained in:
Laurent Vivier 2024-11-13 09:04:06 +01:00 committed by Stefano Brivio
parent 007af94bb9
commit 92fe7e967a
24 changed files with 1412 additions and 54 deletions

View File

@ -37,7 +37,8 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \ PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \ icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \ ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c vhost_user.c virtio.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c udp_vu.c util.c \
vhost_user.c virtio.c vu_common.c
QRAP_SRCS = qrap.c QRAP_SRCS = qrap.c
SRCS = $(PASST_SRCS) $(QRAP_SRCS) SRCS = $(PASST_SRCS) $(QRAP_SRCS)
@ -47,7 +48,8 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \ flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \ lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \ siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
udp.h udp_flow.h util.h vhost_user.h virtio.h tcp_vu.h udp.h udp_flow.h udp_internal.h udp_vu.h util.h vhost_user.h \
virtio.h vu_common.h
HEADERS = $(PASST_HEADERS) seccomp.h HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);} C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);}

21
conf.c
View File

@ -45,6 +45,7 @@
#include "lineread.h" #include "lineread.h"
#include "isolation.h" #include "isolation.h"
#include "log.h" #include "log.h"
#include "vhost_user.h"
#define NETNS_RUN_DIR "/run/netns" #define NETNS_RUN_DIR "/run/netns"
@ -769,9 +770,14 @@ static void usage(const char *name, FILE *f, int status)
" default: same interface name as external one\n"); " default: same interface name as external one\n");
} else { } else {
FPRINTF(f, FPRINTF(f,
" -s, --socket PATH UNIX domain socket path\n" " -s, --socket, --socket-path PATH UNIX domain socket path\n"
" default: probe free path starting from " " default: probe free path starting from "
UNIX_SOCK_PATH "\n", 1); UNIX_SOCK_PATH "\n", 1);
FPRINTF(f,
" --vhost-user Enable vhost-user mode\n"
" UNIX domain socket is provided by -s option\n"
" --print-capabilities print back-end capabilities in JSON format,\n"
" only meaningful for vhost-user mode\n");
} }
FPRINTF(f, FPRINTF(f,
@ -1305,6 +1311,10 @@ void conf(struct ctx *c, int argc, char **argv)
{"map-guest-addr", required_argument, NULL, 22 }, {"map-guest-addr", required_argument, NULL, 22 },
{"host-lo-to-ns-lo", no_argument, NULL, 23 }, {"host-lo-to-ns-lo", no_argument, NULL, 23 },
{"dns-host", required_argument, NULL, 24 }, {"dns-host", required_argument, NULL, 24 },
{"vhost-user", no_argument, NULL, 25 },
/* vhost-user backend program convention */
{"print-capabilities", no_argument, NULL, 26 },
{"socket-path", required_argument, NULL, 's' },
{ 0 }, { 0 },
}; };
const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt"; const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt";
@ -1498,6 +1508,15 @@ void conf(struct ctx *c, int argc, char **argv)
break; break;
die("Invalid host nameserver address: %s", optarg); die("Invalid host nameserver address: %s", optarg);
case 25:
if (c->mode == MODE_PASTA) {
err("--vhost-user is for passt mode only");
usage(argv[0], stdout, EXIT_SUCCESS);
}
c->mode = MODE_VU;
break;
case 26:
vu_print_capabilities();
break; break;
case 'd': case 'd':
c->debug = 1; c->debug = 1;

View File

@ -36,6 +36,10 @@ enum epoll_type {
EPOLL_TYPE_TAP_PASST, EPOLL_TYPE_TAP_PASST,
/* socket listening for qemu socket connections */ /* socket listening for qemu socket connections */
EPOLL_TYPE_TAP_LISTEN, EPOLL_TYPE_TAP_LISTEN,
/* vhost-user command socket */
EPOLL_TYPE_VHOST_CMD,
/* vhost-user kick event socket */
EPOLL_TYPE_VHOST_KICK,
EPOLL_NUM_TYPES, EPOLL_NUM_TYPES,
}; };

1
iov.c
View File

@ -68,7 +68,6 @@ size_t iov_skip_bytes(const struct iovec *iov, size_t n,
* *
* Returns: The number of bytes successfully copied. * Returns: The number of bytes successfully copied.
*/ */
/* cppcheck-suppress unusedFunction */
size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt, size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
size_t offset, const void *buf, size_t bytes) size_t offset, const void *buf, size_t bytes)
{ {

View File

@ -379,12 +379,21 @@ void isolate_postfork(const struct ctx *c)
prctl(PR_SET_DUMPABLE, 0); prctl(PR_SET_DUMPABLE, 0);
if (c->mode == MODE_PASTA) { switch (c->mode) {
prog.len = (unsigned short)ARRAY_SIZE(filter_pasta); case MODE_PASST:
prog.filter = filter_pasta;
} else {
prog.len = (unsigned short)ARRAY_SIZE(filter_passt); prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
prog.filter = filter_passt; prog.filter = filter_passt;
break;
case MODE_PASTA:
prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
prog.filter = filter_pasta;
break;
case MODE_VU:
prog.len = (unsigned short)ARRAY_SIZE(filter_vu);
prog.filter = filter_vu;
break;
default:
ASSERT(0);
} }
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||

View File

@ -36,6 +36,17 @@
static int packet_check_range(const struct pool *p, size_t offset, size_t len, static int packet_check_range(const struct pool *p, size_t offset, size_t len,
const char *start, const char *func, int line) const char *start, const char *func, int line)
{ {
if (p->buf_size == 0) {
int ret;
ret = vu_packet_check_range((void *)p->buf, offset, len, start);
if (ret == -1)
trace("cannot find region, %s:%i", func, line);
return ret;
}
if (start < p->buf) { if (start < p->buf) {
trace("packet start %p before buffer start %p, " trace("packet start %p before buffer start %p, "
"%s:%i", (void *)start, (void *)p->buf, func, line); "%s:%i", (void *)start, (void *)p->buf, func, line);

View File

@ -8,8 +8,10 @@
/** /**
* struct pool - Generic pool of packets stored in a buffer * struct pool - Generic pool of packets stored in a buffer
* @buf: Buffer storing packet descriptors * @buf: Buffer storing packet descriptors,
* @buf_size: Total size of buffer * a struct vu_dev_region array for passt vhost-user mode
* @buf_size: Total size of buffer,
* 0 for passt vhost-user mode
* @size: Number of usable descriptors for the pool * @size: Number of usable descriptors for the pool
* @count: Number of used descriptors for the pool * @count: Number of used descriptors for the pool
* @pkt: Descriptors: see macros below * @pkt: Descriptors: see macros below
@ -22,6 +24,8 @@ struct pool {
struct iovec pkt[1]; struct iovec pkt[1];
}; };
int vu_packet_check_range(void *buf, size_t offset, size_t len,
const char *start);
void packet_add_do(struct pool *p, size_t len, const char *start, void packet_add_do(struct pool *p, size_t len, const char *start,
const char *func, int line); const char *func, int line);
void *packet_get_do(const struct pool *p, const size_t idx, void *packet_get_do(const struct pool *p, const size_t idx,

10
passt.1
View File

@ -397,12 +397,20 @@ interface address are configured on a given host interface.
.SS \fBpasst\fR-only options .SS \fBpasst\fR-only options
.TP .TP
.BR \-s ", " \-\-socket " " \fIpath .BR \-s ", " \-\-socket-path ", " \-\-socket " " \fIpath
Path for UNIX domain socket used by \fBqemu\fR(1) or \fBqrap\fR(1) to connect to Path for UNIX domain socket used by \fBqemu\fR(1) or \fBqrap\fR(1) to connect to
\fBpasst\fR. \fBpasst\fR.
Default is to probe a free socket, not accepting connections, starting from Default is to probe a free socket, not accepting connections, starting from
\fI/tmp/passt_1.socket\fR to \fI/tmp/passt_64.socket\fR. \fI/tmp/passt_1.socket\fR to \fI/tmp/passt_64.socket\fR.
.TP
.BR \-\-vhost-user
Enable vhost-user. The vhost-user command socket is provided by \fB--socket\fR.
.TP
.BR \-\-print-capabilities
Print back-end capabilities in JSON format, only meaningful for vhost-user mode.
.TP .TP
.BR \-F ", " \-\-fd " " \fIFD .BR \-F ", " \-\-fd " " \fIFD
Pass a pre-opened, connected socket to \fBpasst\fR. Usually the socket is opened Pass a pre-opened, connected socket to \fBpasst\fR. Usually the socket is opened

View File

@ -50,6 +50,7 @@
#include "log.h" #include "log.h"
#include "tcp_splice.h" #include "tcp_splice.h"
#include "ndp.h" #include "ndp.h"
#include "vu_common.h"
#define EPOLL_EVENTS 8 #define EPOLL_EVENTS 8
@ -72,6 +73,8 @@ char *epoll_type_str[] = {
[EPOLL_TYPE_TAP_PASTA] = "/dev/net/tun device", [EPOLL_TYPE_TAP_PASTA] = "/dev/net/tun device",
[EPOLL_TYPE_TAP_PASST] = "connected qemu socket", [EPOLL_TYPE_TAP_PASST] = "connected qemu socket",
[EPOLL_TYPE_TAP_LISTEN] = "listening qemu socket", [EPOLL_TYPE_TAP_LISTEN] = "listening qemu socket",
[EPOLL_TYPE_VHOST_CMD] = "vhost-user command socket",
[EPOLL_TYPE_VHOST_KICK] = "vhost-user kick socket",
}; };
static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES, static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
"epoll_type_str[] doesn't match enum epoll_type"); "epoll_type_str[] doesn't match enum epoll_type");
@ -346,6 +349,12 @@ loop:
case EPOLL_TYPE_PING: case EPOLL_TYPE_PING:
icmp_sock_handler(&c, ref); icmp_sock_handler(&c, ref);
break; break;
case EPOLL_TYPE_VHOST_CMD:
vu_control_handler(c.vdev, c.fd_tap, eventmask);
break;
case EPOLL_TYPE_VHOST_KICK:
vu_kick_cb(c.vdev, ref, &now);
break;
default: default:
/* Can't happen */ /* Can't happen */
ASSERT(0); ASSERT(0);

View File

@ -25,6 +25,7 @@ union epoll_ref;
#include "fwd.h" #include "fwd.h"
#include "tcp.h" #include "tcp.h"
#include "udp.h" #include "udp.h"
#include "vhost_user.h"
/* Default address for our end on the tap interface. Bit 0 of byte 0 must be 0 /* Default address for our end on the tap interface. Bit 0 of byte 0 must be 0
* (unicast) and bit 1 of byte 1 must be 1 (locally administered). Otherwise * (unicast) and bit 1 of byte 1 must be 1 (locally administered). Otherwise
@ -43,6 +44,7 @@ union epoll_ref;
* @icmp: ICMP-specific reference part * @icmp: ICMP-specific reference part
* @data: Data handled by protocol handlers * @data: Data handled by protocol handlers
* @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone * @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone
* @queue: vhost-user queue index for this fd
* @u64: Opaque reference for epoll_ctl() and epoll_wait() * @u64: Opaque reference for epoll_ctl() and epoll_wait()
*/ */
union epoll_ref { union epoll_ref {
@ -58,6 +60,7 @@ union epoll_ref {
union udp_listen_epoll_ref udp; union udp_listen_epoll_ref udp;
uint32_t data; uint32_t data;
int nsdir_fd; int nsdir_fd;
int queue;
}; };
}; };
uint64_t u64; uint64_t u64;
@ -94,6 +97,7 @@ struct fqdn {
enum passt_modes { enum passt_modes {
MODE_PASST, MODE_PASST,
MODE_PASTA, MODE_PASTA,
MODE_VU,
}; };
/** /**
@ -229,6 +233,7 @@ struct ip6_ctx {
* @freebind: Allow binding of non-local addresses for forwarding * @freebind: Allow binding of non-local addresses for forwarding
* @low_wmem: Low probed net.core.wmem_max * @low_wmem: Low probed net.core.wmem_max
* @low_rmem: Low probed net.core.rmem_max * @low_rmem: Low probed net.core.rmem_max
* @vdev: vhost-user device
*/ */
struct ctx { struct ctx {
enum passt_modes mode; enum passt_modes mode;
@ -291,6 +296,8 @@ struct ctx {
int low_wmem; int low_wmem;
int low_rmem; int low_rmem;
struct vu_dev *vdev;
}; };
void proto_update_l2_buf(const unsigned char *eth_d, void proto_update_l2_buf(const unsigned char *eth_d,

1
pcap.c
View File

@ -143,7 +143,6 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
* @iovcnt: Number of buffers (@iov entries) * @iovcnt: Number of buffers (@iov entries)
* @offset: Offset of the L2 frame within the full data length * @offset: Offset of the L2 frame within the full data length
*/ */
/* cppcheck-suppress unusedFunction */
void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset) void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset)
{ {
struct timespec now = { 0 }; struct timespec now = { 0 };

67
tap.c
View File

@ -58,6 +58,8 @@
#include "packet.h" #include "packet.h"
#include "tap.h" #include "tap.h"
#include "log.h" #include "log.h"
#include "vhost_user.h"
#include "vu_common.h"
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */ /* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf); static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
@ -78,16 +80,22 @@ void tap_send_single(const struct ctx *c, const void *data, size_t l2len)
struct iovec iov[2]; struct iovec iov[2];
size_t iovcnt = 0; size_t iovcnt = 0;
if (c->mode == MODE_PASST) { switch (c->mode) {
case MODE_PASST:
iov[iovcnt] = IOV_OF_LVALUE(vnet_len); iov[iovcnt] = IOV_OF_LVALUE(vnet_len);
iovcnt++; iovcnt++;
} /* fall through */
case MODE_PASTA:
iov[iovcnt].iov_base = (void *)data; iov[iovcnt].iov_base = (void *)data;
iov[iovcnt].iov_len = l2len; iov[iovcnt].iov_len = l2len;
iovcnt++; iovcnt++;
tap_send_frames(c, iov, iovcnt, 1); tap_send_frames(c, iov, iovcnt, 1);
break;
case MODE_VU:
vu_send_single(c, data, l2len);
break;
}
} }
/** /**
@ -414,10 +422,18 @@ size_t tap_send_frames(const struct ctx *c, const struct iovec *iov,
if (!nframes) if (!nframes)
return 0; return 0;
if (c->mode == MODE_PASTA) switch (c->mode) {
case MODE_PASTA:
m = tap_send_frames_pasta(c, iov, bufs_per_frame, nframes); m = tap_send_frames_pasta(c, iov, bufs_per_frame, nframes);
else break;
case MODE_PASST:
m = tap_send_frames_passt(c, iov, bufs_per_frame, nframes); m = tap_send_frames_passt(c, iov, bufs_per_frame, nframes);
break;
case MODE_VU:
/* fall through */
default:
ASSERT(0);
}
if (m < nframes) if (m < nframes)
debug("tap: failed to send %zu frames of %zu", debug("tap: failed to send %zu frames of %zu",
@ -976,7 +992,7 @@ void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
* tap_sock_reset() - Handle closing or failure of connect AF_UNIX socket * tap_sock_reset() - Handle closing or failure of connect AF_UNIX socket
* @c: Execution context * @c: Execution context
*/ */
static void tap_sock_reset(struct ctx *c) void tap_sock_reset(struct ctx *c)
{ {
info("Client connection closed%s", c->one_off ? ", exiting" : ""); info("Client connection closed%s", c->one_off ? ", exiting" : "");
@ -987,6 +1003,8 @@ static void tap_sock_reset(struct ctx *c)
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_tap, NULL); epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_tap, NULL);
close(c->fd_tap); close(c->fd_tap);
c->fd_tap = -1; c->fd_tap = -1;
if (c->mode == MODE_VU)
vu_cleanup(c->vdev);
} }
/** /**
@ -1207,6 +1225,11 @@ static void tap_backend_show_hints(struct ctx *c)
info("or qrap, for earlier qemu versions:"); info("or qrap, for earlier qemu versions:");
info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio"); info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
break; break;
case MODE_VU:
info("You can start qemu with:");
info(" kvm ... -chardev socket,id=chr0,path=%s -netdev vhost-user,id=netdev0,chardev=chr0 -device virtio-net,netdev=netdev0 -object memory-backend-memfd,id=memfd0,share=on,size=$RAMSIZE -numa node,memdev=memfd0\n",
c->sock_path);
break;
} }
} }
@ -1234,8 +1257,8 @@ static void tap_sock_unix_init(const struct ctx *c)
*/ */
void tap_listen_handler(struct ctx *c, uint32_t events) void tap_listen_handler(struct ctx *c, uint32_t events)
{ {
union epoll_ref ref = { .type = EPOLL_TYPE_TAP_PASST };
struct epoll_event ev = { 0 }; struct epoll_event ev = { 0 };
union epoll_ref ref = { 0 };
int v = INT_MAX / 2; int v = INT_MAX / 2;
struct ucred ucred; struct ucred ucred;
socklen_t len; socklen_t len;
@ -1275,6 +1298,10 @@ void tap_listen_handler(struct ctx *c, uint32_t events)
trace("tap: failed to set SO_SNDBUF to %i", v); trace("tap: failed to set SO_SNDBUF to %i", v);
ref.fd = c->fd_tap; ref.fd = c->fd_tap;
if (c->mode == MODE_VU)
ref.type = EPOLL_TYPE_VHOST_CMD;
else
ref.type = EPOLL_TYPE_TAP_PASST;
ev.events = EPOLLIN | EPOLLRDHUP; ev.events = EPOLLIN | EPOLLRDHUP;
ev.data.u64 = ref.u64; ev.data.u64 = ref.u64;
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
@ -1341,7 +1368,7 @@ static void tap_sock_tun_init(struct ctx *c)
* @base: Buffer base * @base: Buffer base
* @size Buffer size * @size Buffer size
*/ */
static void tap_sock_update_pool(void *base, size_t size) void tap_sock_update_pool(void *base, size_t size)
{ {
int i; int i;
@ -1361,6 +1388,9 @@ static void tap_sock_update_pool(void *base, size_t size)
*/ */
void tap_backend_init(struct ctx *c) void tap_backend_init(struct ctx *c)
{ {
if (c->mode == MODE_VU)
tap_sock_update_pool(NULL, 0);
else
tap_sock_update_pool(pkt_buf, sizeof(pkt_buf)); tap_sock_update_pool(pkt_buf, sizeof(pkt_buf));
if (c->fd_tap != -1) { /* Passed as --fd */ if (c->fd_tap != -1) { /* Passed as --fd */
@ -1369,10 +1399,17 @@ void tap_backend_init(struct ctx *c)
ASSERT(c->one_off); ASSERT(c->one_off);
ref.fd = c->fd_tap; ref.fd = c->fd_tap;
if (c->mode == MODE_PASST) switch (c->mode) {
case MODE_PASST:
ref.type = EPOLL_TYPE_TAP_PASST; ref.type = EPOLL_TYPE_TAP_PASST;
else break;
case MODE_PASTA:
ref.type = EPOLL_TYPE_TAP_PASTA; ref.type = EPOLL_TYPE_TAP_PASTA;
break;
case MODE_VU:
ref.type = EPOLL_TYPE_VHOST_CMD;
break;
}
ev.events = EPOLLIN | EPOLLRDHUP; ev.events = EPOLLIN | EPOLLRDHUP;
ev.data.u64 = ref.u64; ev.data.u64 = ref.u64;
@ -1380,9 +1417,14 @@ void tap_backend_init(struct ctx *c)
return; return;
} }
if (c->mode == MODE_PASTA) { switch (c->mode) {
case MODE_PASTA:
tap_sock_tun_init(c); tap_sock_tun_init(c);
} else { break;
case MODE_VU:
vu_init(c);
/* fall through */
case MODE_PASST:
tap_sock_unix_init(c); tap_sock_unix_init(c);
/* In passt mode, we don't know the guest's MAC address until it /* In passt mode, we don't know the guest's MAC address until it
@ -1390,6 +1432,7 @@ void tap_backend_init(struct ctx *c)
* first packets will reach it. * first packets will reach it.
*/ */
memset(&c->guest_mac, 0xff, sizeof(c->guest_mac)); memset(&c->guest_mac, 0xff, sizeof(c->guest_mac));
break;
} }
tap_backend_show_hints(c); tap_backend_show_hints(c);

3
tap.h
View File

@ -40,6 +40,7 @@ static inline struct iovec tap_hdr_iov(const struct ctx *c,
*/ */
static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len) static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
{ {
if (thdr)
thdr->vnet_len = htonl(l2len); thdr->vnet_len = htonl(l2len);
} }
@ -68,6 +69,8 @@ void tap_handler_pasta(struct ctx *c, uint32_t events,
void tap_handler_passt(struct ctx *c, uint32_t events, void tap_handler_passt(struct ctx *c, uint32_t events,
const struct timespec *now); const struct timespec *now);
int tap_sock_unix_open(char *sock_path); int tap_sock_unix_open(char *sock_path);
void tap_sock_reset(struct ctx *c);
void tap_sock_update_pool(void *base, size_t size);
void tap_backend_init(struct ctx *c); void tap_backend_init(struct ctx *c);
void tap_flush_pools(void); void tap_flush_pools(void);
void tap_handler(struct ctx *c, const struct timespec *now); void tap_handler(struct ctx *c, const struct timespec *now);

7
tcp.c
View File

@ -304,6 +304,7 @@
#include "flow_table.h" #include "flow_table.h"
#include "tcp_internal.h" #include "tcp_internal.h"
#include "tcp_buf.h" #include "tcp_buf.h"
#include "tcp_vu.h"
/* MSS rounding: see SET_MSS() */ /* MSS rounding: see SET_MSS() */
#define MSS_DEFAULT 536 #define MSS_DEFAULT 536
@ -1312,6 +1313,9 @@ int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
static int tcp_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, static int tcp_send_flag(const struct ctx *c, struct tcp_tap_conn *conn,
int flags) int flags)
{ {
if (c->mode == MODE_VU)
return tcp_vu_send_flag(c, conn, flags);
return tcp_buf_send_flag(c, conn, flags); return tcp_buf_send_flag(c, conn, flags);
} }
@ -1705,6 +1709,9 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
*/ */
static int tcp_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) static int tcp_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
{ {
if (c->mode == MODE_VU)
return tcp_vu_data_from_sock(c, conn);
return tcp_buf_data_from_sock(c, conn); return tcp_buf_data_from_sock(c, conn);
} }

513
tcp_vu.c Normal file
View File

@ -0,0 +1,513 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* tcp_vu.c - TCP L2 vhost-user management functions
*
* Copyright Red Hat
* Author: Laurent Vivier <lvivier@redhat.com>
*/
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <linux/virtio_net.h>
#include "util.h"
#include "ip.h"
#include "passt.h"
#include "siphash.h"
#include "inany.h"
#include "vhost_user.h"
#include "tcp.h"
#include "pcap.h"
#include "flow.h"
#include "tcp_conn.h"
#include "flow_table.h"
#include "tcp_vu.h"
#include "tap.h"
#include "tcp_internal.h"
#include "checksum.h"
#include "vu_common.h"
static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + 1];
static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
/**
 * tcp_vu_hdrlen() - Size of the headers preceding TCP payload in a frame
 * @v6:		Set for IPv6 packet
 *
 * Return: combined size of the virtio-net (mergeable RX buffers), Ethernet,
 *	   IPv4 or IPv6, and option-less TCP headers
 */
static size_t tcp_vu_hdrlen(bool v6)
{
	const size_t l3len = v6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr);

	return sizeof(struct virtio_net_hdr_mrg_rxbuf) + sizeof(struct ethhdr) +
	       l3len + sizeof(struct tcphdr);
}
/**
 * tcp_vu_update_check() - Update the TCP checksum of a frame
 * @tapside:	Address information for one side of the flow
 * @iov:	Pointer to the array of IO vectors, the first one holds headers
 * @iov_used:	Length of the array
 *
 * The IP version is selected from @tapside->oaddr. The last argument given to
 * the checksum helpers is the offset of the L4 part from the start of the
 * first buffer.
 */
static void tcp_vu_update_check(const struct flowside *tapside,
				struct iovec *iov, int iov_used)
{
	char *frame = iov[0].iov_base;

	if (!inany_v4(&tapside->oaddr)) {
		const struct ipv6hdr *ip6h = vu_ip(frame);

		tcp_update_check_tcp6(ip6h, iov, iov_used,
				      (char *)vu_payloadv6(frame) - frame);
		return;
	}

	{
		const struct iphdr *iph = vu_ip(frame);

		tcp_update_check_tcp4(iph, iov, iov_used,
				      (char *)vu_payloadv4(frame) - frame);
	}
}
/**
 * tcp_vu_send_flag() - Send segment with flags to vhost-user (no payload)
 * @c:		Execution context
 * @conn:	Connection pointer
 * @flags:	TCP flags: if not set, send segment only if ACK is due
 *
 * If DUP_ACK is set in @flags and a second, large enough buffer can be
 * collected, an identical copy of the frame is queued after the first one.
 *
 * Return: -1 if no buffer could be collected from the RX virtqueue, the return
 *	   value of tcp_prepare_flags() if zero or negative, 0 once the
 *	   segment was flushed to the guest
 */
int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
{
	struct vu_dev *vdev = c->vdev;
	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
	const struct flowside *tapside = TAPFLOW(conn);
	size_t l2len, l4len, optlen, hdrlen;
	struct vu_virtq_element flags_elem[2];
	struct iovec flags_iov[2];
	struct ethhdr *eh;
	int elem_cnt;
	int nb_ack;
	int ret;

	hdrlen = tcp_vu_hdrlen(CONN_V6(conn));

	vu_set_element(&flags_elem[0], NULL, &flags_iov[0]);

	/* Ask for room for all headers plus the largest set of options */
	elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
			      hdrlen + sizeof(struct tcp_syn_opts), NULL);
	if (elem_cnt != 1)
		return -1;

	vu_set_vnethdr(vdev, flags_elem[0].in_sg[0].iov_base, 1);

	eh = vu_eth(flags_elem[0].in_sg[0].iov_base);

	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));

	if (CONN_V4(conn)) {
		struct tcp_payload_t *payload;
		struct iphdr *iph;
		uint32_t seq;

		eh->h_proto = htons(ETH_P_IP);

		iph = vu_ip(flags_elem[0].in_sg[0].iov_base);
		*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);

		payload = vu_payloadv4(flags_elem[0].in_sg[0].iov_base);
		memset(&payload->th, 0, sizeof(payload->th));
		payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
		payload->th.ack = 1;

		/* Sequence is sampled before tcp_prepare_flags() is called */
		seq = conn->seq_to_tap;
		ret = tcp_prepare_flags(c, conn, flags, &payload->th,
					(struct tcp_syn_opts *)payload->data,
					&optlen);
		if (ret <= 0) {
			/* Nothing to send: give the buffer back */
			vu_queue_rewind(vq, 1);
			return ret;
		}

		l4len = tcp_fill_headers4(conn, NULL, iph, payload, optlen,
					  NULL, seq, true);
		l2len = sizeof(*iph);
	} else {
		struct tcp_payload_t *payload;
		struct ipv6hdr *ip6h;
		uint32_t seq;

		eh->h_proto = htons(ETH_P_IPV6);

		ip6h = vu_ip(flags_elem[0].in_sg[0].iov_base);
		*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);

		payload = vu_payloadv6(flags_elem[0].in_sg[0].iov_base);
		memset(&payload->th, 0, sizeof(payload->th));
		payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
		payload->th.ack = 1;

		/* Sequence is sampled before tcp_prepare_flags() is called */
		seq = conn->seq_to_tap;
		ret = tcp_prepare_flags(c, conn, flags, &payload->th,
					(struct tcp_syn_opts *)payload->data,
					&optlen);
		if (ret <= 0) {
			/* Nothing to send: give the buffer back */
			vu_queue_rewind(vq, 1);
			return ret;
		}

		l4len = tcp_fill_headers6(conn, NULL, ip6h, payload, optlen,
					  seq, true);
		l2len = sizeof(*ip6h);
	}
	l2len += l4len + sizeof(struct ethhdr);

	/* Length handed to the guest includes the virtio-net header */
	flags_elem[0].in_sg[0].iov_len = l2len +
					 sizeof(struct virtio_net_hdr_mrg_rxbuf);
	if (*c->pcap) {
		tcp_vu_update_check(tapside, &flags_elem[0].in_sg[0], 1);
		pcap_iov(&flags_elem[0].in_sg[0], 1,
			 sizeof(struct virtio_net_hdr_mrg_rxbuf));
	}
	nb_ack = 1;

	if (flags & DUP_ACK) {
		vu_set_element(&flags_elem[1], NULL, &flags_iov[1]);

		elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
				      flags_elem[0].in_sg[0].iov_len, NULL);
		if (elem_cnt == 1 &&
		    flags_elem[1].in_sg[0].iov_len >=
		    flags_elem[0].in_sg[0].iov_len) {
			memcpy(flags_elem[1].in_sg[0].iov_base,
			       flags_elem[0].in_sg[0].iov_base,
			       flags_elem[0].in_sg[0].iov_len);
			/* The duplicate is exactly as long as the original */
			flags_elem[1].in_sg[0].iov_len =
				flags_elem[0].in_sg[0].iov_len;
			nb_ack++;

			/* Skip the virtio-net header, as for the first frame */
			if (*c->pcap) {
				pcap_iov(&flags_elem[1].in_sg[0], 1,
					 sizeof(struct virtio_net_hdr_mrg_rxbuf));
			}
		} else if (elem_cnt == 1) {
			/* Buffer too small for the copy: don't overflow it,
			 * and give it back as it won't be flushed
			 */
			vu_queue_rewind(vq, 1);
		}
	}

	vu_flush(vdev, vq, flags_elem, nb_ack);

	return 0;
}
/**
 * tcp_vu_sock_recv() - Receive datastream from socket into vhost-user buffers
 * @c:			Execution context
 * @conn:		Connection pointer
 * @v6:			Set for IPv6 connections
 * @already_sent:	Number of bytes already sent
 * @fillsize:		Number of bytes we can receive
 * @iov_cnt:		Number of virtqueue elements collected (output)
 *
 * Return: return value of recvmsg(): number of bytes peeked from the socket
 *	   (without peek_offset_cap, this includes the @already_sent bytes
 *	   read into the discard buffer), negative on failure
 */
static ssize_t tcp_vu_sock_recv(const struct ctx *c,
				const struct tcp_tap_conn *conn, bool v6,
				uint32_t already_sent, size_t fillsize,
				int *iov_cnt)
{
	struct vu_dev *vdev = c->vdev;
	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
	struct msghdr mh_sock = { 0 };
	uint16_t mss = MSS_GET(conn);
	int s = conn->sock;
	size_t hdrlen;
	int collected;
	ssize_t n;

	*iov_cnt = 0;

	hdrlen = tcp_vu_hdrlen(v6);

	/* iov_vu[0] is kept aside for the discard buffer, see below */
	vu_init_elem(elem, &iov_vu[1], VIRTQUEUE_MAX_SIZE);

	collected = 0;
	while (fillsize > 0 && collected < VIRTQUEUE_MAX_SIZE) {
		struct iovec *head;
		size_t frame_size;
		int cnt;

		if (mss > fillsize)
			mss = fillsize;

		cnt = vu_collect(vdev, vq, &elem[collected],
				 VIRTQUEUE_MAX_SIZE - collected,
				 mss + hdrlen, &frame_size);
		if (cnt == 0)
			break;

		frame_size -= hdrlen;

		/* Leave room for the headers at the start of the frame: the
		 * socket data goes right after them
		 */
		head = &elem[collected].in_sg[0];
		head->iov_base = (char *)head->iov_base + hdrlen;
		head->iov_len -= hdrlen;

		fillsize -= frame_size;
		collected += cnt;

		/* All the frames must have the same size (except the last
		 * one), otherwise we won't be able to scan the iov array to
		 * find the entries with headers (headers are spread every
		 * frame_size in the array)
		 */
		if (frame_size < mss)
			break;
	}

	mh_sock.msg_iov = iov_vu + 1;
	mh_sock.msg_iovlen = collected;
	if (!peek_offset_cap) {
		/* Bytes sent already are peeked again: drop them into the
		 * discard buffer, placed before the guest buffers
		 */
		iov_vu[0].iov_base = tcp_buf_discard;
		iov_vu[0].iov_len = already_sent;
		mh_sock.msg_iov = iov_vu;
		mh_sock.msg_iovlen = collected + 1;
	}

	for (;;) {
		n = recvmsg(s, &mh_sock, MSG_PEEK);
		if (n >= 0 || errno != EINTR)
			break;
	}

	*iov_cnt = collected;

	return n;
}
/**
 * tcp_vu_prepare() - Fill in Ethernet, IP and TCP headers of a data frame
 * @c:		Execution context
 * @conn:	Connection pointer
 * @first:	First buffer of the frame, it must be able to hold all headers
 * @dlen:	Packet data length
 * @check:	In: pointer to a checksum handed to tcp_fill_headers4(), NULL
 *		if none is known; out (IPv4 only): set to the checksum field
 *		of the IPv4 header just filled
 */
static void tcp_vu_prepare(const struct ctx *c,
			   struct tcp_tap_conn *conn, struct iovec *first,
			   size_t dlen, const uint16_t **check)
{
	const struct flowside *toside = TAPFLOW(conn);
	char *base = first->iov_base;
	struct tcp_payload_t *payload;
	struct ethhdr *eh;
	struct iphdr *iph;

	/* We assume the first iovec provided by the guest can embed all the
	 * headers needed by the L2 frame: this is asserted below
	 */
	eh = vu_eth(base);

	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));

	/* IPv6, unless both addresses of the tap side are IPv4 */
	if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
		struct ipv6hdr *ip6h;

		ASSERT(first->iov_len >= tcp_vu_hdrlen(true));

		eh->h_proto = htons(ETH_P_IPV6);

		ip6h = vu_ip(base);
		*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP);

		payload = vu_payloadv6(base);
		memset(&payload->th, 0, sizeof(payload->th));
		payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
		payload->th.ack = 1;

		tcp_fill_headers6(conn, NULL, ip6h, payload, dlen,
				  conn->seq_to_tap, true);
		return;
	}

	ASSERT(first->iov_len >= tcp_vu_hdrlen(false));

	eh->h_proto = htons(ETH_P_IP);

	iph = vu_ip(base);
	*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP);

	payload = vu_payloadv4(base);
	memset(&payload->th, 0, sizeof(payload->th));
	payload->th.doff = offsetof(struct tcp_payload_t, data) / 4;
	payload->th.ack = 1;

	tcp_fill_headers4(conn, NULL, iph, payload, dlen,
			  *check, conn->seq_to_tap, true);

	/* Let the caller pass this on for the next frame */
	*check = &iph->check;
}
/**
 * tcp_vu_data_from_sock() - Handle new data from socket, queue to vhost-user,
 *			     in window
 * @c:		Execution context
 * @conn:	Connection pointer
 *
 * Data is peeked from the socket directly into buffers collected from the RX
 * virtqueue, then headers are written in front of each frame and the frames
 * are flushed to the guest.
 *
 * Return: Negative on connection reset, 0 otherwise
 */
int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
{
	uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
	struct vu_dev *vdev = c->vdev;
	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
	const struct flowside *tapside = TAPFLOW(conn);
	uint16_t mss = MSS_GET(conn);
	size_t hdrlen, fillsize;
	int i, iov_cnt, iov_used;
	int v6 = CONN_V6(conn);
	uint32_t already_sent = 0;
	const uint16_t *check;
	struct iovec *first = NULL;
	int frame_size;
	int num_buffers;
	ssize_t len;

	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
		flow_err(conn,
			 "Got packet, but RX virtqueue not usable yet");
		return 0;
	}

	already_sent = conn->seq_to_tap - conn->seq_ack_from_tap;

	if (SEQ_LT(already_sent, 0)) {
		/* RFC 761, section 2.1. */
		flow_trace(conn, "ACK sequence gap: ACK for %u, sent: %u",
			   conn->seq_ack_from_tap, conn->seq_to_tap);
		conn->seq_to_tap = conn->seq_ack_from_tap;
		already_sent = 0;
		if (tcp_set_peek_offset(conn->sock, 0)) {
			tcp_rst(c, conn);
			return -1;
		}
	}

	if (!wnd_scaled || already_sent >= wnd_scaled) {
		conn_flag(c, conn, STALLED);
		conn_flag(c, conn, ACK_FROM_TAP_DUE);
		return 0;
	}

	/* Room left in the window announced by the guest */
	fillsize = wnd_scaled - already_sent;

	/* collect the buffers from vhost-user and fill them with the
	 * data from the socket
	 */
	len = tcp_vu_sock_recv(c, conn, v6, already_sent, fillsize, &iov_cnt);
	if (len < 0) {
		/* Save errno right away: the calls below might change it
		 * before it's checked and returned
		 */
		const int err = errno;

		vu_queue_rewind(vq, iov_cnt);
		if (err != EAGAIN && err != EWOULDBLOCK) {
			tcp_rst(c, conn);
			return -err;
		}
		return 0;
	}

	if (!len) {
		vu_queue_rewind(vq, iov_cnt);
		if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) ==
		    SOCK_FIN_RCVD) {
			int ret = tcp_vu_send_flag(c, conn, FIN | ACK);
			if (ret) {
				tcp_rst(c, conn);
				return ret;
			}

			conn_event(c, conn, TAP_FIN_SENT);
		}

		return 0;
	}

	/* Without peek offset support, bytes sent already were peeked again
	 * (into the discard buffer): they don't count as new data
	 */
	if (!peek_offset_cap)
		len -= already_sent;

	if (len <= 0) {
		vu_queue_rewind(vq, iov_cnt);
		conn_flag(c, conn, STALLED);
		return 0;
	}

	conn_flag(c, conn, ~STALLED);

	/* Likely, some new data was acked too. */
	tcp_update_seqack_wnd(c, conn, false, NULL);

	/* initialize headers */
	hdrlen = tcp_vu_hdrlen(v6);
	iov_used = 0;
	num_buffers = 0;
	check = NULL;
	frame_size = 0;

	/* iov_vu is an array of buffers and the buffer size can be
	 * smaller than the frame size we want to use but with
	 * num_buffer we can merge several virtio iov buffers in one packet
	 * we need only to set the packet headers in the first iov and
	 * num_buffer to the number of iov entries
	 */
	for (i = 0; i < iov_cnt && len; i++) {
		/* A new frame starts here: remember where its headers go */
		if (frame_size == 0)
			first = &iov_vu[i + 1];

		/* Trim the buffer holding the end of the received data */
		if (iov_vu[i + 1].iov_len > (size_t)len)
			iov_vu[i + 1].iov_len = len;

		len -= iov_vu[i + 1].iov_len;
		iov_used++;

		frame_size += iov_vu[i + 1].iov_len;
		num_buffers++;

		/* Close the frame once it's full, once data or buffers are
		 * exhausted, or right away if buffers can't be merged
		 */
		if (frame_size >= mss || len == 0 ||
		    i + 1 == iov_cnt ||
		    !vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
			if (i + 1 == iov_cnt)
				check = NULL;

			/* restore first iovec base: point to vnet header */
			first->iov_base = (char *)first->iov_base - hdrlen;
			first->iov_len += hdrlen;
			vu_set_vnethdr(vdev, first->iov_base, num_buffers);

			tcp_vu_prepare(c, conn, first, frame_size, &check);
			if (*c->pcap) {
				tcp_vu_update_check(tapside, first, num_buffers);
				pcap_iov(first, num_buffers,
					 sizeof(struct virtio_net_hdr_mrg_rxbuf));
			}

			conn->seq_to_tap += frame_size;

			frame_size = 0;
			num_buffers = 0;
		}
	}

	/* release unused buffers */
	vu_queue_rewind(vq, iov_cnt - iov_used);

	/* send packets */
	vu_flush(vdev, vq, elem, iov_used);

	conn_flag(c, conn, ACK_FROM_TAP_DUE);

	return 0;
}

12
tcp_vu.h Normal file
View File

@ -0,0 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* Copyright Red Hat
* Author: Laurent Vivier <lvivier@redhat.com>
*/
#ifndef TCP_VU_H
#define TCP_VU_H
int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags);
int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn);
#endif /*TCP_VU_H */

11
udp.c
View File

@ -110,6 +110,7 @@
#include "log.h" #include "log.h"
#include "flow_table.h" #include "flow_table.h"
#include "udp_internal.h" #include "udp_internal.h"
#include "udp_vu.h"
/* "Spliced" sockets indexed by bound port (host order) */ /* "Spliced" sockets indexed by bound port (host order) */
static int udp_splice_ns [IP_VERSIONS][NUM_PORTS]; static int udp_splice_ns [IP_VERSIONS][NUM_PORTS];
@ -628,6 +629,11 @@ void udp_listen_sock_handler(const struct ctx *c,
union epoll_ref ref, uint32_t events, union epoll_ref ref, uint32_t events,
const struct timespec *now) const struct timespec *now)
{ {
if (c->mode == MODE_VU) {
udp_vu_listen_sock_handler(c, ref, events, now);
return;
}
udp_buf_listen_sock_handler(c, ref, events, now); udp_buf_listen_sock_handler(c, ref, events, now);
} }
@ -698,6 +704,11 @@ static void udp_buf_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now) uint32_t events, const struct timespec *now)
{ {
if (c->mode == MODE_VU) {
udp_vu_reply_sock_handler(c, ref, events, now);
return;
}
udp_buf_reply_sock_handler(c, ref, events, now); udp_buf_reply_sock_handler(c, ref, events, now);
} }

336
udp_vu.c Normal file
View File

@ -0,0 +1,336 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* udp_vu.c - UDP L2 vhost-user management functions
*
* Copyright Red Hat
* Author: Laurent Vivier <lvivier@redhat.com>
*/
#include <unistd.h>
#include <assert.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <stdint.h>
#include <stddef.h>
#include <sys/uio.h>
#include <linux/virtio_net.h>
#include "checksum.h"
#include "util.h"
#include "ip.h"
#include "siphash.h"
#include "inany.h"
#include "passt.h"
#include "pcap.h"
#include "log.h"
#include "vhost_user.h"
#include "udp_internal.h"
#include "flow.h"
#include "flow_table.h"
#include "udp_flow.h"
#include "udp_vu.h"
#include "vu_common.h"
/* Scatter-gather entries for the buffers collected from the RX virtqueue:
 * vu_init_elem() assigns iov_vu[i] to elem[i], recvmsg() fills them
 */
static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
/* Virtqueue elements passed to vu_collect() and returned with vu_flush() */
static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE];
/**
 * udp_vu_hdrlen() - Size of the headers preceding UDP payload in a L2 frame
 * @v6:		Set for IPv6 packet
 *
 * Return: cumulative size of virtio-net, Ethernet, IP and UDP headers
 */
static size_t udp_vu_hdrlen(bool v6)
{
	const size_t l3len = v6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr);

	return sizeof(struct virtio_net_hdr_mrg_rxbuf) +
	       sizeof(struct ethhdr) + sizeof(struct udphdr) + l3len;
}
/**
 * udp_vu_sock_init() - Peek at the source address of the next datagram
 * @s:		Socket to peek from
 * @s_in:	Storage for the source address (output)
 *
 * The datagram is left in the socket queue (MSG_PEEK) and the call doesn't
 * block (MSG_DONTWAIT).
 *
 * Return: value returned by recvmsg(), negative on failure
 */
static int udp_vu_sock_init(int s, union sockaddr_inany *s_in)
{
	struct msghdr peek = { 0 };

	peek.msg_name = s_in;
	peek.msg_namelen = sizeof(*s_in);

	return recvmsg(s, &peek, MSG_PEEK | MSG_DONTWAIT);
}
/**
 * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
 * @c:		Execution context
 * @s:		Socket to receive from
 * @events:	epoll events bitmap
 * @v6:		Set for IPv6 connections
 * @dlen:	Size of received data (output)
 *
 * @dlen is only written once recvmsg() was called: it's left untouched if
 * EPOLLIN is not set, or if no usable buffer could be collected.
 *
 * Return: Number of iov entries used to store the datagram, 0 if nothing
 *	   was received
 */
static int udp_vu_sock_recv(const struct ctx *c, int s, uint32_t events,
			    bool v6, ssize_t *dlen)
{
	struct vu_dev *vdev = c->vdev;
	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
	int iov_cnt, idx, iov_used;
	struct msghdr msg  = { 0 };
	size_t off, hdrlen;

	ASSERT(!c->no_udp);

	if (!(events & EPOLLIN))
		return 0;

	/* compute L2 header length */
	hdrlen = udp_vu_hdrlen(v6);

	vu_init_elem(elem, iov_vu, VIRTQUEUE_MAX_SIZE);

	iov_cnt = vu_collect(vdev, vq, elem, VIRTQUEUE_MAX_SIZE,
			     IP_MAX_MTU - sizeof(struct udphdr) + hdrlen,
			     NULL);
	if (iov_cnt == 0)
		return 0;

	/* The headers are written to the first buffer only: if the guest gave
	 * us one that's too small for them, subtracting hdrlen below would
	 * wrap around and recvmsg() would be given a bogus length. Give the
	 * buffers back, as we do when none is available.
	 */
	if (iov_vu[0].iov_len < hdrlen) {
		vu_queue_rewind(vq, iov_cnt);
		return 0;
	}

	/* reserve space for the headers */
	iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
	iov_vu[0].iov_len -= hdrlen;

	/* read data from the socket */
	msg.msg_iov = iov_vu;
	msg.msg_iovlen = iov_cnt;

	*dlen = recvmsg(s, &msg, 0);
	if (*dlen < 0) {
		vu_queue_rewind(vq, iov_cnt);
		return 0;
	}

	/* restore the pointer to the headers address */
	iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
	iov_vu[0].iov_len += hdrlen;

	/* count the numbers of buffer filled by recvmsg() */
	idx = iov_skip_bytes(iov_vu, iov_cnt, *dlen + hdrlen, &off);

	/* adjust last iov length */
	if (idx < iov_cnt)
		iov_vu[idx].iov_len = off;
	/* a partially filled buffer counts as used */
	iov_used = idx + !!off;

	vu_set_vnethdr(vdev, iov_vu[0].iov_base, iov_used);

	/* release unused buffers */
	vu_queue_rewind(vq, iov_cnt - iov_used);

	return iov_used;
}
/**
 * udp_vu_prepare() - Fill in Ethernet, IP and UDP headers of the frame
 * @c:		Execution context
 * @toside:	Address information for one side of the flow
 * @dlen:	Packet data length
 *
 * Headers are written to the first buffer of iov_vu. IPv4 headers are used
 * only if both addresses of @toside are IPv4, IPv6 headers otherwise.
 *
 * Return: Layer-4 length
 */
static size_t udp_vu_prepare(const struct ctx *c,
			     const struct flowside *toside, ssize_t dlen)
{
	void *base = iov_vu[0].iov_base;
	struct ethhdr *eh = vu_eth(base);

	/* Ethernet addresses don't depend on the IP version */
	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));

	if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
		struct ipv6hdr *ip6h = vu_ip(base);
		struct udp_payload_t *bp = vu_payloadv6(base);

		eh->h_proto = htons(ETH_P_IPV6);

		*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);

		return udp_update_hdr6(ip6h, bp, toside, dlen, true);
	} else {
		struct iphdr *iph = vu_ip(base);
		struct udp_payload_t *bp = vu_payloadv4(base);

		eh->h_proto = htons(ETH_P_IP);

		*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);

		return udp_update_hdr4(iph, bp, toside, dlen, true);
	}
}
/**
 * udp_vu_csum() - Calculate and set checksum for a UDP packet
 * @toside:	Address information for one side of the flow
 * @iov_used:	Number of entries of iov_vu holding the frame
 */
static void udp_vu_csum(const struct flowside *toside, int iov_used)
{
	const struct in_addr *src4 = inany_v4(&toside->oaddr);
	const struct in_addr *dst4 = inany_v4(&toside->eaddr);
	char *base = iov_vu[0].iov_base;
	struct udp_payload_t *bp;

	if (!src4 || !dst4) {
		/* at least one address is not IPv4: this is an IPv6 frame */
		bp = vu_payloadv6(base);
		csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6,
			  iov_vu, iov_used, (char *)&bp->data - base);
		return;
	}

	bp = vu_payloadv4(base);
	csum_udp4(&bp->uh, *src4, *dst4, iov_vu, iov_used,
		  (char *)&bp->data - base);
}
/**
 * udp_vu_listen_sock_handler() - Handle new data from socket
 * @c:		Execution context
 * @ref:	epoll reference
 * @events:	epoll events bitmap
 * @now:	Current timestamp
 *
 * Up to UDP_MAX_FRAMES datagrams are handled per call. For each of them, the
 * source address is peeked first to find the matching flow, then the datagram
 * is received directly into buffers of the RX virtqueue. Datagrams that can't
 * be forwarded to the guest are dequeued and dropped.
 */
void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
				uint32_t events, const struct timespec *now)
{
	struct vu_dev *vdev = c->vdev;
	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
	int i;

	if (udp_sock_errs(c, ref.fd, events) < 0) {
		err("UDP: Unrecoverable error on listening socket:"
		    " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
		return;
	}

	for (i = 0; i < UDP_MAX_FRAMES; i++) {
		const struct flowside *toside;
		union sockaddr_inany s_in;
		flow_sidx_t sidx;
		uint8_t pif;
		ssize_t dlen;
		int iov_used;
		bool v6;

		/* Nothing (more) to read, or error: stop here */
		if (udp_vu_sock_init(ref.fd, &s_in) < 0)
			break;

		sidx = udp_flow_from_sock(c, ref, &s_in, now);
		pif = pif_at_sidx(sidx);

		if (pif != PIF_TAP) {
			struct msghdr discard = { 0 };

			if (flow_sidx_valid(sidx)) {
				flow_sidx_t fromsidx = flow_sidx_opposite(sidx);
				struct udp_flow *uflow = udp_at_sidx(sidx);

				flow_err(uflow,
					 "No support for forwarding UDP from %s to %s",
					 pif_name(pif_at_sidx(fromsidx)),
					 pif_name(pif));
			} else {
				debug("Discarding 1 datagram without flow");
			}

			/* So far we only peeked at the datagram: dequeue it,
			 * otherwise the next iteration would find it again
			 * and it would never leave the socket queue
			 */
			if (recvmsg(ref.fd, &discard, MSG_DONTWAIT) < 0)
				break;

			continue;
		}

		toside = flowside_at_sidx(sidx);

		/* IPv4 frame only if both addresses are IPv4 */
		v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));

		iov_used = udp_vu_sock_recv(c, ref.fd, events, v6, &dlen);
		if (iov_used <= 0)
			break;

		udp_vu_prepare(c, toside, dlen);
		/* the checksum is only computed if we capture packets */
		if (*c->pcap) {
			udp_vu_csum(toside, iov_used);
			pcap_iov(iov_vu, iov_used,
				 sizeof(struct virtio_net_hdr_mrg_rxbuf));
		}
		vu_flush(vdev, vq, elem, iov_used);
	}
}
/**
 * udp_vu_reply_sock_handler() - Handle new data from flow specific socket
 * @c:		Execution context
 * @ref:	epoll reference
 * @events:	epoll events bitmap
 * @now:	Current timestamp
 *
 * Up to UDP_MAX_FRAMES datagrams are received from the socket of the flow
 * side given by @ref, directly into buffers of the RX virtqueue, and sent to
 * the guest. The target interface and the IP version only depend on the
 * flow, so they are evaluated once, before the receive loop.
 */
void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
			       uint32_t events, const struct timespec *now)
{
	flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside);
	const struct flowside *toside = flowside_at_sidx(tosidx);
	struct udp_flow *uflow = udp_at_sidx(ref.flowside);
	struct vu_dev *vdev = c->vdev;
	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
	uint8_t topif;
	int from_s;
	bool v6;
	int i;

	ASSERT(!c->no_udp);
	/* Check the flow before it's dereferenced, not after */
	ASSERT(uflow);

	from_s = uflow->s[ref.flowside.sidei];

	if (udp_sock_errs(c, from_s, events) < 0) {
		flow_err(uflow, "Unrecoverable error on reply socket");
		flow_err_details(uflow);
		udp_flow_close(c, uflow);
		return;
	}

	/* The target doesn't change between datagrams: report an unsupported
	 * one a single time instead of once per loop iteration
	 */
	topif = pif_at_sidx(tosidx);
	if (topif != PIF_TAP) {
		uint8_t frompif = pif_at_sidx(ref.flowside);

		flow_err(uflow,
			 "No support for forwarding UDP from %s to %s",
			 pif_name(frompif), pif_name(topif));
		return;
	}

	/* IPv4 frame only if both addresses are IPv4 */
	v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));

	for (i = 0; i < UDP_MAX_FRAMES; i++) {
		ssize_t dlen;
		int iov_used;

		iov_used = udp_vu_sock_recv(c, from_s, events, v6, &dlen);
		if (iov_used <= 0)
			break;
		flow_trace(uflow, "Received 1 datagram on reply socket");
		/* refresh the activity timestamp of the flow */
		uflow->ts = now->tv_sec;

		udp_vu_prepare(c, toside, dlen);
		/* the checksum is only computed if we capture packets */
		if (*c->pcap) {
			udp_vu_csum(toside, iov_used);
			pcap_iov(iov_vu, iov_used,
				 sizeof(struct virtio_net_hdr_mrg_rxbuf));
		}
		vu_flush(vdev, vq, elem, iov_used);
	}
}

13
udp_vu.h Normal file
View File

@ -0,0 +1,13 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* Copyright Red Hat
* Author: Laurent Vivier <lvivier@redhat.com>
*/
#ifndef UDP_VU_H
#define UDP_VU_H
void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now);
void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now);
#endif /* UDP_VU_H */

View File

@ -48,12 +48,13 @@
/* vhost-user version we are compatible with */ /* vhost-user version we are compatible with */
#define VHOST_USER_VERSION 1 #define VHOST_USER_VERSION 1
static struct vu_dev vdev_storage;
/** /**
* vu_print_capabilities() - print vhost-user capabilities * vu_print_capabilities() - print vhost-user capabilities
* this is part of the vhost-user backend * this is part of the vhost-user backend
* convention. * convention.
*/ */
/* cppcheck-suppress unusedFunction */
void vu_print_capabilities(void) void vu_print_capabilities(void)
{ {
info("{"); info("{");
@ -163,9 +164,7 @@ static void vmsg_close_fds(const struct vhost_user_msg *vmsg)
*/ */
static void vu_remove_watch(const struct vu_dev *vdev, int fd) static void vu_remove_watch(const struct vu_dev *vdev, int fd)
{ {
/* Placeholder to add passt related code */ epoll_ctl(vdev->context->epollfd, EPOLL_CTL_DEL, fd, NULL);
(void)vdev;
(void)fd;
} }
/** /**
@ -487,6 +486,14 @@ static bool vu_set_mem_table_exec(struct vu_dev *vdev,
} }
} }
/* As vu_packet_check_range() has no access to the number of
* memory regions, mark the end of the array with mmap_addr = 0
*/
ASSERT(vdev->nregions < VHOST_USER_MAX_RAM_SLOTS - 1);
vdev->regions[vdev->nregions].mmap_addr = 0;
tap_sock_update_pool(vdev->regions, 0);
return false; return false;
} }
@ -615,9 +622,16 @@ static bool vu_get_vring_base_exec(struct vu_dev *vdev,
*/ */
static void vu_set_watch(const struct vu_dev *vdev, int idx) static void vu_set_watch(const struct vu_dev *vdev, int idx)
{ {
/* Placeholder to add passt related code */ union epoll_ref ref = {
(void)vdev; .type = EPOLL_TYPE_VHOST_KICK,
(void)idx; .fd = vdev->vq[idx].kick_fd,
.queue = idx
};
struct epoll_event ev = { 0 };
ev.data.u64 = ref.u64;
ev.events = EPOLLIN;
epoll_ctl(vdev->context->epollfd, EPOLL_CTL_ADD, ref.fd, &ev);
} }
/** /**
@ -674,7 +688,7 @@ static bool vu_set_vring_kick_exec(struct vu_dev *vdev,
vdev->vq[idx].started = true; vdev->vq[idx].started = true;
if (vdev->vq[idx].kick_fd != -1 && VHOST_USER_IS_QUEUE_TX(idx)) { if (vdev->vq[idx].kick_fd != -1 && VHOST_USER_IS_QUEUE_TX(idx)) {
vu_set_watch(vdev, vdev->vq[idx].kick_fd); vu_set_watch(vdev, idx);
debug("Waiting for kicks on fd: %d for vq: %d", debug("Waiting for kicks on fd: %d for vq: %d",
vdev->vq[idx].kick_fd, idx); vdev->vq[idx].kick_fd, idx);
} }
@ -829,14 +843,14 @@ static bool vu_set_vring_enable_exec(struct vu_dev *vdev,
* @c: execution context * @c: execution context
* @vdev: vhost-user device * @vdev: vhost-user device
*/ */
/* cppcheck-suppress unusedFunction */ void vu_init(struct ctx *c)
void vu_init(struct ctx *c, struct vu_dev *vdev)
{ {
int i; int i;
vdev->context = c; c->vdev = &vdev_storage;
c->vdev->context = c;
for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) { for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) {
vdev->vq[i] = (struct vu_virtq){ c->vdev->vq[i] = (struct vu_virtq){
.call_fd = -1, .call_fd = -1,
.kick_fd = -1, .kick_fd = -1,
.err_fd = -1, .err_fd = -1,
@ -849,7 +863,6 @@ void vu_init(struct ctx *c, struct vu_dev *vdev)
* vu_cleanup() - Reset vhost-user device * vu_cleanup() - Reset vhost-user device
* @vdev: vhost-user device * @vdev: vhost-user device
*/ */
/* cppcheck-suppress unusedFunction */
void vu_cleanup(struct vu_dev *vdev) void vu_cleanup(struct vu_dev *vdev)
{ {
unsigned int i; unsigned int i;
@ -896,8 +909,7 @@ void vu_cleanup(struct vu_dev *vdev)
*/ */
static void vu_sock_reset(struct vu_dev *vdev) static void vu_sock_reset(struct vu_dev *vdev)
{ {
/* Placeholder to add passt related code */ tap_sock_reset(vdev->context);
(void)vdev;
} }
static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev, static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
@ -925,7 +937,6 @@ static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
* @fd: vhost-user message socket * @fd: vhost-user message socket
* @events: epoll events * @events: epoll events
*/ */
/* cppcheck-suppress unusedFunction */
void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events) void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events)
{ {
struct vhost_user_msg msg = { 0 }; struct vhost_user_msg msg = { 0 };

View File

@ -183,7 +183,6 @@ struct vhost_user_msg {
* *
* Return: true if the virqueue is enabled, false otherwise * Return: true if the virqueue is enabled, false otherwise
*/ */
/* cppcheck-suppress unusedFunction */
static inline bool vu_queue_enabled(const struct vu_virtq *vq) static inline bool vu_queue_enabled(const struct vu_virtq *vq)
{ {
return vq->enable; return vq->enable;
@ -195,14 +194,13 @@ static inline bool vu_queue_enabled(const struct vu_virtq *vq)
* *
* Return: true if the virqueue is started, false otherwise * Return: true if the virqueue is started, false otherwise
*/ */
/* cppcheck-suppress unusedFunction */
static inline bool vu_queue_started(const struct vu_virtq *vq) static inline bool vu_queue_started(const struct vu_virtq *vq)
{ {
return vq->started; return vq->started;
} }
void vu_print_capabilities(void); void vu_print_capabilities(void);
void vu_init(struct ctx *c, struct vu_dev *vdev); void vu_init(struct ctx *c);
void vu_cleanup(struct vu_dev *vdev); void vu_cleanup(struct vu_dev *vdev);
void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events); void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events);
#endif /* VHOST_USER_H */ #endif /* VHOST_USER_H */

View File

@ -328,7 +328,6 @@ static bool vring_can_notify(const struct vu_dev *dev, struct vu_virtq *vq)
* @dev: Vhost-user device * @dev: Vhost-user device
* @vq: Virtqueue * @vq: Virtqueue
*/ */
/* cppcheck-suppress unusedFunction */
void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq) void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq)
{ {
if (!vq->vring.avail) if (!vq->vring.avail)
@ -504,7 +503,6 @@ static int vu_queue_map_desc(struct vu_dev *dev, struct vu_virtq *vq, unsigned i
* *
* Return: -1 if there is an error, 0 otherwise * Return: -1 if there is an error, 0 otherwise
*/ */
/* cppcheck-suppress unusedFunction */
int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq, struct vu_virtq_element *elem) int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq, struct vu_virtq_element *elem)
{ {
unsigned int head; unsigned int head;
@ -565,7 +563,6 @@ void vu_queue_unpop(struct vu_virtq *vq)
* @vq: Virtqueue * @vq: Virtqueue
* @num: Number of element to unpop * @num: Number of element to unpop
*/ */
/* cppcheck-suppress unusedFunction */
bool vu_queue_rewind(struct vu_virtq *vq, unsigned int num) bool vu_queue_rewind(struct vu_virtq *vq, unsigned int num)
{ {
if (num > vq->inuse) if (num > vq->inuse)
@ -621,7 +618,6 @@ void vu_queue_fill_by_index(struct vu_virtq *vq, unsigned int index,
* @len: Size of the element * @len: Size of the element
* @idx: Used ring entry index * @idx: Used ring entry index
*/ */
/* cppcheck-suppress unusedFunction */
void vu_queue_fill(struct vu_virtq *vq, const struct vu_virtq_element *elem, void vu_queue_fill(struct vu_virtq *vq, const struct vu_virtq_element *elem,
unsigned int len, unsigned int idx) unsigned int len, unsigned int idx)
{ {
@ -645,7 +641,6 @@ static inline void vring_used_idx_set(struct vu_virtq *vq, uint16_t val)
* @vq: Virtqueue * @vq: Virtqueue
* @count: Number of entry to flush * @count: Number of entry to flush
*/ */
/* cppcheck-suppress unusedFunction */
void vu_queue_flush(struct vu_virtq *vq, unsigned int count) void vu_queue_flush(struct vu_virtq *vq, unsigned int count)
{ {
uint16_t old, new; uint16_t old, new;

285
vu_common.c Normal file
View File

@ -0,0 +1,285 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* Copyright Red Hat
* Author: Laurent Vivier <lvivier@redhat.com>
*
* vu_common.c - vhost-user common UDP and TCP functions
*/
#include <unistd.h>
#include <sys/uio.h>
#include <sys/eventfd.h>
#include <linux/virtio_net.h>
#include "util.h"
#include "passt.h"
#include "tap.h"
#include "vhost_user.h"
#include "pcap.h"
#include "vu_common.h"
/**
 * vu_packet_check_range() - Check if a given memory zone is contained in
 *			     a mapped guest memory region
 * @buf:	Array of the available memory regions, terminated by an entry
 *		with mmap_addr set to 0
 * @offset:	Offset of data range in packet descriptor
 * @len:	Length of desired data range
 * @start:	Start of the packet descriptor
 *
 * Return: 0 if the zone is in a mapped memory region, -1 otherwise
 */
int vu_packet_check_range(void *buf, size_t offset, size_t len,
			  const char *start)
{
	struct vu_dev_region *region = buf;

	while (region->mmap_addr) {
		/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
		char *m = (char *)region->mmap_addr;

		/* the range must start at or after the mapping address and
		 * end before the end of the region
		 */
		if (m <= start &&
		    start + offset + len <= m + region->mmap_offset +
					    region->size)
			return 0;

		region++;
	}

	return -1;
}
/**
* vu_init_elem() - initialize an array of virtqueue element with 1 iov in each
* @elem: Array of virtqueue element to initialize
* @iov: Array of iovec to assign to virtqueue element
* @elem_cnt: Number of virtqueue element
*/
void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov, int elem_cnt)
{
int i;
for (i = 0; i < elem_cnt; i++)
vu_set_element(&elem[i], NULL, &iov[i]);
}
/**
 * vu_collect() - collect virtio buffers from a given virtqueue
 * @vdev:		vhost-user device
 * @vq:			virtqueue to collect from
 * @elem:		Array of virtqueue element
 *			each element must be initialized with one iovec entry
 *			in the in_sg array.
 * @max_elem:		Number of virtqueue element in the array
 * @size:		Maximum size of the data in the frame
 * @frame_size:		The total size of the buffers (output), can be NULL
 *
 * Buffers are popped until @size bytes are available, @max_elem elements are
 * used, or the queue has nothing more to give. The length of the last buffer
 * is trimmed so that the total doesn't exceed @size. A single buffer is
 * collected if VIRTIO_NET_F_MRG_RXBUF is not available.
 *
 * Return: number of elements used to contain the frame
 */
int vu_collect(struct vu_dev *vdev, struct vu_virtq *vq,
	       struct vu_virtq_element *elem, int max_elem,
	       size_t size, size_t *frame_size)
{
	size_t collected = 0;
	int n;

	for (n = 0; collected < size && n < max_elem; ) {
		struct vu_virtq_element *cur = &elem[n];
		struct iovec *sg;

		if (vu_queue_pop(vdev, vq, cur) < 0)
			break;

		if (cur->in_num < 1) {
			warn("virtio-net receive queue contains no in buffers");
			vu_queue_detach_element(vq);
			break;
		}

		/* don't take more than what's still needed from this buffer */
		sg = &cur->in_sg[0];
		if (sg->iov_len > size - collected)
			sg->iov_len = size - collected;

		collected += sg->iov_len;
		n++;

		/* buffers can't be merged: one frame, one buffer */
		if (!vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
			break;
	}

	if (frame_size)
		*frame_size = collected;

	return n;
}
/**
 * vu_set_vnethdr() - set virtio-net headers
 * @vdev:		vhost-user device
 * @vnethdr:		Address of the header to set
 * @num_buffers:	Number of guest buffers of the frame
 *
 * The num_buffers field is only written if VIRTIO_NET_F_MRG_RXBUF is
 * available.
 */
void vu_set_vnethdr(const struct vu_dev *vdev,
		    struct virtio_net_hdr_mrg_rxbuf *vnethdr,
		    int num_buffers)
{
	vnethdr->hdr = VU_HEADER;

	if (!vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
		return;

	vnethdr->num_buffers = htole16(num_buffers);
}
/**
* vu_flush() - flush all the collected buffers to the vhost-user interface
* @vdev: vhost-user device
* @vq: vhost-user virtqueue
* @elem: virtqueue element array to send back to the virqueue
* @iov_used: Length of the array
*/
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int elem_cnt)
{
int i;
for (i = 0; i < elem_cnt; i++)
vu_queue_fill(vq, &elem[i], elem[i].in_sg[0].iov_len, i);
vu_queue_flush(vq, elem_cnt);
vu_queue_notify(vdev, vq);
}
/**
* vu_handle_tx() - Receive data from the TX virtqueue
* @vdev: vhost-user device
* @index: index of the virtqueue
* @now: Current timestamp
*/
static void vu_handle_tx(struct vu_dev *vdev, int index,
const struct timespec *now)
{
struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
struct vu_virtq *vq = &vdev->vq[index];
int hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
int out_sg_count;
int count;
if (!VHOST_USER_IS_QUEUE_TX(index)) {
debug("vhost-user: index %d is not a TX queue", index);
return;
}
tap_flush_pools();
count = 0;
out_sg_count = 0;
while (count < VIRTQUEUE_MAX_SIZE) {
int ret;
vu_set_element(&elem[count], &out_sg[out_sg_count], NULL);
ret = vu_queue_pop(vdev, vq, &elem[count]);
if (ret < 0)
break;
out_sg_count += elem[count].out_num;
if (elem[count].out_num < 1) {
warn("virtio-net transmit queue contains no out buffers");
break;
}
ASSERT(elem[count].out_num == 1);
tap_add_packet(vdev->context,
elem[count].out_sg[0].iov_len - hdrlen,
(char *)elem[count].out_sg[0].iov_base + hdrlen);
count++;
}
tap_handler(vdev->context, now);
if (count) {
int i;
for (i = 0; i < count; i++)
vu_queue_fill(vq, &elem[i], 0, i);
vu_queue_flush(vq, count);
vu_queue_notify(vdev, vq);
}
}
/**
 * vu_kick_cb() - Called on a kick event to start to receive data
 * @vdev:	vhost-user device
 * @ref:	epoll reference information
 * @now:	Current timestamp
 *
 * The counter of the kick eventfd is read, then pending frames are handled
 * if the kick is for a TX queue. A failure to read the eventfd is fatal.
 */
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
		const struct timespec *now)
{
	eventfd_t kick_data;
	int rc;

	rc = eventfd_read(ref.fd, &kick_data);
	if (rc == -1)
		die_perror("vhost-user kick eventfd_read()");

	debug("vhost-user: got kick_data: %016"PRIx64" idx:%d",
	      kick_data, ref.queue);
	if (VHOST_USER_IS_QUEUE_TX(ref.queue))
		vu_handle_tx(vdev, ref.queue, now);
}
/**
 * vu_send_single() - Send a buffer to the front-end using the RX virtqueue
 * @c:		execution context
 * @buf:	address of the buffer
 * @size:	size of the buffer
 *
 * The data is copied to buffers collected from the RX virtqueue, after a
 * virtio-net header. If the collected buffers can't contain header and data,
 * they are detached from the queue and nothing is sent.
 *
 * Return: number of bytes sent, 0 if the RX virtqueue is not usable yet,
 *	   -1 if there is an error
 */
int vu_send_single(const struct ctx *c, const void *buf, size_t size)
{
	struct vu_dev *vdev = c->vdev;
	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
	struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
	struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
	size_t total;
	int elem_cnt;
	int i;

	debug("vu_send_single size %zu", size);

	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
		err("Got packet, but RX virtqueue not usable yet");
		return 0;
	}

	vu_init_elem(elem, in_sg, VIRTQUEUE_MAX_SIZE);

	/* the virtio-net header needs room in the buffers too */
	size += sizeof(struct virtio_net_hdr_mrg_rxbuf);
	elem_cnt = vu_collect(vdev, vq, elem, VIRTQUEUE_MAX_SIZE, size, &total);
	if (total < size) {
		/* total is a size_t: print it with %zu, not %zd */
		debug("vu_send_single: no space to send the data "
		      "elem_cnt %d size %zu", elem_cnt, total);
		goto err;
	}

	vu_set_vnethdr(vdev, in_sg[0].iov_base, elem_cnt);

	/* from here on, total is the length of the data alone */
	total -= sizeof(struct virtio_net_hdr_mrg_rxbuf);

	/* copy data from the buffer to the iovec */
	iov_from_buf(in_sg, elem_cnt, sizeof(struct virtio_net_hdr_mrg_rxbuf),
		     buf, total);

	if (*c->pcap) {
		pcap_iov(in_sg, elem_cnt,
			 sizeof(struct virtio_net_hdr_mrg_rxbuf));
	}

	vu_flush(vdev, vq, elem, elem_cnt);

	debug("vhost-user sent %zu", total);

	return total;
err:
	/* give up on the elements we popped */
	for (i = 0; i < elem_cnt; i++)
		vu_queue_detach_element(vq);

	return -1;
}

60
vu_common.h Normal file
View File

@ -0,0 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright Red Hat
* Author: Laurent Vivier <lvivier@redhat.com>
*
* vhost-user common UDP and TCP functions
*/
#ifndef VU_COMMON_H
#define VU_COMMON_H
#include <linux/virtio_net.h>
/**
 * vu_eth() - Get the address of the Ethernet header of a frame
 * @base:	Address of the frame, starting with the virtio-net header
 *
 * Return: address right after the virtio-net header
 */
static inline void *vu_eth(void *base)
{
	char *p = base;

	p += sizeof(struct virtio_net_hdr_mrg_rxbuf);

	return p;
}
/**
 * vu_ip() - Get the address of the IP header of a frame
 * @base:	Address of the frame, starting with the virtio-net header
 *
 * Return: address right after the Ethernet header
 */
static inline void *vu_ip(void *base)
{
	struct ethhdr *eh = vu_eth(base);

	return eh + 1;
}
/**
 * vu_payloadv4() - Get the address of the IPv4 payload of a frame
 * @base:	Address of the frame, starting with the virtio-net header
 *
 * Return: address right after an IPv4 header without options
 */
static inline void *vu_payloadv4(void *base)
{
	struct iphdr *iph = vu_ip(base);

	return iph + 1;
}
/**
 * vu_payloadv6() - Get the address of the IPv6 payload of a frame
 * @base:	Address of the frame, starting with the virtio-net header
 *
 * Return: address right after the fixed IPv6 header
 */
static inline void *vu_payloadv6(void *base)
{
	struct ipv6hdr *ip6h = vu_ip(base);

	return ip6h + 1;
}
/**
 * vu_set_element() - Initialize a vu_virtq_element
 * @elem:	Element to initialize
 * @out_sg:	One out iovec entry to set in elem, NULL for none
 * @in_sg:	One in iovec entry to set in elem, NULL for none
 *
 * The out and in counts are set to 1 if the matching entry is given, to 0
 * otherwise.
 */
static inline void vu_set_element(struct vu_virtq_element *elem,
				  struct iovec *out_sg, struct iovec *in_sg)
{
	elem->out_sg = out_sg;
	elem->out_num = out_sg ? 1 : 0;

	elem->in_sg = in_sg;
	elem->in_num = in_sg ? 1 : 0;
}
void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov,
int elem_cnt);
int vu_collect(struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int max_elem, size_t size,
size_t *frame_size);
void vu_set_vnethdr(const struct vu_dev *vdev,
struct virtio_net_hdr_mrg_rxbuf *vnethdr,
int num_buffers);
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int elem_cnt);
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
const struct timespec *now);
int vu_send_single(const struct ctx *c, const void *buf, size_t size);
#endif /* VU_COMMON_H */