mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2025-01-24 05:25:18 +00:00
kvm/virtio: Set IFF_VNET_HDR when setting up tap fds
IFF_VNET_HDR is a tun/tap flag that allows you to send and receive large (i.e. GSO) packets and packets with partial checksums. Setting the flag means that every packet is preceded by the same header which virtio uses to communicate GSO/csum metadata. By enabling this flag on the tap fds we create, we greatly increase the achievable throughput with virtio_net. However, we need to be careful to only set the flag when a) QEMU has support for this ABI and b) the value of the flag is queryable using the TUNGETIFF ioctl. It's nearly five months since kvm-74 - the first KVM release with this feature - was released. Up until now, we've not added libvirt support because there is no clean way to detect support for this in QEMU at runtime. A brief attempt to add an "info capabilities" monitor command to QEMU floundered. Perfect is the enemy of good enough. Probing the KVM version will suffice for now. Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
parent
e15147c141
commit
b4f62abbf1
32
ChangeLog
32
ChangeLog
@ -1,3 +1,35 @@
|
|||||||
|
Tue Jan 27 10:48:12 IST 2009 Mark McLoughlin <markmc@redhat.com>
|
||||||
|
|
||||||
|
kvm/virtio: Set IFF_VNET_HDR when setting up tap fds
|
||||||
|
|
||||||
|
IFF_VNET_HDR is a tun/tap flag that allows you to send and receive
|
||||||
|
large (i.e. GSO) packets and packets with partial checksums. Setting
|
||||||
|
the flag means that every packet is preceded by the same header which
|
||||||
|
virtio uses to communicate GSO/csum metadata.
|
||||||
|
|
||||||
|
By enabling this flag on the tap fds we create, we greatly increase
|
||||||
|
the achievable throughput with virtio_net.
|
||||||
|
|
||||||
|
However, we need to be careful to only set the flag when a) QEMU has
|
||||||
|
support for this ABI and b) the value of the flag is queryable using
|
||||||
|
the TUNGETIFF ioctl.
|
||||||
|
|
||||||
|
It's nearly five months since kvm-74 - the first KVM release with this
|
||||||
|
feature - was released. Up until now, we've not added libvirt support
|
||||||
|
because there is no clean way to detect support for this in QEMU at
|
||||||
|
runtime. A brief attempt to add an "info capabilities" monitor command
|
||||||
|
to QEMU floundered. Perfect is the enemy of good enough. Probing the
|
||||||
|
KVM version will suffice for now.
|
||||||
|
|
||||||
|
* src/qemu_conf.[ch] (qemudExtractVersionInfo): detect the KVM version,
|
||||||
|
set QEMUD_CMD_FLAG_VNET_HDR if we have kvm-74 or newer
|
||||||
|
(qemudBuildCommandLine): if qemu supports VNET_HDR and this is a
|
||||||
|
virtio interface, then use it
|
||||||
|
|
||||||
|
* src/bridge.[ch] (brProbeVnetHdr): only enable IFF_VNET_HDR if
|
||||||
|
the kernel bits are available
|
||||||
|
(brAddTap): add a @vnet_hdr arg, set IFF_VNET_HDR
|
||||||
|
|
||||||
Tue Jan 27 11:53:32 +0100 2009 Jim Meyering <meyering@redhat.com>
|
Tue Jan 27 11:53:32 +0100 2009 Jim Meyering <meyering@redhat.com>
|
||||||
and Daniel P. Berrange <berrange@redhat.com>
|
and Daniel P. Berrange <berrange@redhat.com>
|
||||||
|
|
||||||
|
67
src/bridge.c
67
src/bridge.c
@ -47,6 +47,7 @@
|
|||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
#include "logging.h"
|
||||||
|
|
||||||
#define MAX_BRIDGE_ID 256
|
#define MAX_BRIDGE_ID 256
|
||||||
|
|
||||||
@ -402,11 +403,68 @@ static int brSetInterfaceMtu(brControl *ctl,
|
|||||||
return ifSetMtu(ctl, ifname, mtu);
|
return ifSetMtu(ctl, ifname, mtu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* brProbeVnetHdr:
|
||||||
|
* @tapfd: a tun/tap file descriptor
|
||||||
|
*
|
||||||
|
* Check whether it is safe to enable the IFF_VNET_HDR flag on the
|
||||||
|
* tap interface.
|
||||||
|
*
|
||||||
|
* Setting IFF_VNET_HDR enables QEMU's virtio_net driver to allow
|
||||||
|
* guests to pass larger (GSO) packets, with partial checksums, to
|
||||||
|
* the host. This greatly increases the achievable throughput.
|
||||||
|
*
|
||||||
|
* It is only useful to enable this when we're setting up a virtio
|
||||||
|
* interface. And it is only *safe* to enable it when we know for
|
||||||
|
* sure that a) qemu has support for IFF_VNET_HDR and b) the running
|
||||||
|
* kernel implements the TUNGETIFF ioctl(), which qemu needs to query
|
||||||
|
* the supplied tapfd.
|
||||||
|
*
|
||||||
|
* Returns 1 if it is safe to enable IFF_VNET_HDR on @tapfd, or 0 if not.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
brProbeVnetHdr(int tapfd)
|
||||||
|
{
|
||||||
|
#if defined(IFF_VNET_HDR) && defined(TUNGETFEATURES) && defined(TUNGETIFF)
|
||||||
|
unsigned int features;
|
||||||
|
struct ifreq dummy;
|
||||||
|
|
||||||
|
if (ioctl(tapfd, TUNGETFEATURES, &features) != 0) {
|
||||||
|
VIR_INFO0(_("Not enabling IFF_VNET_HDR; "
|
||||||
|
"TUNGETFEATURES ioctl() not implemented"));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(features & IFF_VNET_HDR)) {
|
||||||
|
VIR_INFO0(_("Not enabling IFF_VNET_HDR; "
|
||||||
|
"TUNGETFEATURES ioctl() reports no IFF_VNET_HDR"));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The kernel will always return -1 at this point.
|
||||||
|
* If TUNGETIFF is implemented we expect errno == EBADFD; any other result is treated as "not implemented".
|
||||||
|
*/
|
||||||
|
if (ioctl(tapfd, TUNGETIFF, &dummy) != -1 || errno != EBADFD) {
|
||||||
|
VIR_INFO0(_("Not enabling IFF_VNET_HDR; "
|
||||||
|
"TUNGETIFF ioctl() not implemented"));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
VIR_INFO0(_("Enabling IFF_VNET_HDR"));
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
#else
|
||||||
|
VIR_INFO0(_("Not enabling IFF_VNET_HDR; disabled at build time"));
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* brAddTap:
|
* brAddTap:
|
||||||
* @ctl: bridge control pointer
|
* @ctl: bridge control pointer
|
||||||
* @bridge: the bridge name
|
* @bridge: the bridge name
|
||||||
* @ifname: the interface name (or name template)
|
* @ifname: the interface name (or name template)
|
||||||
|
* @vnet_hdr: whether to try enabling IFF_VNET_HDR
|
||||||
* @tapfd: file descriptor return value for the new tap device
|
* @tapfd: file descriptor return value for the new tap device
|
||||||
*
|
*
|
||||||
* This function creates a new tap device on a bridge. @ifname can be either
|
* This function creates a new tap device on a bridge. @ifname can be either
|
||||||
@ -420,6 +478,7 @@ int
|
|||||||
brAddTap(brControl *ctl,
|
brAddTap(brControl *ctl,
|
||||||
const char *bridge,
|
const char *bridge,
|
||||||
char **ifname,
|
char **ifname,
|
||||||
|
int vnet_hdr,
|
||||||
int *tapfd)
|
int *tapfd)
|
||||||
{
|
{
|
||||||
int id, subst, fd;
|
int id, subst, fd;
|
||||||
@ -435,6 +494,9 @@ brAddTap(brControl *ctl,
|
|||||||
if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
|
if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
|
||||||
return errno;
|
return errno;
|
||||||
|
|
||||||
|
if (vnet_hdr)
|
||||||
|
vnet_hdr = brProbeVnetHdr(fd);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
struct ifreq try;
|
struct ifreq try;
|
||||||
int len;
|
int len;
|
||||||
@ -443,6 +505,11 @@ brAddTap(brControl *ctl,
|
|||||||
|
|
||||||
try.ifr_flags = IFF_TAP|IFF_NO_PI;
|
try.ifr_flags = IFF_TAP|IFF_NO_PI;
|
||||||
|
|
||||||
|
#ifdef IFF_VNET_HDR
|
||||||
|
if (vnet_hdr)
|
||||||
|
try.ifr_flags |= IFF_VNET_HDR;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (subst) {
|
if (subst) {
|
||||||
len = snprintf(try.ifr_name, BR_IFNAME_MAXLEN, *ifname, id);
|
len = snprintf(try.ifr_name, BR_IFNAME_MAXLEN, *ifname, id);
|
||||||
if (len >= BR_IFNAME_MAXLEN) {
|
if (len >= BR_IFNAME_MAXLEN) {
|
||||||
|
@ -63,6 +63,7 @@ int brDeleteInterface (brControl *ctl,
|
|||||||
int brAddTap (brControl *ctl,
|
int brAddTap (brControl *ctl,
|
||||||
const char *bridge,
|
const char *bridge,
|
||||||
char **ifname,
|
char **ifname,
|
||||||
|
int vnet_hdr,
|
||||||
int *tapfd);
|
int *tapfd);
|
||||||
|
|
||||||
int brSetInterfaceUp (brControl *ctl,
|
int brSetInterfaceUp (brControl *ctl,
|
||||||
|
@ -353,7 +353,7 @@ int qemudExtractVersionInfo(const char *qemu,
|
|||||||
int newstdout = -1;
|
int newstdout = -1;
|
||||||
int ret = -1, status;
|
int ret = -1, status;
|
||||||
unsigned int major, minor, micro;
|
unsigned int major, minor, micro;
|
||||||
unsigned int version;
|
unsigned int version, kvm_version;
|
||||||
unsigned int flags = 0;
|
unsigned int flags = 0;
|
||||||
|
|
||||||
if (retflags)
|
if (retflags)
|
||||||
@ -371,10 +371,13 @@ int qemudExtractVersionInfo(const char *qemu,
|
|||||||
if (len < 0)
|
if (len < 0)
|
||||||
goto cleanup2;
|
goto cleanup2;
|
||||||
|
|
||||||
if (sscanf(help, "QEMU PC emulator version %u.%u.%u",
|
if (sscanf(help, "QEMU PC emulator version %u.%u.%u (kvm-%u)",
|
||||||
&major, &minor, &micro) != 3) {
|
&major, &minor, &micro, &kvm_version) != 4)
|
||||||
|
kvm_version = 0;
|
||||||
|
|
||||||
|
if (!kvm_version && sscanf(help, "QEMU PC emulator version %u.%u.%u",
|
||||||
|
&major, &minor, &micro) != 3)
|
||||||
goto cleanup2;
|
goto cleanup2;
|
||||||
}
|
|
||||||
|
|
||||||
version = (major * 1000 * 1000) + (minor * 1000) + micro;
|
version = (major * 1000 * 1000) + (minor * 1000) + micro;
|
||||||
|
|
||||||
@ -394,6 +397,8 @@ int qemudExtractVersionInfo(const char *qemu,
|
|||||||
flags |= QEMUD_CMD_FLAG_DRIVE_BOOT;
|
flags |= QEMUD_CMD_FLAG_DRIVE_BOOT;
|
||||||
if (version >= 9000)
|
if (version >= 9000)
|
||||||
flags |= QEMUD_CMD_FLAG_VNC_COLON;
|
flags |= QEMUD_CMD_FLAG_VNC_COLON;
|
||||||
|
if (kvm_version >= 74)
|
||||||
|
flags |= QEMUD_CMD_FLAG_VNET_HDR;
|
||||||
|
|
||||||
if (retversion)
|
if (retversion)
|
||||||
*retversion = version;
|
*retversion = version;
|
||||||
@ -404,6 +409,8 @@ int qemudExtractVersionInfo(const char *qemu,
|
|||||||
|
|
||||||
qemudDebug("Version %d %d %d Cooked version: %d, with flags ? %d",
|
qemudDebug("Version %d %d %d Cooked version: %d, with flags ? %d",
|
||||||
major, minor, micro, version, flags);
|
major, minor, micro, version, flags);
|
||||||
|
if (kvm_version)
|
||||||
|
qemudDebug("KVM version %d detected", kvm_version);
|
||||||
|
|
||||||
cleanup2:
|
cleanup2:
|
||||||
VIR_FREE(help);
|
VIR_FREE(help);
|
||||||
@ -467,7 +474,8 @@ qemudNetworkIfaceConnect(virConnectPtr conn,
|
|||||||
int **tapfds,
|
int **tapfds,
|
||||||
int *ntapfds,
|
int *ntapfds,
|
||||||
virDomainNetDefPtr net,
|
virDomainNetDefPtr net,
|
||||||
int vlan)
|
int vlan,
|
||||||
|
int vnet_hdr)
|
||||||
{
|
{
|
||||||
char *brname;
|
char *brname;
|
||||||
char tapfdstr[4+3+32+7];
|
char tapfdstr[4+3+32+7];
|
||||||
@ -517,7 +525,7 @@ qemudNetworkIfaceConnect(virConnectPtr conn,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ((err = brAddTap(driver->brctl, brname,
|
if ((err = brAddTap(driver->brctl, brname,
|
||||||
&net->ifname, &tapfd))) {
|
&net->ifname, vnet_hdr, &tapfd))) {
|
||||||
if (errno == ENOTSUP) {
|
if (errno == ENOTSUP) {
|
||||||
/* In this particular case, give a better diagnostic. */
|
/* In this particular case, give a better diagnostic. */
|
||||||
qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
|
||||||
@ -1029,9 +1037,16 @@ int qemudBuildCommandLine(virConnectPtr conn,
|
|||||||
case VIR_DOMAIN_NET_TYPE_NETWORK:
|
case VIR_DOMAIN_NET_TYPE_NETWORK:
|
||||||
case VIR_DOMAIN_NET_TYPE_BRIDGE:
|
case VIR_DOMAIN_NET_TYPE_BRIDGE:
|
||||||
{
|
{
|
||||||
char *tap = qemudNetworkIfaceConnect(conn, driver,
|
char *tap;
|
||||||
|
int vnet_hdr = 0;
|
||||||
|
|
||||||
|
if (qemuCmdFlags & QEMUD_CMD_FLAG_VNET_HDR &&
|
||||||
|
net->model && STREQ(net->model, "virtio"))
|
||||||
|
vnet_hdr = 1;
|
||||||
|
|
||||||
|
tap = qemudNetworkIfaceConnect(conn, driver,
|
||||||
tapfds, ntapfds,
|
tapfds, ntapfds,
|
||||||
net, vlan);
|
net, vlan, vnet_hdr);
|
||||||
if (tap == NULL)
|
if (tap == NULL)
|
||||||
goto error;
|
goto error;
|
||||||
ADD_ARG(tap);
|
ADD_ARG(tap);
|
||||||
|
@ -48,6 +48,7 @@ enum qemud_cmd_flags {
|
|||||||
QEMUD_CMD_FLAG_NAME = (1 << 5),
|
QEMUD_CMD_FLAG_NAME = (1 << 5),
|
||||||
QEMUD_CMD_FLAG_UUID = (1 << 6),
|
QEMUD_CMD_FLAG_UUID = (1 << 6),
|
||||||
QEMUD_CMD_FLAG_DOMID = (1 << 7), /* Xenner only */
|
QEMUD_CMD_FLAG_DOMID = (1 << 7), /* Xenner only */
|
||||||
|
QEMUD_CMD_FLAG_VNET_HDR = (1 << 8),
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Main driver state */
|
/* Main driver state */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user