diff --git a/ChangeLog b/ChangeLog index 9aecad2121..afc9dc9bad 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,35 @@ +Tue Jan 27 10:48:12 IST 2009 Mark McLoughlin + + kvm/virtio: Set IFF_VNET_HDR when setting up tap fds + + IFF_VNET_HDR is a tun/tap flag that allows you to send and receive + large (i.e. GSO) packets and packets with partial checksums. Setting + the flag means that every packet is proceeded by the same header which + virtio uses to communicate GSO/csum metadata. + + By enabling this flag on the tap fds we create, we greatly increase + the achievable throughput with virtio_net. + + However, we need to be careful to only set the flag when a) QEMU has + support for this ABI and b) the value of the flag is queryable using + the TUNGETIFF ioctl. + + It's nearly five months since kvm-74 - the first KVM release with this + feature - was released. Up until now, we've not added libvirt support + because there is no clean way to detect support for this in QEMU at + runtime. A brief attempt to add a "info capabilities" monitor command + to QEMU floundered. Perfect is the enemy of good enough. Probing the + KVM version will suffice for now. + + * src/qemu_conf.[ch] (qemudExtractVersionInfo): detect the KVM version, + set QEMUD_CMD_FLAG_VNET_HDR if we have kvm-74 or newer + (qemudBuildCommandLine): if qemu support VNET_HDR and this is a + virtio interface, then use it + + * src/bridge.[ch] (brProbeVnetHdr): only enable IFF_VNET_HDR if + the kernel bits are available + (brAddTap): add a @vnet_hdr arg, set IFF_VNET_HDR + Tue Jan 27 11:53:32 +0100 2009 Jim Meyering and Daniel P. Berrange diff --git a/src/bridge.c b/src/bridge.c index 30afa6d877..9c4ca7422a 100644 --- a/src/bridge.c +++ b/src/bridge.c @@ -47,6 +47,7 @@ #include "internal.h" #include "memory.h" #include "util.h" +#include "logging.h" #define MAX_BRIDGE_ID 256 @@ -402,11 +403,68 @@ static int brSetInterfaceMtu(brControl *ctl, return ifSetMtu(ctl, ifname, mtu); } +/** + * brProbeVnetHdr: + * @tapfd: a tun/tap file descriptor + * + * Check whether it is safe to enable the IFF_VNET_HDR flag on the + * tap interface. + * + * Setting IFF_VNET_HDR enables QEMU's virtio_net driver to allow + * guests to pass larger (GSO) packets, with partial checksums, to + * the host. This greatly increases the achievable throughput. + * + * It is only useful to enable this when we're setting up a virtio + * interface. And it is only *safe* to enable it when we know for + * sure that a) qemu has support for IFF_VNET_HDR and b) the running + * kernel implements the TUNGETIFF ioctl(), which qemu needs to query + * the supplied tapfd. + * + * Returns 0 in case of success or an errno code in case of failure. + */ +static int +brProbeVnetHdr(int tapfd) +{ +#if defined(IFF_VNET_HDR) && defined(TUNGETFEATURES) && defined(TUNGETIFF) + unsigned int features; + struct ifreq dummy; + + if (ioctl(tapfd, TUNGETFEATURES, &features) != 0) { + VIR_INFO0(_("Not enabling IFF_VNET_HDR; " + "TUNGETFEATURES ioctl() not implemented")); + return 0; + } + + if (!(features & IFF_VNET_HDR)) { + VIR_INFO0(_("Not enabling IFF_VNET_HDR; " + "TUNGETFEATURES ioctl() reports no IFF_VNET_HDR")); + return 0; + } + + /* The kernel will always return -1 at this point. + * If TUNGETIFF is not implemented then errno == EBADFD. + */ + if (ioctl(tapfd, TUNGETIFF, &dummy) != -1 || errno != EBADFD) { + VIR_INFO0(_("Not enabling IFF_VNET_HDR; " + "TUNGETIFF ioctl() not implemented")); + return 0; + } + + VIR_INFO0(_("Enabling IFF_VNET_HDR")); + + return 1; +#else + VIR_INFO0(_("Not enabling IFF_VNET_HDR; disabled at build time")); + return 0; +#endif +} + /** * brAddTap: * @ctl: bridge control pointer * @bridge: the bridge name * @ifname: the interface name (or name template) + * @vnet_hdr: whether to try enabling IFF_VNET_HDR * @tapfd: file descriptor return value for the new tap device * * This function creates a new tap device on a bridge. @ifname can be either @@ -420,6 +478,7 @@ int brAddTap(brControl *ctl, const char *bridge, char **ifname, + int vnet_hdr, int *tapfd) { int id, subst, fd; @@ -435,6 +494,9 @@ brAddTap(brControl *ctl, if ((fd = open("/dev/net/tun", O_RDWR)) < 0) return errno; + if (vnet_hdr) + vnet_hdr = brProbeVnetHdr(fd); + do { struct ifreq try; int len; @@ -443,6 +505,11 @@ brAddTap(brControl *ctl, try.ifr_flags = IFF_TAP|IFF_NO_PI; +#ifdef IFF_VNET_HDR + if (vnet_hdr) + try.ifr_flags |= IFF_VNET_HDR; +#endif + if (subst) { len = snprintf(try.ifr_name, BR_IFNAME_MAXLEN, *ifname, id); if (len >= BR_IFNAME_MAXLEN) { diff --git a/src/bridge.h b/src/bridge.h index 2172b92b1a..2491123725 100644 --- a/src/bridge.h +++ b/src/bridge.h @@ -63,6 +63,7 @@ int brDeleteInterface (brControl *ctl, int brAddTap (brControl *ctl, const char *bridge, char **ifname, + int vnet_hdr, int *tapfd); int brSetInterfaceUp (brControl *ctl, diff --git a/src/qemu_conf.c b/src/qemu_conf.c index ee5cf62520..bd5d414ee9 100644 --- a/src/qemu_conf.c +++ b/src/qemu_conf.c @@ -353,7 +353,7 @@ int qemudExtractVersionInfo(const char *qemu, int newstdout = -1; int ret = -1, status; unsigned int major, minor, micro; - unsigned int version; + unsigned int version, kvm_version; unsigned int flags = 0; if (retflags) @@ -371,10 +371,13 @@ int qemudExtractVersionInfo(const char *qemu, if (len < 0) goto cleanup2; - if (sscanf(help, "QEMU PC emulator version %u.%u.%u", - &major, &minor, µ) != 3) { + if (sscanf(help, "QEMU PC emulator version %u.%u.%u (kvm-%u)", + &major, &minor, µ, &kvm_version) != 4) + kvm_version = 0; + + if (!kvm_version && sscanf(help, "QEMU PC emulator version %u.%u.%u", + &major, &minor, µ) != 3) goto cleanup2; - } version = (major * 1000 * 1000) + (minor * 1000) + micro; @@ -394,6 +397,8 @@ int qemudExtractVersionInfo(const char *qemu, flags |= QEMUD_CMD_FLAG_DRIVE_BOOT; if (version >= 9000) flags |= QEMUD_CMD_FLAG_VNC_COLON; + if (kvm_version >= 74) + flags |= QEMUD_CMD_FLAG_VNET_HDR; if (retversion) *retversion = version; @@ -404,6 +409,8 @@ int qemudExtractVersionInfo(const char *qemu, qemudDebug("Version %d %d %d Cooked version: %d, with flags ? %d", major, minor, micro, version, flags); + if (kvm_version) + qemudDebug("KVM version %d detected", kvm_version); cleanup2: VIR_FREE(help); @@ -467,7 +474,8 @@ qemudNetworkIfaceConnect(virConnectPtr conn, int **tapfds, int *ntapfds, virDomainNetDefPtr net, - int vlan) + int vlan, + int vnet_hdr) { char *brname; char tapfdstr[4+3+32+7]; @@ -517,7 +525,7 @@ qemudNetworkIfaceConnect(virConnectPtr conn, } if ((err = brAddTap(driver->brctl, brname, - &net->ifname, &tapfd))) { + &net->ifname, vnet_hdr, &tapfd))) { if (errno == ENOTSUP) { /* In this particular case, give a better diagnostic. */ qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -1029,9 +1037,16 @@ int qemudBuildCommandLine(virConnectPtr conn, case VIR_DOMAIN_NET_TYPE_NETWORK: case VIR_DOMAIN_NET_TYPE_BRIDGE: { - char *tap = qemudNetworkIfaceConnect(conn, driver, - tapfds, ntapfds, - net, vlan); + char *tap; + int vnet_hdr = 0; + + if (qemuCmdFlags & QEMUD_CMD_FLAG_VNET_HDR && + net->model && STREQ(net->model, "virtio")) + vnet_hdr = 1; + + tap = qemudNetworkIfaceConnect(conn, driver, + tapfds, ntapfds, + net, vlan, vnet_hdr); if (tap == NULL) goto error; ADD_ARG(tap); diff --git a/src/qemu_conf.h b/src/qemu_conf.h index db2491cdcc..65b08ace26 100644 --- a/src/qemu_conf.h +++ b/src/qemu_conf.h @@ -48,6 +48,7 @@ enum qemud_cmd_flags { QEMUD_CMD_FLAG_NAME = (1 << 5), QEMUD_CMD_FLAG_UUID = (1 << 6), QEMUD_CMD_FLAG_DOMID = (1 << 7), /* Xenner only */ + QEMUD_CMD_FLAG_VNET_HDR = (1 << 8), }; /* Main driver state */