From e9bd5c0e248aaa73db4d26ed4abf27acc6f93cc8 Mon Sep 17 00:00:00 2001 From: Laine Stump Date: Thu, 3 Feb 2011 15:20:01 -0500 Subject: [PATCH] Add txmode attribute to interface XML for virtio backend This is in response to: https://bugzilla.redhat.com/show_bug.cgi?id=629662 Explanation qemu's virtio-net-pci driver allows setting the algorithm used for tx packets to either "bh" or "timer". This is done by adding ",tx=bh" or ",tx=timer" to the "-device virtio-net-pci" commandline option. 'bh' stands for 'bottom half'; when this is set, packet tx is all done in an iothread in the bottom half of the driver. (In libvirt, this option is called the more descriptive "iothread".) 'timer' means that tx work is done in qemu, and if there is more tx data than can be sent at the present time, a timer is set before qemu moves on to do other things; when the timer fires, another attempt is made to send more data. (libvirt retains the name "timer" for this option.) The resulting difference, according to the qemu developer who added the option is: bh makes tx more asynchronous and reduces latency, but potentially causes more processor bandwidth contention since the cpu doing the tx isn't necessarily the cpu where the guest generated the packets. Solution This patch provides a libvirt domain xml knob to change the option on the qemu commandline, by adding a new attribute "txmode" to the element that can be placed inside any element in a domain definition. It's use would be something like this: ... ... I chose to put this setting as an attribute to rather than as a sub-element to because it is specific to the virtio-net driver, not something that is generally usable by all network drivers. (note that this is the same placement as the "driver name=..." attribute used to choose kernel vs. userland backend for the virtio-net driver.) Actually adding the tx=xxx option to the qemu commandline is only done if the version of qemu being used advertises it in the output of qemu -device virtio-net-pci,? If a particular txmode is requested in the XML, and the option isn't listed in that help output, an UNSUPPORTED_CONFIG error is logged, and the domain fails to start. --- docs/formatdomain.html.in | 50 +++++++++++++++++++ docs/schemas/domain.rng | 8 +++ src/conf/domain_conf.c | 26 +++++++++- src/conf/domain_conf.h | 11 ++++ src/qemu/qemu_capabilities.c | 3 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_command.c | 27 ++++++++++ .../qemu-kvm-0.12.1.2-rhel61-device | 25 ++++++++++ tests/qemuhelpdata/qemu-kvm-0.13.0-device | 22 ++++++++ tests/qemuhelptest.c | 3 +- .../qemuxml2argv-net-virtio-device.args | 2 +- .../qemuxml2argv-net-virtio-device.xml | 3 ++ tests/qemuxml2argvtest.c | 2 +- tests/qemuxml2xmltest.c | 1 + 14 files changed, 180 insertions(+), 4 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 9f7a1133d1..84b1cab84f 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1378,6 +1378,56 @@ qemu-kvm -net nic,model=? /dev/null ne2k_isa i82551 i82557b i82559er ne2k_pci pcnet rtl8139 e1000 virtio

+
Setting NIC driver-specific options
+ +
+  ...
+  <devices>
+    <interface type='network'>
+      <source network='default'/>
+      <target dev='vnet1'/>
+      <model type='virtio'/>
+      <driver txmode='iothread'/>
+    </interface>
+  </devices>
+  ...
+ +

+ Some NICs may have tunable driver-specific options. These are + set as attributes of the driver sub-element of the + interface definition. Currently the following attributes are + available for the "virtio" NIC driver: +

+ +
+
txmode
+
+ The txmode attribute specifies how to handle + transmission of packets when the transmit buffer is full. The + value can be either 'iothread' or 'timer'. + Since 0.8.8 (QEMU and KVM only)

+ + If set to 'iothread', packet tx is all done in an iothread in + the bottom half of the driver (this option translates into + adding "tx=bh" to the qemu commandline -device virtio-net-pci + option).

+ + If set to 'timer', tx work is done in qemu, and if there is + more tx data than can be sent at the present time, a timer is + set before qemu moves on to do other things; when the timer + fires, another attempt is made to send more data.

+ + The resulting difference, according to the qemu developer who + added the option is: "bh makes tx more asynchronous and reduces + latency, but potentially causes more processor bandwidth + contention since the cpu doing the tx isn't necessarily the + cpu where the guest generated the packets."

+ + In general you should leave this option alone, unless you + are very certain you know what you are doing. +
+
+
Overriding the target element
diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng
index 7fe66e3c26..8b215f35df 100644
--- a/docs/schemas/domain.rng
+++ b/docs/schemas/domain.rng
@@ -1042,6 +1042,14 @@
               
             
           
+          
+            
+              
+                iothread
+                timer
+              
+            
+          
           
         
       
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index bc4fe29d31..01a5b2ce2c 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -198,6 +198,11 @@ VIR_ENUM_IMPL(virDomainNetBackend, VIR_DOMAIN_NET_BACKEND_TYPE_LAST,
               "qemu",
               "vhost")
 
+VIR_ENUM_IMPL(virDomainNetVirtioTxMode, VIR_DOMAIN_NET_VIRTIO_TX_MODE_LAST,
+              "default",
+              "iothread",
+              "timer")
+
 VIR_ENUM_IMPL(virDomainChrChannelTarget,
               VIR_DOMAIN_CHR_CHANNEL_TARGET_TYPE_LAST,
               "guestfwd",
@@ -2477,6 +2482,7 @@ virDomainNetDefParseXML(virCapsPtr caps,
     char *port = NULL;
     char *model = NULL;
     char *backend = NULL;
+    char *txmode = NULL;
     char *filter = NULL;
     char *internal = NULL;
     char *devaddr = NULL;
@@ -2565,6 +2571,7 @@ virDomainNetDefParseXML(virCapsPtr caps,
                 model = virXMLPropString(cur, "type");
             } else if (xmlStrEqual (cur->name, BAD_CAST "driver")) {
                 backend = virXMLPropString(cur, "name");
+                txmode = virXMLPropString(cur, "txmode");
             } else if (xmlStrEqual (cur->name, BAD_CAST "filterref")) {
                 filter = virXMLPropString(cur, "filter");
                 VIR_FREE(filterparams);
@@ -2769,6 +2776,18 @@ virDomainNetDefParseXML(virCapsPtr caps,
             }
             def->driver.virtio.name = name;
         }
+        if (txmode != NULL) {
+            int m;
+            if (((m = virDomainNetVirtioTxModeTypeFromString(txmode)) < 0) ||
+                (m == VIR_DOMAIN_NET_VIRTIO_TX_MODE_DEFAULT)) {
+                virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                                     _("Unknown interface  "
+                                       "has been specified"),
+                                     txmode);
+                goto error;
+            }
+            def->driver.virtio.txmode = m;
+        }
     }
 
     if (filter != NULL) {
@@ -2808,6 +2827,7 @@ cleanup:
     VIR_FREE(bridge);
     VIR_FREE(model);
     VIR_FREE(backend);
+    VIR_FREE(txmode);
     VIR_FREE(filter);
     VIR_FREE(type);
     VIR_FREE(internal);
@@ -6813,12 +6833,16 @@ virDomainNetDefFormat(virBufferPtr buf,
         virBufferEscapeString(buf, "      \n",
                               def->model);
         if (STREQ(def->model, "virtio") &&
-            def->driver.virtio.name) {
+            (def->driver.virtio.name || def->driver.virtio.txmode)) {
             virBufferAddLit(buf, "      driver.virtio.name) {
                 virBufferVSprintf(buf, " name='%s'",
                                   virDomainNetBackendTypeToString(def->driver.virtio.name));
             }
+            if (def->driver.virtio.txmode) {
+                virBufferVSprintf(buf, " txmode='%s'",
+                                  virDomainNetVirtioTxModeTypeToString(def->driver.virtio.txmode));
+            }
             virBufferAddLit(buf, "/>\n");
         }
     }
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index e05064fb49..30aecccdb6 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -321,6 +321,15 @@ enum virDomainNetBackendType {
     VIR_DOMAIN_NET_BACKEND_TYPE_LAST,
 };
 
+/* the TX algorithm used for virtio interfaces */
+enum virDomainNetVirtioTxModeType {
+    VIR_DOMAIN_NET_VIRTIO_TX_MODE_DEFAULT, /* default for this version of qemu */
+    VIR_DOMAIN_NET_VIRTIO_TX_MODE_IOTHREAD,
+    VIR_DOMAIN_NET_VIRTIO_TX_MODE_TIMER,
+
+    VIR_DOMAIN_NET_VIRTIO_TX_MODE_LAST,
+};
+
 /* the mode type for macvtap devices */
 enum virDomainNetdevMacvtapType {
     VIR_DOMAIN_NETDEV_MACVTAP_MODE_VEPA,
@@ -341,6 +350,7 @@ struct _virDomainNetDef {
     union {
         struct {
             enum virDomainNetBackendType name; /* which driver backend to use */
+            enum virDomainNetVirtioTxModeType txmode;
         } virtio;
     } driver;
     union {
@@ -1371,6 +1381,7 @@ VIR_ENUM_DECL(virDomainFS)
 VIR_ENUM_DECL(virDomainFSAccessMode)
 VIR_ENUM_DECL(virDomainNet)
 VIR_ENUM_DECL(virDomainNetBackend)
+VIR_ENUM_DECL(virDomainNetVirtioTxMode)
 VIR_ENUM_DECL(virDomainChrDevice)
 VIR_ENUM_DECL(virDomainChrChannelTarget)
 VIR_ENUM_DECL(virDomainChrConsoleTarget)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index cc5552cc38..5f08a2012f 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -1074,6 +1074,7 @@ qemuCapsExtractDeviceStr(const char *qemu,
                                "-device", "?",
                                "-device", "pci-assign,?",
                                "-device", "virtio-blk-pci,?",
+                               "-device", "virtio-net-pci,?",
                                NULL);
     virCommandAddEnvPassCommon(cmd);
     /* qemu -help goes to stdout, but qemu -device ? goes to stderr.  */
@@ -1115,6 +1116,8 @@ qemuCapsParseDeviceStr(const char *str, unsigned long long *flags)
         if (strstr(str, "pci-assign.bootindex"))
             *flags |= QEMUD_CMD_FLAG_PCI_BOOTINDEX;
     }
+    if (strstr(str, "virtio-net-pci.tx="))
+        *flags |= QEMUD_CMD_FLAG_VIRTIO_TX_ALG;
 
     return 0;
 }
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index a130a4f993..63cbbb3975 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -92,6 +92,7 @@ enum qemuCapsFlags {
     QEMUD_CMD_FLAG_CCID_PASSTHRU = (1LL << 55), /* -device ccid-card-passthru */
     QEMUD_CMD_FLAG_CHARDEV_SPICEVMC = (1LL << 56), /* newer -chardev spicevmc */
     QEMUD_CMD_FLAG_DEVICE_SPICEVMC = (1LL << 57), /* older -device spicevmc*/
+    QEMUD_CMD_FLAG_VIRTIO_TX_ALG = (1LL << 58), /* -device virtio-net-pci,tx=string */
 };
 
 virCapsPtr qemuCapsInit(virCapsPtr old_caps);
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 7a52f4e9ad..b0eddd4e73 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -1606,16 +1606,43 @@ qemuBuildNicDevStr(virDomainNetDefPtr net,
 {
     virBuffer buf = VIR_BUFFER_INITIALIZER;
     const char *nic;
+    bool usingVirtio = false;
 
     if (!net->model) {
         nic = "rtl8139";
     } else if (STREQ(net->model, "virtio")) {
         nic = "virtio-net-pci";
+        usingVirtio = true;
     } else {
         nic = net->model;
     }
 
     virBufferAdd(&buf, nic, strlen(nic));
+    if (usingVirtio && net->driver.virtio.txmode) {
+        if (qemuCmdFlags & QEMUD_CMD_FLAG_VIRTIO_TX_ALG) {
+            virBufferAddLit(&buf, ",tx=");
+            switch (net->driver.virtio.txmode) {
+                case VIR_DOMAIN_NET_VIRTIO_TX_MODE_IOTHREAD:
+                    virBufferAddLit(&buf, "bh");
+                    break;
+
+                case VIR_DOMAIN_NET_VIRTIO_TX_MODE_TIMER:
+                    virBufferAddLit(&buf, "timer");
+                    break;
+                default:
+                    /* this should never happen, if it does, we need
+                     * to add another case to this switch.
+                     */
+                    qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                    _("unrecognized virtio-net-pci 'tx' option"));
+                    goto error;
+            }
+        } else {
+            qemuReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                            _("virtio-net-pci 'tx' option not supported in this QEMU binary"));
+            goto error;
+        }
+    }
     if (vlan == -1)
         virBufferVSprintf(&buf, ",netdev=host%s", net->info.alias);
     else
diff --git a/tests/qemuhelpdata/qemu-kvm-0.12.1.2-rhel61-device b/tests/qemuhelpdata/qemu-kvm-0.12.1.2-rhel61-device
index 8a0e528315..8ac9630b40 100644
--- a/tests/qemuhelpdata/qemu-kvm-0.12.1.2-rhel61-device
+++ b/tests/qemuhelpdata/qemu-kvm-0.12.1.2-rhel61-device
@@ -72,3 +72,28 @@ virtio-blk-pci.ioeventfd=on/off
 virtio-blk-pci.vectors=uint32
 virtio-blk-pci.indirect_desc=on/off
 virtio-blk-pci.scsi=on/off
+virtio-net-pci.vectors=uint32
+virtio-net-pci.indirect_desc=on/off
+virtio-net-pci.csum=on/off
+virtio-net-pci.guest_csum=on/off
+virtio-net-pci.gso=on/off
+virtio-net-pci.guest_tso4=on/off
+virtio-net-pci.guest_tso6=on/off
+virtio-net-pci.guest_ecn=on/off
+virtio-net-pci.guest_ufo=on/off
+virtio-net-pci.host_tso4=on/off
+virtio-net-pci.host_tso6=on/off
+virtio-net-pci.host_ecn=on/off
+virtio-net-pci.host_ufo=on/off
+virtio-net-pci.mrg_rxbuf=on/off
+virtio-net-pci.status=on/off
+virtio-net-pci.ctrl_vq=on/off
+virtio-net-pci.ctrl_rx=on/off
+virtio-net-pci.ctrl_vlan=on/off
+virtio-net-pci.ctrl_rx_extra=on/off
+virtio-net-pci.mac=macaddr
+virtio-net-pci.vlan=vlan
+virtio-net-pci.netdev=netdev
+virtio-net-pci.x-txtimer=uint32
+virtio-net-pci.x-txburst=int32
+virtio-net-pci.tx=string
diff --git a/tests/qemuhelpdata/qemu-kvm-0.13.0-device b/tests/qemuhelpdata/qemu-kvm-0.13.0-device
index b121257874..4888d187f7 100644
--- a/tests/qemuhelpdata/qemu-kvm-0.13.0-device
+++ b/tests/qemuhelpdata/qemu-kvm-0.13.0-device
@@ -68,3 +68,25 @@ name "pci-assign", bus PCI, desc "pass through host pci devices to the guest"
 pci-assign.host=pci-hostaddr
 pci-assign.iommu=uint32
 pci-assign.configfd=string
+virtio-net-pci.vectors=uint32
+virtio-net-pci.indirect_desc=on/off
+virtio-net-pci.csum=on/off
+virtio-net-pci.guest_csum=on/off
+virtio-net-pci.gso=on/off
+virtio-net-pci.guest_tso4=on/off
+virtio-net-pci.guest_tso6=on/off
+virtio-net-pci.guest_ecn=on/off
+virtio-net-pci.guest_ufo=on/off
+virtio-net-pci.host_tso4=on/off
+virtio-net-pci.host_tso6=on/off
+virtio-net-pci.host_ecn=on/off
+virtio-net-pci.host_ufo=on/off
+virtio-net-pci.mrg_rxbuf=on/off
+virtio-net-pci.status=on/off
+virtio-net-pci.ctrl_vq=on/off
+virtio-net-pci.ctrl_rx=on/off
+virtio-net-pci.ctrl_vlan=on/off
+virtio-net-pci.ctrl_rx_extra=on/off
+virtio-net-pci.mac=macaddr
+virtio-net-pci.vlan=vlan
+virtio-net-pci.netdev=netdev
diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c
index 3a04b610f5..11890ab16d 100644
--- a/tests/qemuhelptest.c
+++ b/tests/qemuhelptest.c
@@ -480,7 +480,8 @@ mymain(int argc, char **argv)
             QEMUD_CMD_FLAG_HDA_DUPLEX |
             QEMUD_CMD_FLAG_DRIVE_AIO |
             QEMUD_CMD_FLAG_CCID_PASSTHRU |
-            QEMUD_CMD_FLAG_CHARDEV_SPICEVMC,
+            QEMUD_CMD_FLAG_CHARDEV_SPICEVMC |
+            QEMUD_CMD_FLAG_VIRTIO_TX_ALG,
             12001, 1,  0);
 
     return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-virtio-device.args b/tests/qemuxml2argvdata/qemuxml2argv-net-virtio-device.args
index 92bd889b12..843c3e9933 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-net-virtio-device.args
+++ b/tests/qemuxml2argvdata/qemuxml2argv-net-virtio-device.args
@@ -1,6 +1,6 @@
 LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S -M \
 pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \
 unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -hda \
-/dev/HostVG/QEMUGuest1 -device virtio-net-pci,vlan=0,id=net0,\
+/dev/HostVG/QEMUGuest1 -device virtio-net-pci,tx=bh,vlan=0,id=net0,\
 mac=00:11:22:33:44:55,bus=pci.0,addr=0x2 -net user,vlan=0,name=hostnet0 -usb \
 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-virtio-device.xml b/tests/qemuxml2argvdata/qemuxml2argv-net-virtio-device.xml
index 572f3cc757..4e7abcd554 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-net-virtio-device.xml
+++ b/tests/qemuxml2argvdata/qemuxml2argv-net-virtio-device.xml
@@ -17,10 +17,13 @@
     
       
       
+      
+ + diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 96260c0d81..4817d511ce 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -365,7 +365,7 @@ mymain(int argc, char **argv) DO_TEST("net-user", 0, false); DO_TEST("net-virtio", 0, false); DO_TEST("net-virtio-device", QEMUD_CMD_FLAG_DEVICE | - QEMUD_CMD_FLAG_NODEFCONFIG, false); + QEMUD_CMD_FLAG_NODEFCONFIG | QEMUD_CMD_FLAG_VIRTIO_TX_ALG, false); DO_TEST("net-virtio-netdev", QEMUD_CMD_FLAG_DEVICE | QEMUD_CMD_FLAG_NETDEV | QEMUD_CMD_FLAG_NODEFCONFIG, false); DO_TEST("net-eth", 0, false); diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 15d94b764f..67e721b8b2 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -159,6 +159,7 @@ mymain(int argc, char **argv) DO_TEST("misc-no-reboot"); DO_TEST("net-user"); DO_TEST("net-virtio"); + DO_TEST("net-virtio-device"); DO_TEST("net-eth"); DO_TEST("net-eth-ifname"); DO_TEST("sound");