From 017abcbb1aa576a5f40d6bc8b905bbdacc5e4b94 Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Mon, 20 Jun 2011 10:26:47 +0200 Subject: [PATCH] qemu: domain I/O asynchronous handling For virtio disks and interfaces, qemu allows users to enable or disable ioeventfd feature. This means, qemu can execute domain code, while another thread waits for I/O event. Basically, in some cases it is win, in some loss. This feature is available via 'ioeventfd' attribute in disk and interface element. It accepts 'on' and 'off'. Leaving this attribute out defaults to hypervisor decision. --- docs/formatdomain.html.in | 34 ++++++++++++- docs/schemas/domain.rng | 14 ++++++ src/conf/domain_conf.c | 49 +++++++++++++++++- src/conf/domain_conf.h | 11 ++++ src/libvirt_private.syms | 2 + src/qemu/qemu_capabilities.c | 3 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_command.c | 13 +++++ tests/qemuhelptest.c | 3 +- .../qemuxml2argv-disk-ioeventfd.args | 11 ++++ .../qemuxml2argv-disk-ioeventfd.xml | 50 +++++++++++++++++++ tests/qemuxml2argvtest.c | 4 ++ 12 files changed, 191 insertions(+), 4 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index ab39417a6a..39e1a85877 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -785,7 +785,7 @@ </disk> ... <disk type='network'> - <driver name="qemu" type="raw" io="threads"/> + <driver name="qemu" type="raw" io="threads" ioeventfd="on"/> <source protocol="sheepdog" name="image_name"> <host name="hostname" port="7000"/> </source> @@ -869,6 +869,20 @@ policies on I/O; qemu guests support "threads" and "native". Since 0.8.8 +
  • + The optional ioeventfd attribute allows users to + set + domain I/O asynchronous handling for the disk device. + The default is left to the discretion of the hypervisor. + Accepted values are "on" and "off". Enabling this allows + qemu to execute the VM while a separate thread handles I/O. + Typically, guests experiencing high system CPU utilization + during I/O will benefit from this. On the other hand, + on an overloaded host it could increase guest I/O latency. + Since 0.9.3 (QEMU and KVM only) + In general you should leave this option alone, unless you + are very certain you know what you are doing. +
  • boot
    @@ -1649,7 +1663,7 @@ qemu-kvm -net nic,model=? /dev/null <source network='default'/> <target dev='vnet1'/> <model type='virtio'/> - <driver name='vhost' txmode='iothread'/> + <driver name='vhost' txmode='iothread' ioeventfd='on'/> </interface> </devices> ... @@ -1697,6 +1711,22 @@ qemu-kvm -net nic,model=? /dev/null contention since the cpu doing the tx isn't necessarily the cpu where the guest generated the packets."

    + In general you should leave this option alone, unless you + are very certain you know what you are doing. + +
    ioeventfd
    +
    + This optional attribute allows users to set + + domain I/O asynchronous handling for the interface device. + The default is left to the discretion of the hypervisor. + Accepted values are "on" and "off". Enabling this allows + qemu to execute the VM while a separate thread handles I/O. + Typically, guests experiencing high system CPU utilization + during I/O will benefit from this. On the other hand, + on an overloaded host it could increase guest I/O latency. + Since 0.9.3 (QEMU and KVM only)

    + In general you should leave this option alone, unless you are very certain you know what you are doing.
    diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index 6de024e3ee..891662df7c 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -778,6 +778,9 @@ + + + @@ -817,6 +820,14 @@ + + + + on + off + + + @@ -1117,6 +1128,9 @@ + + + diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 0f302e4dc6..b5c0f83696 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -163,6 +163,11 @@ VIR_ENUM_IMPL(virDomainDiskIo, VIR_DOMAIN_DISK_IO_LAST, "default", "native", "threads") +VIR_ENUM_IMPL(virDomainIoEventFd, VIR_DOMAIN_IO_EVENT_FD_LAST, + "default", + "on", + "off") + VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST, "ide", @@ -2015,6 +2020,7 @@ virDomainDiskDefParseXML(virCapsPtr caps, char *cachetag = NULL; char *error_policy = NULL; char *iotag = NULL; + char *ioeventfd = NULL; char *devaddr = NULL; virStorageEncryptionPtr encryption = NULL; char *serial = NULL; @@ -2130,6 +2136,7 @@ virDomainDiskDefParseXML(virCapsPtr caps, cachetag = virXMLPropString(cur, "cache"); error_policy = virXMLPropString(cur, "error_policy"); iotag = virXMLPropString(cur, "io"); + ioeventfd = virXMLPropString(cur, "ioeventfd"); } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) { def->readonly = 1; } else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) { @@ -2266,6 +2273,24 @@ virDomainDiskDefParseXML(virCapsPtr caps, } } + if (ioeventfd) { + if (def->bus != VIR_DOMAIN_DISK_BUS_VIRTIO) { + virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("disk ioeventfd mode supported " + "only for virtio bus")); + goto error; + } + + int i; + if ((i = virDomainIoEventFdTypeFromString(ioeventfd)) <= 0) { + virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown disk ioeventfd mode '%s'"), + ioeventfd); + goto error; + } + def->ioeventfd=i; + } + if (devaddr) { if (virDomainParseLegacyDeviceAddress(devaddr, &def->info.addr.pci) < 0) { @@ -2328,6 +2353,7 @@ cleanup: VIR_FREE(cachetag); VIR_FREE(error_policy); 
VIR_FREE(iotag); + VIR_FREE(ioeventfd); VIR_FREE(devaddr); VIR_FREE(serial); virStorageEncryptionFree(encryption); @@ -2715,6 +2741,7 @@ virDomainNetDefParseXML(virCapsPtr caps, char *model = NULL; char *backend = NULL; char *txmode = NULL; + char *ioeventfd = NULL; char *filter = NULL; char *internal = NULL; char *devaddr = NULL; @@ -2804,6 +2831,7 @@ virDomainNetDefParseXML(virCapsPtr caps, } else if (xmlStrEqual (cur->name, BAD_CAST "driver")) { backend = virXMLPropString(cur, "name"); txmode = virXMLPropString(cur, "txmode"); + ioeventfd = virXMLPropString(cur, "ioeventfd"); } else if (xmlStrEqual (cur->name, BAD_CAST "filterref")) { filter = virXMLPropString(cur, "filter"); VIR_FREE(filterparams); @@ -3020,6 +3048,16 @@ virDomainNetDefParseXML(virCapsPtr caps, } def->driver.virtio.txmode = m; } + if (ioeventfd) { + int i; + if ((i = virDomainIoEventFdTypeFromString(ioeventfd)) <= 0) { + virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown interface ioeventfd mode '%s'"), + ioeventfd); + goto error; + } + def->driver.virtio.ioeventfd = i; + } } if (filter != NULL) { @@ -3059,6 +3097,7 @@ cleanup: VIR_FREE(model); VIR_FREE(backend); VIR_FREE(txmode); + VIR_FREE(ioeventfd); VIR_FREE(filter); VIR_FREE(type); VIR_FREE(internal); @@ -8286,6 +8325,7 @@ virDomainDiskDefFormat(virBufferPtr buf, const char *cachemode = virDomainDiskCacheTypeToString(def->cachemode); const char *error_policy = virDomainDiskErrorPolicyTypeToString(def->error_policy); const char *iomode = virDomainDiskIoTypeToString(def->iomode); + const char *ioeventfd = virDomainIoEventFdTypeToString(def->ioeventfd); if (!type) { virDomainReportError(VIR_ERR_INTERNAL_ERROR, @@ -8317,7 +8357,8 @@ virDomainDiskDefFormat(virBufferPtr buf, " \n", type, device); - if (def->driverName || def->driverType || def->cachemode) { + if (def->driverName || def->driverType || def->cachemode || + def->ioeventfd) { virBufferAsprintf(buf, " driverName) virBufferAsprintf(buf, " name='%s'", def->driverName); @@ 
-8329,6 +8370,8 @@ virDomainDiskDefFormat(virBufferPtr buf, virBufferAsprintf(buf, " error_policy='%s'", error_policy); if (def->iomode) virBufferAsprintf(buf, " io='%s'", iomode); + if (def->ioeventfd) + virBufferAsprintf(buf, " ioeventfd='%s'", ioeventfd); virBufferAsprintf(buf, "/>\n"); } @@ -8619,6 +8662,10 @@ virDomainNetDefFormat(virBufferPtr buf, virBufferAsprintf(buf, " txmode='%s'", virDomainNetVirtioTxModeTypeToString(def->driver.virtio.txmode)); } + if (def->driver.virtio.ioeventfd) { + virBufferAsprintf(buf, " ioeventfd='%s'", + virDomainIoEventFdTypeToString(def->driver.virtio.ioeventfd)); + } virBufferAddLit(buf, "/>\n"); } } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index ff5c28d7c7..994ff91bc9 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -206,6 +206,14 @@ enum virDomainDiskIo { VIR_DOMAIN_DISK_IO_LAST }; +enum virDomainIoEventFd { + VIR_DOMAIN_IO_EVENT_FD_DEFAULT = 0, + VIR_DOMAIN_IO_EVENT_FD_ON, + VIR_DOMAIN_IO_EVENT_FD_OFF, + + VIR_DOMAIN_IO_EVENT_FD_LAST +}; + /* Stores the virtual disk configuration */ typedef struct _virDomainDiskDef virDomainDiskDef; typedef virDomainDiskDef *virDomainDiskDefPtr; @@ -225,6 +233,7 @@ struct _virDomainDiskDef { int error_policy; int bootIndex; int iomode; + int ioeventfd; unsigned int readonly : 1; unsigned int shared : 1; virDomainDeviceInfo info; @@ -361,6 +370,7 @@ struct _virDomainNetDef { struct { enum virDomainNetBackendType name; /* which driver backend to use */ enum virDomainNetVirtioTxModeType txmode; + enum virDomainIoEventFd ioeventfd; } virtio; } driver; union { @@ -1554,6 +1564,7 @@ VIR_ENUM_DECL(virDomainDiskCache) VIR_ENUM_DECL(virDomainDiskErrorPolicy) VIR_ENUM_DECL(virDomainDiskProtocol) VIR_ENUM_DECL(virDomainDiskIo) +VIR_ENUM_DECL(virDomainIoEventFd) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainControllerModel) VIR_ENUM_DECL(virDomainFS) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 09b01592ef..6f253ab30f 100644 
--- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -292,6 +292,8 @@ virDomainHostdevDefFree; virDomainHostdevModeTypeToString; virDomainHostdevSubsysTypeToString; virDomainInputDefFree; +virDomainIoEventFdTypeFromString; +virDomainIoEventFdTypeToString; virDomainLeaseIndex; virDomainLeaseInsert; virDomainLeaseInsertPreAlloc; diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 28c89b5bf9..ad62a0773c 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -121,6 +121,7 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST, "device-qxl-vga", "pci-multifunction", /* 60 */ + "virtio-blk-pci.ioeventfd", ); struct qemu_feature_flags { @@ -1207,6 +1208,8 @@ qemuCapsParseDeviceStr(const char *str, virBitmapPtr flags) qemuCapsSet(flags, QEMU_CAPS_VIRTIO_TX_ALG); if (strstr(str, "name \"qxl-vga\"")) qemuCapsSet(flags, QEMU_CAPS_DEVICE_QXL_VGA); + if (strstr(str, "virtio-blk-pci.ioeventfd")) + qemuCapsSet(flags, QEMU_CAPS_VIRTIO_IOEVENTFD); return 0; } diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index e6d2fa3728..0b9c8be754 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -96,6 +96,7 @@ enum qemuCapsFlags { QEMU_CAPS_VIRTIO_TX_ALG = 58, /* -device virtio-net-pci,tx=string */ QEMU_CAPS_DEVICE_QXL_VGA = 59, /* Is the primary and vga campatible qxl device named qxl-vga? 
*/ QEMU_CAPS_PCI_MULTIFUNCTION = 60, /* -device multifunction=on|off */ + QEMU_CAPS_VIRTIO_IOEVENTFD = 61, /* IOeventFD feature: virtio-{net|blk}-pci.ioeventfd=on/off */ QEMU_CAPS_LAST, /* this must always be the last item */ }; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index e9ca1c9f93..b517e1a6e2 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -1289,6 +1289,16 @@ qemuBuildDeviceAddressStr(virBufferPtr buf, return 0; } +static int +qemuBuildIoEventFdStr(virBufferPtr buf, + enum virDomainIoEventFd use, + virBitmapPtr qemuCaps) +{ + if (use && qemuCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_IOEVENTFD)) + virBufferAsprintf(buf, ",ioeventfd=%s", + virDomainIoEventFdTypeToString(use)); + return 0; +} #define QEMU_SERIAL_PARAM_ACCEPTED_CHARS \ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_" @@ -1556,6 +1566,7 @@ qemuBuildDriveDevStr(virDomainDiskDefPtr disk, break; case VIR_DOMAIN_DISK_BUS_VIRTIO: virBufferAddLit(&opt, "virtio-blk-pci"); + qemuBuildIoEventFdStr(&opt, disk->ioeventfd, qemuCaps); qemuBuildDeviceAddressStr(&opt, &disk->info, qemuCaps); break; case VIR_DOMAIN_DISK_BUS_USB: @@ -1779,6 +1790,8 @@ qemuBuildNicDevStr(virDomainNetDefPtr net, goto error; } } + if (usingVirtio) + qemuBuildIoEventFdStr(&buf, net->driver.virtio.ioeventfd, qemuCaps); if (vlan == -1) virBufferAsprintf(&buf, ",netdev=host%s", net->info.alias); else diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c index 327a0c7fec..119e771fbc 100644 --- a/tests/qemuhelptest.c +++ b/tests/qemuhelptest.c @@ -475,7 +475,8 @@ mymain(void) QEMU_CAPS_CCID_PASSTHRU, QEMU_CAPS_CHARDEV_SPICEVMC, QEMU_CAPS_DEVICE_QXL_VGA, - QEMU_CAPS_VIRTIO_TX_ALG); + QEMU_CAPS_VIRTIO_TX_ALG, + QEMU_CAPS_VIRTIO_IOEVENTFD); return ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE; } diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args new file mode 100644 index 0000000000..c512f1516f --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args @@ -0,0 +1,11 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ +/usr/bin/qemu -S -M pc-0.13 -m 1024 -smp 1 -nodefaults \ +-monitor unix:/tmp/test-monitor,server,nowait -no-acpi \ +-boot dc -device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x6 \ +-drive file=/var/lib/libvirt/images/f14.img,if=none,id=drive-virtio-disk0 \ +-device virtio-blk-pci,ioeventfd=on,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 \ +-drive file=/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso,if=none,media=cdrom,id=drive-ide0-1-0 \ +-device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 \ +-device virtio-net-pci,tx=bh,ioeventfd=off,vlan=0,id=net0,mac=52:54:00:e5:48:58,bus=pci.0,addr=0x3 \ +-net user,vlan=0,name=hostnet0 -serial pty -usb -vnc 127.0.0.1:-809 -std-vga \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml new file mode 100644 index 0000000000..c565c9f988 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml @@ -0,0 +1,50 @@ + + test + 1048576 + 1 + + hvm + + + + + + destroy + restart + restart + + /usr/bin/qemu + + + + +
    + + + + + + +
    + + + + + + + +
    + + + + + + + + +