From 1c7027788678c3ce0e41eb937d71ede33418b6b9 Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Wed, 7 May 2014 18:07:12 +0200 Subject: [PATCH] nodedev: Export NUMA node locality for PCI devices A PCI device can be associated with a specific NUMA node. Later, when a guest is pinned to one NUMA node, the PCI device can be assigned on a different NUMA node. This makes DMA transfers travel across nodes and thus results in suboptimal performance. We should expose the NUMA node locality for PCI devices so management applications can make better decisions. Signed-off-by: Michal Privoznik --- docs/formatnode.html.in | 7 +++ docs/schemas/nodedev.rng | 10 +++++ src/conf/node_device_conf.c | 44 +++++++++++++++++++ src/conf/node_device_conf.h | 1 + src/node_device/node_device_udev.c | 12 +++++ tests/nodedevschemadata/pci_1002_71c4.xml | 1 + .../pci_8086_10c9_sriov_pf.xml | 1 + 7 files changed, 76 insertions(+) diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in index b424c96fa6..76bf8af1bf 100644 --- a/docs/formatnode.html.in +++ b/docs/formatnode.html.in @@ -110,6 +110,13 @@ have a list of address subelements, one for each VF on this PF. +
numa
+
+ This optional element contains information on the PCI device + with respect to NUMA. For example, the optional + node attribute tells which NUMA node the PCI + device is associated with. +
usb_device
diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng index 81ab4d4856..02d41063d5 100644 --- a/docs/schemas/nodedev.rng +++ b/docs/schemas/nodedev.rng @@ -158,6 +158,16 @@ + + + + + + + + + + diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index e65b5e4578..99fa448e8b 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -346,6 +346,9 @@ char *virNodeDeviceDefFormat(const virNodeDeviceDef *def) virBufferAdjustIndent(&buf, -2); virBufferAddLit(&buf, "\n"); } + if (data->pci_dev.numa_node >= 0) + virBufferAsprintf(&buf, "\n", + data->pci_dev.numa_node); break; case VIR_NODE_DEV_CAP_USB_DEV: virBufferAsprintf(&buf, "%d\n", data->usb_dev.bus); @@ -520,6 +523,41 @@ char *virNodeDeviceDefFormat(const virNodeDeviceDef *def) return NULL; } +/** + * virNodeDevCapsDefParseIntOptional: + * @xpath: XPath to evaluate + * @ctxt: Context + * @value: Where to store parsed value + * @def: Node device which is parsed + * @invalid_error_fmt: error message to print on invalid format + * + * Returns: -1 on error (invalid int format under @xpath) + * 0 if @xpath was not found (@value is untouched) + * 1 on success + */ +static int +virNodeDevCapsDefParseIntOptional(const char *xpath, + xmlXPathContextPtr ctxt, + int *value, + virNodeDeviceDefPtr def, + const char *invalid_error_fmt) +{ + int ret; + int val; + + ret = virXPathInt(xpath, ctxt, &val); + if (ret < -1) { + virReportError(VIR_ERR_INTERNAL_ERROR, + invalid_error_fmt, + def->name); + return -1; + } else if (ret == -1) { + return 0; + } + *value = val; + return 1; +} + static int virNodeDevCapsDefParseULong(const char *xpath, xmlXPathContextPtr ctxt, @@ -1101,6 +1139,12 @@ virNodeDevCapPCIDevParseXML(xmlXPathContextPtr ctxt, goto out; } } + + if (virNodeDevCapsDefParseIntOptional("number(./numa[1]/@node)", ctxt, + &data->pci_dev.numa_node, def, + _("invalid NUMA node ID supplied for '%s'")) < 0) + goto out; + ret = 0; out: ctxt->node = orignode; diff --git 
a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h index 50e68059a6..50ce4b3f70 100644 --- a/src/conf/node_device_conf.h +++ b/src/conf/node_device_conf.h @@ -115,6 +115,7 @@ struct _virNodeDevCapsDef { virPCIDeviceAddressPtr *iommuGroupDevices; size_t nIommuGroupDevices; unsigned int iommuGroupNumber; + int numa_node; } pci_dev; struct { unsigned int bus; diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 9a951d9bd0..91fc16f301 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -493,6 +493,18 @@ static int udevProcessPCI(struct udev_device *device, goto out; } + rc = udevGetIntSysfsAttr(device, + "numa_node", + &data->pci_dev.numa_node, + 10); + if (rc == PROPERTY_ERROR) { + goto out; + } else if (rc == PROPERTY_MISSING) { + /* The default value is -1, because it can't be 0 + * as zero is valid node number. */ + data->pci_dev.numa_node = -1; + } + if (!virPCIGetPhysicalFunction(syspath, &data->pci_dev.physical_function)) data->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION; diff --git a/tests/nodedevschemadata/pci_1002_71c4.xml b/tests/nodedevschemadata/pci_1002_71c4.xml index 6de09c1c6e..6d5d85bc3c 100644 --- a/tests/nodedevschemadata/pci_1002_71c4.xml +++ b/tests/nodedevschemadata/pci_1002_71c4.xml @@ -8,5 +8,6 @@ 0 M56GL [Mobility FireGL V5200] ATI Technologies Inc + diff --git a/tests/nodedevschemadata/pci_8086_10c9_sriov_pf.xml b/tests/nodedevschemadata/pci_8086_10c9_sriov_pf.xml index eff89328ad..6e1dc868a6 100644 --- a/tests/nodedevschemadata/pci_8086_10c9_sriov_pf.xml +++ b/tests/nodedevschemadata/pci_8086_10c9_sriov_pf.xml @@ -12,5 +12,6 @@
+