From 35f1095e12abf333903915f96f029612648346d4 Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Fri, 6 Jun 2014 18:09:01 +0200 Subject: [PATCH] virnuma: Introduce pages helpers For future work we need two functions that fetches total number of pages and number of free pages for given NUMA node and page size (virNumaGetPageInfo()). Then we need to learn pages of what sizes are supported on given node (virNumaGetPages()). Note that system page size is disabled at the moment as there's one issue connected. If you have a NUMA node with huge pages allocated the kernel would return the normal size of memory for that node. It basically ignores the fact that huge pages steal size from the system memory. Until we resolve this, it's safer to not confuse users and hence not report any system pages yet. Signed-off-by: Michal Privoznik --- src/internal.h | 12 ++ src/libvirt_private.syms | 2 + src/util/virnuma.c | 336 +++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 10 ++ 4 files changed, 360 insertions(+) diff --git a/src/internal.h b/src/internal.h index 0b36de9f4e..a9e2065147 100644 --- a/src/internal.h +++ b/src/internal.h @@ -255,6 +255,18 @@ fprintf(stderr, "Unimplemented block at %s:%d\n", \ __FILE__, __LINE__); +/** + * SWAP: + * + * In place exchange of two values + */ +# define SWAP(a, b) \ + do { \ + (a) = (a) ^ (b); \ + (b) = (a) ^ (b); \ + (a) = (a) ^ (b); \ + } while (0) + /** * virCheckFlags: * @supported: an OR'ed set of supported flags diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6284d1181a..3c412f99e1 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1664,6 +1664,8 @@ virNumaGetAutoPlacementAdvice; virNumaGetDistances; virNumaGetMaxNode; virNumaGetNodeMemory; +virNumaGetPageInfo; +virNumaGetPages; virNumaIsAvailable; virNumaNodeIsAvailable; virNumaSetupMemoryPolicy; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index 5814cba7b0..c8e7f40ddf 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -34,12 +34,18 @@ #endif /* WITH_NUMACTL */ +#include +#include + #include "virnuma.h" #include "vircommand.h" #include "virerror.h" #include "virlog.h" #include "viralloc.h" #include "virbitmap.h" +#include "virstring.h" +#include "virfile.h" +#include "nodeinfo.h" #define VIR_FROM_THIS VIR_FROM_NONE @@ -504,3 +510,333 @@ virNumaGetDistances(int node ATTRIBUTE_UNUSED, return 0; } #endif + + +#define HUGEPAGES_NUMA_PREFIX "/sys/devices/system/node/" +#define HUGEPAGES_SYSTEM_PREFIX "/sys/kernel/mm/hugepages/" +#define HUGEPAGES_PREFIX "hugepages-" + +static int +virNumaGetHugePageInfoPath(char **path, + int node, + unsigned int page_size, + const char *suffix) +{ + + int ret = -1; + + if (node == -1) { + /* We are aiming at overall system info */ + if (page_size) { + /* And even on specific huge page size */ + if (virAsprintf(path, + HUGEPAGES_SYSTEM_PREFIX HUGEPAGES_PREFIX "%ukB/%s", + page_size, suffix ? suffix : "") < 0) + goto cleanup; + } else { + if (VIR_STRDUP(*path, HUGEPAGES_SYSTEM_PREFIX) < 0) + goto cleanup; + } + + } else { + /* We are aiming on specific NUMA node */ + if (page_size) { + /* And even on specific huge page size */ + if (virAsprintf(path, + HUGEPAGES_NUMA_PREFIX "node%d/hugepages/" + HUGEPAGES_PREFIX "%ukB/%s", + node, page_size, suffix ? suffix : "") < 0) + goto cleanup; + } else { + if (virAsprintf(path, + HUGEPAGES_NUMA_PREFIX "node%d/hugepages/", + node) < 0) + goto cleanup; + } + } + + ret = 0; + cleanup: + return ret; +} + + +/** + * virNumaGetHugePageInfo: + * @node: NUMA node id + * @page_size: which huge page are we interested in + * @page_avail: total number of huge pages in the pool + * @page_free: the number of free huge pages in the pool + * + * For given NUMA node and huge page size fetch information on + * total number of huge pages in the pool (both free and taken) + * and count for free huge pages in the pool. + * + * If you're interested in just one bit, pass NULL to the other one. + * + * As a special case, if @node == -1, overall info is fetched + * from the system. + * + * Returns 0 on success, -1 otherwise (with error reported). + */ +static int +virNumaGetHugePageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free) +{ + int ret = -1; + char *path = NULL; + char *buf = NULL; + char *end; + + if (page_avail) { + if (virNumaGetHugePageInfoPath(&path, node, + page_size, "nr_hugepages") < 0) + goto cleanup; + + if (virFileReadAll(path, 1024, &buf) < 0) + goto cleanup; + + if (virStrToLong_ui(buf, &end, 10, page_avail) < 0 || + *end != '\n') { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to parse: %s"), + buf); + goto cleanup; + } + VIR_FREE(buf); + VIR_FREE(path); + } + + if (page_free) { + if (virNumaGetHugePageInfoPath(&path, node, + page_size, "free_hugepages") < 0) + goto cleanup; + + if (virFileReadAll(path, 1024, &buf) < 0) + goto cleanup; + + if (virStrToLong_ui(buf, &end, 10, page_free) < 0 || + *end != '\n') { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to parse: %s"), + buf); + goto cleanup; + } + } + + ret = 0; + cleanup: + VIR_FREE(buf); + VIR_FREE(path); + return ret; +} + +/** + * virNumaGetPageInfo: + * @node: NUMA node id + * @page_size: which huge page are we interested in (in KiB) + * @page_avail: total number of huge pages in the pool + * @page_free: the number of free huge pages in the pool + * + * For given NUMA node and page size fetch information on + * total number of pages in the pool (both free and taken) + * and count for free pages in the pool. + * + * If you're interested in just one bit, pass NULL to the other one. + * + * As a special case, if @node == -1, overall info is fetched + * from the system. + * + * Returns 0 on success, -1 otherwise (with error reported). + */ +int +virNumaGetPageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free) +{ + int ret = -1; + long system_page_size = sysconf(_SC_PAGESIZE); + + /* sysconf() returns page size in bytes, + * the @page_size is however in kibibytes */ + if (page_size == system_page_size / 1024) { +#if 0 + unsigned long long memsize, memfree; + + /* TODO: come up with better algorithm that takes huge pages into + * account. The problem is huge pages cut off regular memory. */ + if (node == -1) { + if (nodeGetMemory(&memsize, &memfree) < 0) + goto cleanup; + } else { + if (virNumaGetNodeMemory(node, &memsize, &memfree) < 0) + goto cleanup; + } + + if (page_avail) + *page_avail = memsize / system_page_size; + + if (page_free) + *page_free = memfree / system_page_size; +#else + virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", + _("system page size are not supported yet")); + goto cleanup; +#endif /* 0 */ + } else { + if (virNumaGetHugePageInfo(node, page_size, page_avail, page_free) < 0) + goto cleanup; + } + + ret = 0; + cleanup: + return ret; +} + + +/** + * virNumaGetPages: + * @node: NUMA node id + * @pages_size: list of pages supported on @node + * @pages_avail: list of the pool sizes on @node + * @pages_free: list of free pages on @node + * @npages: the lists size + * + * For given NUMA node fetch info on pages. The size of pages + * (e.g. 4K, 2M, 1G) is stored into @pages_size, the size of the + * pool is then stored into @pages_avail and the number of free + * pages in the pool is stored into @pages_free. + * + * If you're interested only in some lists, pass NULL to the + * other ones. + * + * As a special case, if @node == -1, overall info is fetched + * from the system. + * + * Returns 0 on success, -1 otherwise. + */ +int +virNumaGetPages(int node, + unsigned int **pages_size, + unsigned int **pages_avail, + unsigned int **pages_free, + size_t *npages) +{ + int ret = -1; + char *path = NULL; + DIR *dir = NULL; + struct dirent *entry; + unsigned int *tmp_size = NULL, *tmp_avail = NULL, *tmp_free = NULL; + unsigned int ntmp = 0; + size_t i; + bool exchange; + +#if 0 + /* This has to be disabled until the time the issue in + * virNumaGetPageInfo is resolved. Sorry. */ + long system_page_size; + + /* sysconf() returns page size in bytes, + * but we are storing the page size in kibibytes. */ + system_page_size = sysconf(_SC_PAGESIZE) / 1024; + + /* We know that ordinary system pages are supported + * if nothing else is. */ + if (VIR_REALLOC_N(tmp_size, 1) < 0 || + VIR_REALLOC_N(tmp_avail, 1) < 0 || + VIR_REALLOC_N(tmp_free, 1) < 0) + goto cleanup; + + if (virNumaGetPageInfo(node, system_page_size, + &tmp_avail[ntmp], &tmp_free[ntmp]) < 0) + goto cleanup; + tmp_size[ntmp] = system_page_size; + ntmp++; +#endif /* 0 */ + + /* Now that we got ordinary system pages, lets get info on huge pages */ + if (virNumaGetHugePageInfoPath(&path, node, 0, NULL) < 0) + goto cleanup; + + if (!(dir = opendir(path))) { + virReportSystemError(errno, + _("unable to open path: %s"), + path); + goto cleanup; + } + + while (virDirRead(dir, &entry, path) > 0) { + const char *page_name = entry->d_name; + unsigned int page_size, page_avail = 0, page_free = 0; + char *end; + + /* Just to give you a hint, we're dealing with this: + * hugepages-2048kB/ or hugepages-1048576kB/ */ + if (!STRPREFIX(entry->d_name, HUGEPAGES_PREFIX)) + continue; + + page_name += strlen(HUGEPAGES_PREFIX); + + if (virStrToLong_ui(page_name, &end, 10, &page_size) < 0 || + STRCASENEQ(end, "kB")) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to parse %s"), + entry->d_name); + goto cleanup; + } + + /* Querying more detailed info makes sense only sometimes */ + if ((pages_avail || pages_free) && + virNumaGetHugePageInfo(node, page_size, + &page_avail, &page_free) < 0) + goto cleanup; + + if (VIR_REALLOC_N(tmp_size, ntmp + 1) < 0 || + VIR_REALLOC_N(tmp_avail, ntmp + 1) < 0 || + VIR_REALLOC_N(tmp_free, ntmp + 1) < 0) + goto cleanup; + + tmp_size[ntmp] = page_size; + tmp_avail[ntmp] = page_avail; + tmp_free[ntmp] = page_free; + ntmp++; + } + + /* Just to produce nice output, sort the arrays by increasing page size */ + do { + exchange = false; + for (i = 0; i < ntmp -1; i++) { + if (tmp_size[i] > tmp_size[i + 1]) { + exchange = true; + SWAP(tmp_size[i], tmp_size[i + 1]); + SWAP(tmp_avail[i], tmp_avail[i + 1]); + SWAP(tmp_free[i], tmp_free[i + 1]); + } + } + } while (exchange); + + if (pages_size) { + *pages_size = tmp_size; + tmp_size = NULL; + } + if (pages_avail) { + *pages_avail = tmp_avail; + tmp_avail = NULL; + } + if (pages_free) { + *pages_free = tmp_free; + tmp_free = NULL; + } + *npages = ntmp; + ret = 0; + cleanup: + VIR_FREE(tmp_free); + VIR_FREE(tmp_avail); + VIR_FREE(tmp_size); + closedir(dir); + VIR_FREE(path); + return ret; +} diff --git a/src/util/virnuma.h b/src/util/virnuma.h index 772296d8bd..a7435dc27a 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -70,4 +70,14 @@ unsigned int virNumaGetMaxCPUs(void); int virNumaGetNodeCPUs(int node, virBitmapPtr *cpus); +int virNumaGetPageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free); +int virNumaGetPages(int node, + unsigned int **pages_size, + unsigned int **pages_avail, + unsigned int **pages_free, + size_t *npages) + ATTRIBUTE_NONNULL(5); #endif /* __VIR_NUMA_H__ */