2013-03-18 09:04:01 +00:00
|
|
|
/*
|
|
|
|
* virnuma.c: helper APIs for managing numa
|
|
|
|
*
|
2014-06-06 14:50:39 +00:00
|
|
|
* Copyright (C) 2011-2014 Red Hat, Inc.
|
2013-03-18 09:04:01 +00:00
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library. If not, see
|
|
|
|
* <http://www.gnu.org/licenses/>.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
2013-10-18 12:43:34 +00:00
|
|
|
#define NUMA_MAX_N_CPUS 4096
|
|
|
|
|
2013-03-20 03:35:08 +00:00
|
|
|
#if WITH_NUMACTL
|
|
|
|
# define NUMA_VERSION1_COMPATIBILITY 1
|
|
|
|
# include <numa.h>
|
2013-10-18 12:43:34 +00:00
|
|
|
|
|
|
|
# if LIBNUMA_API_VERSION > 1
|
|
|
|
# undef NUMA_MAX_N_CPUS
|
|
|
|
# define NUMA_MAX_N_CPUS (numa_all_cpus_ptr->size)
|
|
|
|
# endif
|
|
|
|
|
|
|
|
#endif /* WITH_NUMACTL */
|
2013-03-20 03:35:08 +00:00
|
|
|
|
2014-06-06 16:09:01 +00:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <dirent.h>
|
|
|
|
|
2013-03-18 09:04:01 +00:00
|
|
|
#include "virnuma.h"
|
|
|
|
#include "vircommand.h"
|
|
|
|
#include "virerror.h"
|
2013-03-20 03:35:08 +00:00
|
|
|
#include "virlog.h"
|
2013-10-18 14:21:24 +00:00
|
|
|
#include "viralloc.h"
|
|
|
|
#include "virbitmap.h"
|
2014-06-06 16:09:01 +00:00
|
|
|
#include "virstring.h"
|
|
|
|
#include "virfile.h"
|
2016-04-13 17:53:02 +00:00
|
|
|
#include "virhostmem.h"
|
2013-03-18 09:04:01 +00:00
|
|
|
|
|
|
|
#define VIR_FROM_THIS VIR_FROM_NONE
|
|
|
|
|
2014-02-28 12:16:17 +00:00
|
|
|
VIR_LOG_INIT("util.numa");
|
|
|
|
|
2013-03-20 03:35:08 +00:00
|
|
|
|
2013-03-18 09:04:01 +00:00
|
|
|
#if HAVE_NUMAD
|
|
|
|
char *
|
|
|
|
virNumaGetAutoPlacementAdvice(unsigned short vcpus,
|
|
|
|
unsigned long long balloon)
|
|
|
|
{
|
2018-07-28 18:01:40 +00:00
|
|
|
VIR_AUTOPTR(virCommand) cmd = NULL;
|
2013-03-18 09:04:01 +00:00
|
|
|
char *output = NULL;
|
|
|
|
|
|
|
|
cmd = virCommandNewArgList(NUMAD, "-w", NULL);
|
|
|
|
virCommandAddArgFormat(cmd, "%d:%llu", vcpus,
|
|
|
|
VIR_DIV_UP(balloon, 1024));
|
|
|
|
|
|
|
|
virCommandSetOutputBuffer(cmd, &output);
|
|
|
|
|
2019-06-03 16:45:27 +00:00
|
|
|
if (virCommandRun(cmd, NULL) < 0) {
|
|
|
|
virReportError(VIR_ERR_OPERATION_FAILED, "%s",
|
2013-03-18 09:04:01 +00:00
|
|
|
_("Failed to query numad for the "
|
|
|
|
"advisory nodeset"));
|
2019-06-03 16:45:27 +00:00
|
|
|
VIR_FREE(output);
|
|
|
|
}
|
2013-03-18 09:04:01 +00:00
|
|
|
|
|
|
|
return output;
|
|
|
|
}
|
2014-11-05 16:51:27 +00:00
|
|
|
#else /* !HAVE_NUMAD */
|
2013-03-18 09:04:01 +00:00
|
|
|
char *
|
|
|
|
virNumaGetAutoPlacementAdvice(unsigned short vcpus ATTRIBUTE_UNUSED,
|
|
|
|
unsigned long long balloon ATTRIBUTE_UNUSED)
|
|
|
|
{
|
|
|
|
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
|
|
|
_("numad is not available on this host"));
|
|
|
|
return NULL;
|
|
|
|
}
|
2014-11-05 16:51:27 +00:00
|
|
|
#endif /* !HAVE_NUMAD */
|
2013-03-20 03:35:08 +00:00
|
|
|
|
|
|
|
#if WITH_NUMACTL
|
|
|
|
int
|
2014-11-06 11:16:54 +00:00
|
|
|
virNumaSetupMemoryPolicy(virDomainNumatuneMemMode mode,
|
|
|
|
virBitmapPtr nodeset)
|
2013-03-20 03:35:08 +00:00
|
|
|
{
|
|
|
|
nodemask_t mask;
|
|
|
|
int node = -1;
|
|
|
|
int ret = -1;
|
Convert 'int i' to 'size_t i' in src/util/ files
Convert the type of loop iterators named 'i', 'j', k',
'ii', 'jj', 'kk', to be 'size_t' instead of 'int' or
'unsigned int', also santizing 'ii', 'jj', 'kk' to use
the normal 'i', 'j', 'k' naming
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2013-07-08 14:09:33 +00:00
|
|
|
int bit = 0;
|
|
|
|
size_t i;
|
2013-03-20 03:35:08 +00:00
|
|
|
int maxnode = 0;
|
|
|
|
|
2014-11-07 08:54:56 +00:00
|
|
|
if (!nodeset)
|
|
|
|
return 0;
|
|
|
|
|
2014-11-06 11:16:54 +00:00
|
|
|
if (!virNumaNodesetIsAvailable(nodeset))
|
2014-11-04 02:44:41 +00:00
|
|
|
return -1;
|
|
|
|
|
util: Correct the NUMA node range checking
There are 2 issues here: First we shouldn't add "1" to the return
value of numa_max_node(), since the semanteme of the error message
was changed, it's not saying about the number of total NUMA nodes
anymore. Second, the value of "bit" is the position of the first
bit which exceeds either numa_max_node() or NUMA_NUM_NODES, it can
be any number in the range, so saying "bigger than $bit" is quite
confused now. For example, assuming there is a NUMA machine which
has 10 NUMA nodes, and one specifies the "nodeset" as "0,5,88",
the error message will be like:
Nodeset is out of range, host cannot support NUMA node bigger than 88
It sounds like all NUMA node number less than 88 is fine, but
actually the maximum NUMA node number the machine supports is 9.
This patch fixes the issues by removing the addition with "1" and
simplifies the error message as "NUMA node $bit is out of range".
Also simplifies the comparision in the while loop by getting the
smaller one of numa_max_node() and NUMA_NUM_NODES up front.
2014-01-22 09:18:44 +00:00
|
|
|
maxnode = numa_max_node();
|
|
|
|
maxnode = maxnode < NUMA_NUM_NODES ? maxnode : NUMA_NUM_NODES;
|
2013-04-22 07:14:56 +00:00
|
|
|
|
2013-03-20 03:35:08 +00:00
|
|
|
/* Convert nodemask to NUMA bitmask. */
|
|
|
|
nodemask_zero(&mask);
|
Convert 'int i' to 'size_t i' in src/util/ files
Convert the type of loop iterators named 'i', 'j', k',
'ii', 'jj', 'kk', to be 'size_t' instead of 'int' or
'unsigned int', also santizing 'ii', 'jj', 'kk' to use
the normal 'i', 'j', 'k' naming
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2013-07-08 14:09:33 +00:00
|
|
|
bit = -1;
|
2014-11-06 11:16:54 +00:00
|
|
|
while ((bit = virBitmapNextSetBit(nodeset, bit)) >= 0) {
|
util: Correct the NUMA node range checking
There are 2 issues here: First we shouldn't add "1" to the return
value of numa_max_node(), since the semanteme of the error message
was changed, it's not saying about the number of total NUMA nodes
anymore. Second, the value of "bit" is the position of the first
bit which exceeds either numa_max_node() or NUMA_NUM_NODES, it can
be any number in the range, so saying "bigger than $bit" is quite
confused now. For example, assuming there is a NUMA machine which
has 10 NUMA nodes, and one specifies the "nodeset" as "0,5,88",
the error message will be like:
Nodeset is out of range, host cannot support NUMA node bigger than 88
It sounds like all NUMA node number less than 88 is fine, but
actually the maximum NUMA node number the machine supports is 9.
This patch fixes the issues by removing the addition with "1" and
simplifies the error message as "NUMA node $bit is out of range".
Also simplifies the comparision in the while loop by getting the
smaller one of numa_max_node() and NUMA_NUM_NODES up front.
2014-01-22 09:18:44 +00:00
|
|
|
if (bit > maxnode) {
|
2013-03-20 03:35:08 +00:00
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
util: Correct the NUMA node range checking
There are 2 issues here: First we shouldn't add "1" to the return
value of numa_max_node(), since the semanteme of the error message
was changed, it's not saying about the number of total NUMA nodes
anymore. Second, the value of "bit" is the position of the first
bit which exceeds either numa_max_node() or NUMA_NUM_NODES, it can
be any number in the range, so saying "bigger than $bit" is quite
confused now. For example, assuming there is a NUMA machine which
has 10 NUMA nodes, and one specifies the "nodeset" as "0,5,88",
the error message will be like:
Nodeset is out of range, host cannot support NUMA node bigger than 88
It sounds like all NUMA node number less than 88 is fine, but
actually the maximum NUMA node number the machine supports is 9.
This patch fixes the issues by removing the addition with "1" and
simplifies the error message as "NUMA node $bit is out of range".
Also simplifies the comparision in the while loop by getting the
smaller one of numa_max_node() and NUMA_NUM_NODES up front.
2014-01-22 09:18:44 +00:00
|
|
|
_("NUMA node %d is out of range"), bit);
|
2013-03-20 03:35:08 +00:00
|
|
|
return -1;
|
|
|
|
}
|
Convert 'int i' to 'size_t i' in src/util/ files
Convert the type of loop iterators named 'i', 'j', k',
'ii', 'jj', 'kk', to be 'size_t' instead of 'int' or
'unsigned int', also santizing 'ii', 'jj', 'kk' to use
the normal 'i', 'j', 'k' naming
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2013-07-08 14:09:33 +00:00
|
|
|
nodemask_set(&mask, bit);
|
2013-03-20 03:35:08 +00:00
|
|
|
}
|
|
|
|
|
2014-11-06 11:16:54 +00:00
|
|
|
switch (mode) {
|
2014-06-09 13:00:22 +00:00
|
|
|
case VIR_DOMAIN_NUMATUNE_MEM_STRICT:
|
2013-03-20 03:35:08 +00:00
|
|
|
numa_set_bind_policy(1);
|
|
|
|
numa_set_membind(&mask);
|
|
|
|
numa_set_bind_policy(0);
|
2014-06-09 13:00:22 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case VIR_DOMAIN_NUMATUNE_MEM_PREFERRED:
|
|
|
|
{
|
2013-03-20 03:35:08 +00:00
|
|
|
int nnodes = 0;
|
|
|
|
for (i = 0; i < NUMA_NUM_NODES; i++) {
|
|
|
|
if (nodemask_isset(&mask, i)) {
|
|
|
|
node = i;
|
|
|
|
nnodes++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nnodes != 1) {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
|
"%s", _("NUMA memory tuning in 'preferred' mode "
|
|
|
|
"only supports single node"));
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
numa_set_bind_policy(0);
|
|
|
|
numa_set_preferred(node);
|
|
|
|
}
|
2014-06-09 13:00:22 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE:
|
|
|
|
numa_set_interleave_mask(&mask);
|
|
|
|
break;
|
2013-03-20 03:35:08 +00:00
|
|
|
|
2014-06-09 13:00:22 +00:00
|
|
|
case VIR_DOMAIN_NUMATUNE_MEM_LAST:
|
|
|
|
break;
|
|
|
|
}
|
2013-03-20 03:35:08 +00:00
|
|
|
ret = 0;
|
|
|
|
|
2014-03-25 06:53:22 +00:00
|
|
|
cleanup:
|
2013-03-20 03:35:08 +00:00
|
|
|
return ret;
|
|
|
|
}
|
2013-10-17 14:57:29 +00:00
|
|
|
|
|
|
|
/**
 * virNumaIsAvailable:
 *
 * Returns true unless libnuma's numa_available() reports -1.
 */
bool
virNumaIsAvailable(void)
{
    if (numa_available() == -1)
        return false;

    return true;
}
|
2013-10-17 15:42:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virNumaGetMaxNode:
|
|
|
|
* Get the highest node number available on the current system.
|
|
|
|
* (See the node numbers in /sys/devices/system/node/ ).
|
|
|
|
*
|
2014-06-02 10:05:28 +00:00
|
|
|
* Returns the highest NUMA node id on success, -1 on error.
|
2013-10-17 15:42:22 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
virNumaGetMaxNode(void)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!virNumaIsAvailable()) {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
|
|
|
_("NUMA isn't available on this host"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((ret = numa_max_node()) < 0) {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
|
|
|
_("Failed to request maximum NUMA node id"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
2013-10-17 16:30:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
 * virNumaGetNodeMemory:
 * @node: identifier of the requested NUMA node
 * @memsize: if non-NULL, filled with the total size of memory in @node
 * @memfree: if non-NULL, filled with the free memory in @node
 *
 * Query numa_node_size64() for @node and hand the results back through
 * @memsize and @memfree. Both outputs are reset to 0 before the query, so
 * they hold 0 when the function fails. The caller has to guarantee that
 * @node is in range (see virNumaGetMaxNode).
 *
 * Returns 0 on success, -1 on error. Does not report errors.
 */
int
virNumaGetNodeMemory(int node,
                     unsigned long long *memsize,
                     unsigned long long *memfree)
{
    long long total;
    long long avail;

    if (memsize)
        *memsize = 0;
    if (memfree)
        *memfree = 0;

    total = numa_node_size64(node, &avail);
    if (total < 0)
        return -1;

    if (memsize)
        *memsize = total;
    if (memfree)
        *memfree = avail;

    return 0;
}
|
2013-10-18 14:21:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virNumaGetNodeCPUs:
|
|
|
|
* @node: identifier of the requested NUMA node
|
|
|
|
* @cpus: returns a bitmap of CPUs in @node
|
|
|
|
*
|
|
|
|
* Returns count of CPUs in the selected node and sets the map of the cpus to
|
|
|
|
* @cpus. On error if the @node doesn't exist in the system this function
|
|
|
|
* returns -2 and sets @cpus to NULL. On other errors -1 is returned, @cpus
|
|
|
|
* is set to NULL and an error is reported.
|
|
|
|
*/
|
|
|
|
|
|
|
|
# define n_bits(var) (8 * sizeof(var))
|
|
|
|
# define MASK_CPU_ISSET(mask, cpu) \
|
|
|
|
(((mask)[((cpu) / n_bits(*(mask)))] >> ((cpu) % n_bits(*(mask)))) & 1)
|
|
|
|
int
|
|
|
|
virNumaGetNodeCPUs(int node,
|
|
|
|
virBitmapPtr *cpus)
|
|
|
|
{
|
|
|
|
int ncpus = 0;
|
|
|
|
int max_n_cpus = virNumaGetMaxCPUs();
|
|
|
|
int mask_n_bytes = max_n_cpus / 8;
|
|
|
|
size_t i;
|
2018-07-28 18:01:39 +00:00
|
|
|
VIR_AUTOFREE(unsigned long *) mask = NULL;
|
|
|
|
VIR_AUTOFREE(unsigned long *) allonesmask = NULL;
|
2018-07-28 18:01:40 +00:00
|
|
|
VIR_AUTOPTR(virBitmap) cpumap = NULL;
|
2013-10-18 14:21:24 +00:00
|
|
|
|
|
|
|
*cpus = NULL;
|
|
|
|
|
|
|
|
if (VIR_ALLOC_N(mask, mask_n_bytes / sizeof(*mask)) < 0)
|
2018-07-28 18:01:40 +00:00
|
|
|
return -1;
|
2013-10-18 14:21:24 +00:00
|
|
|
|
|
|
|
if (VIR_ALLOC_N(allonesmask, mask_n_bytes / sizeof(*mask)) < 0)
|
2018-07-28 18:01:40 +00:00
|
|
|
return -1;
|
2013-10-18 14:21:24 +00:00
|
|
|
|
|
|
|
memset(allonesmask, 0xff, mask_n_bytes);
|
|
|
|
|
|
|
|
/* The first time this returns -1, ENOENT if node doesn't exist... */
|
|
|
|
if (numa_node_to_cpus(node, mask, mask_n_bytes) < 0) {
|
|
|
|
VIR_WARN("NUMA topology for cell %d is not available, ignoring", node);
|
2018-07-28 18:01:40 +00:00
|
|
|
return -2;
|
2013-10-18 14:21:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* second, third... times it returns an all-1's mask */
|
|
|
|
if (memcmp(mask, allonesmask, mask_n_bytes) == 0) {
|
|
|
|
VIR_DEBUG("NUMA topology for cell %d is invalid, ignoring", node);
|
2018-07-28 18:01:40 +00:00
|
|
|
return -2;
|
2013-10-18 14:21:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!(cpumap = virBitmapNew(max_n_cpus)))
|
2018-07-28 18:01:40 +00:00
|
|
|
return -1;
|
2013-10-18 14:21:24 +00:00
|
|
|
|
|
|
|
for (i = 0; i < max_n_cpus; i++) {
|
|
|
|
if (MASK_CPU_ISSET(mask, i)) {
|
|
|
|
ignore_value(virBitmapSetBit(cpumap, i));
|
|
|
|
ncpus++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-28 18:01:40 +00:00
|
|
|
VIR_STEAL_PTR(*cpus, cpumap);
|
|
|
|
return ncpus;
|
2013-10-18 14:21:24 +00:00
|
|
|
}
|
|
|
|
# undef MASK_CPU_ISSET
|
|
|
|
# undef n_bits
|
|
|
|
|
2019-05-30 16:01:35 +00:00
|
|
|
/**
|
|
|
|
* virNumaNodesetToCPUset:
|
|
|
|
* @nodeset: bitmap containing a set of NUMA nodes
|
|
|
|
* @cpuset: return location for a bitmap containing a set of CPUs
|
|
|
|
*
|
|
|
|
* Convert a set of NUMA node to the set of CPUs they contain.
|
|
|
|
*
|
|
|
|
* Returns 0 on success, <0 on failure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
virNumaNodesetToCPUset(virBitmapPtr nodeset,
|
|
|
|
virBitmapPtr *cpuset)
|
|
|
|
{
|
|
|
|
VIR_AUTOPTR(virBitmap) allNodesCPUs = NULL;
|
|
|
|
size_t nodesetSize;
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
*cpuset = NULL;
|
|
|
|
|
|
|
|
if (!nodeset)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
allNodesCPUs = virBitmapNewEmpty();
|
|
|
|
nodesetSize = virBitmapSize(nodeset);
|
|
|
|
|
|
|
|
for (i = 0; i < nodesetSize; i++) {
|
|
|
|
VIR_AUTOPTR(virBitmap) nodeCPUs = NULL;
|
|
|
|
|
|
|
|
if (!virBitmapIsBitSet(nodeset, i))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (virNumaGetNodeCPUs(i, &nodeCPUs) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (virBitmapUnion(allNodesCPUs, nodeCPUs) < 0)
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
VIR_STEAL_PTR(*cpuset, allNodesCPUs);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-11-05 16:51:27 +00:00
|
|
|
#else /* !WITH_NUMACTL */
|
|
|
|
|
2013-03-20 03:35:08 +00:00
|
|
|
int
|
2014-11-06 11:16:54 +00:00
|
|
|
virNumaSetupMemoryPolicy(virDomainNumatuneMemMode mode ATTRIBUTE_UNUSED,
|
|
|
|
virBitmapPtr nodeset)
|
2013-03-20 03:35:08 +00:00
|
|
|
{
|
2014-11-06 11:16:54 +00:00
|
|
|
if (!virNumaNodesetIsAvailable(nodeset))
|
2013-03-20 03:35:08 +00:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2013-10-17 14:57:29 +00:00
|
|
|
|
|
|
|
/**
 * virNumaIsAvailable:
 *
 * Variant compiled without numactl support.
 *
 * Returns false unconditionally.
 */
bool
virNumaIsAvailable(void)
{
    return false;
}
|
2013-10-17 15:42:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
virNumaGetMaxNode(void)
|
|
|
|
{
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
|
|
|
_("NUMA isn't available on this host"));
|
|
|
|
return -1;
|
|
|
|
}
|
2013-10-17 16:30:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
virNumaGetNodeMemory(int node ATTRIBUTE_UNUSED,
|
|
|
|
unsigned long long *memsize,
|
|
|
|
unsigned long long *memfree)
|
|
|
|
{
|
|
|
|
if (memsize)
|
|
|
|
*memsize = 0;
|
|
|
|
|
|
|
|
if (memfree)
|
|
|
|
*memfree = 0;
|
|
|
|
|
|
|
|
VIR_DEBUG("NUMA isn't available on this host");
|
|
|
|
return -1;
|
|
|
|
}
|
2013-10-18 14:21:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
virNumaGetNodeCPUs(int node ATTRIBUTE_UNUSED,
|
|
|
|
virBitmapPtr *cpus)
|
|
|
|
{
|
|
|
|
*cpus = NULL;
|
|
|
|
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
|
|
|
_("NUMA isn't available on this host"));
|
|
|
|
return -1;
|
|
|
|
}
|
2019-05-30 16:01:35 +00:00
|
|
|
|
|
|
|
int
|
|
|
|
virNumaNodesetToCPUset(virBitmapPtr nodeset ATTRIBUTE_UNUSED,
|
|
|
|
virBitmapPtr *cpuset)
|
|
|
|
{
|
|
|
|
*cpuset = NULL;
|
|
|
|
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
|
|
|
_("NUMA isn't available on this host"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2014-11-05 16:51:27 +00:00
|
|
|
#endif /* !WITH_NUMACTL */
|
2013-10-18 12:43:34 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* virNumaGetMaxCPUs:
|
|
|
|
*
|
|
|
|
* Get the maximum count of CPUs supportable in the host.
|
|
|
|
*
|
|
|
|
* Returns the count of CPUs supported.
|
|
|
|
*/
|
|
|
|
unsigned int
|
|
|
|
virNumaGetMaxCPUs(void)
|
|
|
|
{
|
|
|
|
return NUMA_MAX_N_CPUS;
|
|
|
|
}
|
2014-06-04 12:39:01 +00:00
|
|
|
|
|
|
|
|
2014-07-21 17:24:21 +00:00
|
|
|
#if WITH_NUMACTL && HAVE_NUMA_BITMASK_ISBITSET
|
2014-06-16 12:29:15 +00:00
|
|
|
/**
|
|
|
|
* virNumaNodeIsAvailable:
|
|
|
|
* @node: node to check
|
|
|
|
*
|
|
|
|
* On some hosts the set of NUMA nodes isn't continuous.
|
|
|
|
* Use this function to test if the @node is available.
|
|
|
|
*
|
|
|
|
* Returns: true if @node is available,
|
|
|
|
* false if @node doesn't exist
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
virNumaNodeIsAvailable(int node)
|
|
|
|
{
|
|
|
|
return numa_bitmask_isbitset(numa_nodes_ptr, node);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-06-04 12:39:01 +00:00
|
|
|
/**
 * virNumaGetDistances:
 * @node: identifier of the requested NUMA node
 * @distances: array of distances to sibling nodes
 * @ndistances: size of @distances
 *
 * Get array of distances to sibling nodes from @node. If a
 * distances[x] equals to zero, the node x is not enabled or
 * doesn't exist. As a special case, if @node itself refers to
 * disabled or nonexistent NUMA node, then @distances and
 * @ndistances are set to NULL and zero respectively.
 *
 * The distances are a bit of magic. For a local node the value
 * is 10, for remote it's typically 20 meaning that time penalty
 * for accessing a remote node is two time bigger than when
 * accessing a local node.
 *
 * On success with an existing @node the array holds one entry per node
 * id from 0 to the highest node id, and the caller owns it.
 *
 * Returns 0 on success, -1 otherwise.
 */
int
virNumaGetDistances(int node,
                    int **distances,
                    int *ndistances)
{
    int max_node;
    size_t i;

    if (!virNumaNodeIsAvailable(node)) {
        VIR_DEBUG("Node %d does not exist", node);
        *distances = NULL;
        *ndistances = 0;
        return 0;
    }

    if ((max_node = virNumaGetMaxNode()) < 0)
        return -1;

    if (VIR_ALLOC_N(*distances, max_node + 1) < 0)
        return -1;

    *ndistances = max_node + 1;

    for (i = 0; i <= max_node; i++) {
        /* Check the sibling (i), not @node which is already known to
         * exist; skipped entries keep the zero they were allocated with. */
        if (!virNumaNodeIsAvailable(i))
            continue;

        (*distances)[i] = numa_distance(node, i);
    }

    return 0;
}
|
2014-06-16 12:29:15 +00:00
|
|
|
|
2014-11-05 16:51:27 +00:00
|
|
|
#else /* !(WITH_NUMACTL && HAVE_NUMA_BITMASK_ISBITSET) */
|
2014-06-16 12:29:15 +00:00
|
|
|
|
|
|
|
/**
 * virNumaNodeIsAvailable:
 * @node: node to check
 *
 * Fallback used when numa_bitmask_isbitset() is not usable: a node is
 * considered available when its id lies within 0..virNumaGetMaxNode().
 *
 * Returns: true if @node is within that range,
 *          false otherwise or if the maximum node id cannot be obtained
 */
bool
virNumaNodeIsAvailable(int node)
{
    int highest = virNumaGetMaxNode();

    /* Do we have anything better? */
    if (highest < 0 || node < 0 || node > highest)
        return false;

    return true;
}
|
|
|
|
|
|
|
|
|
2014-06-04 12:39:01 +00:00
|
|
|
int
|
|
|
|
virNumaGetDistances(int node ATTRIBUTE_UNUSED,
|
|
|
|
int **distances,
|
|
|
|
int *ndistances)
|
|
|
|
{
|
|
|
|
*distances = NULL;
|
|
|
|
*ndistances = 0;
|
2014-07-21 17:24:21 +00:00
|
|
|
VIR_DEBUG("NUMA distance information isn't available on this host");
|
2014-06-04 12:39:01 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2014-11-05 16:51:27 +00:00
|
|
|
#endif /* !(WITH_NUMACTL && HAVE_NUMA_BITMASK_ISBITSET) */
|
2014-06-06 16:09:01 +00:00
|
|
|
|
|
|
|
|
2014-06-23 13:56:46 +00:00
|
|
|
/* currently all the huge page stuff below is linux only */
|
|
|
|
#ifdef __linux__
|
2014-06-23 07:25:04 +00:00
|
|
|
|
|
|
|
# define HUGEPAGES_NUMA_PREFIX "/sys/devices/system/node/"
|
|
|
|
# define HUGEPAGES_SYSTEM_PREFIX "/sys/kernel/mm/hugepages/"
|
|
|
|
# define HUGEPAGES_PREFIX "hugepages-"
|
2014-06-06 16:09:01 +00:00
|
|
|
|
|
|
|
static int
|
|
|
|
virNumaGetHugePageInfoPath(char **path,
|
|
|
|
int node,
|
|
|
|
unsigned int page_size,
|
|
|
|
const char *suffix)
|
|
|
|
{
|
2015-10-21 04:13:41 +00:00
|
|
|
int ret;
|
|
|
|
|
2014-06-06 16:09:01 +00:00
|
|
|
if (node == -1) {
|
|
|
|
/* We are aiming at overall system info */
|
2015-10-21 04:13:41 +00:00
|
|
|
ret = virAsprintf(path,
|
|
|
|
HUGEPAGES_SYSTEM_PREFIX HUGEPAGES_PREFIX "%ukB/%s",
|
2019-02-12 16:25:06 +00:00
|
|
|
page_size, NULLSTR_EMPTY(suffix));
|
2014-06-06 16:09:01 +00:00
|
|
|
} else {
|
|
|
|
/* We are aiming on specific NUMA node */
|
2015-10-21 04:13:41 +00:00
|
|
|
ret = virAsprintf(path,
|
|
|
|
HUGEPAGES_NUMA_PREFIX "node%d/hugepages/"
|
|
|
|
HUGEPAGES_PREFIX "%ukB/%s",
|
2019-02-12 16:25:06 +00:00
|
|
|
node, page_size, NULLSTR_EMPTY(suffix));
|
2015-10-21 04:13:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ret >= 0 && !virFileExists(*path)) {
|
|
|
|
ret = -1;
|
|
|
|
if (node != -1) {
|
|
|
|
if (!virNumaNodeIsAvailable(node)) {
|
|
|
|
virReportError(VIR_ERR_OPERATION_FAILED,
|
|
|
|
_("NUMA node %d is not available"),
|
|
|
|
node);
|
|
|
|
} else {
|
|
|
|
virReportError(VIR_ERR_OPERATION_FAILED,
|
|
|
|
_("page size %u is not available on node %d"),
|
|
|
|
page_size, node);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
virReportError(VIR_ERR_OPERATION_FAILED,
|
|
|
|
_("page size %u is not available"),
|
|
|
|
page_size);
|
|
|
|
}
|
2014-06-06 16:09:01 +00:00
|
|
|
}
|
2015-10-21 04:13:41 +00:00
|
|
|
|
|
|
|
return ret;
|
2014-06-06 16:09:01 +00:00
|
|
|
}
|
|
|
|
|
2015-10-21 04:13:39 +00:00
|
|
|
static int
|
|
|
|
virNumaGetHugePageInfoDir(char **path, int node)
|
|
|
|
{
|
|
|
|
if (node == -1) {
|
|
|
|
return VIR_STRDUP(*path, HUGEPAGES_SYSTEM_PREFIX);
|
|
|
|
} else {
|
|
|
|
return virAsprintf(path,
|
|
|
|
HUGEPAGES_NUMA_PREFIX "node%d/hugepages/",
|
|
|
|
node);
|
|
|
|
}
|
|
|
|
}
|
2014-06-06 16:09:01 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* virNumaGetHugePageInfo:
|
|
|
|
* @node: NUMA node id
|
|
|
|
* @page_size: which huge page are we interested in
|
|
|
|
* @page_avail: total number of huge pages in the pool
|
|
|
|
* @page_free: the number of free huge pages in the pool
|
|
|
|
*
|
|
|
|
* For given NUMA node and huge page size fetch information on
|
|
|
|
* total number of huge pages in the pool (both free and taken)
|
|
|
|
* and count for free huge pages in the pool.
|
|
|
|
*
|
|
|
|
* If you're interested in just one bit, pass NULL to the other one.
|
|
|
|
*
|
|
|
|
* As a special case, if @node == -1, overall info is fetched
|
|
|
|
* from the system.
|
|
|
|
*
|
|
|
|
* Returns 0 on success, -1 otherwise (with error reported).
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
virNumaGetHugePageInfo(int node,
|
|
|
|
unsigned int page_size,
|
2018-04-23 14:36:53 +00:00
|
|
|
unsigned long long *page_avail,
|
|
|
|
unsigned long long *page_free)
|
2014-06-06 16:09:01 +00:00
|
|
|
{
|
|
|
|
char *end;
|
2018-07-28 18:01:39 +00:00
|
|
|
VIR_AUTOFREE(char *) path = NULL;
|
|
|
|
VIR_AUTOFREE(char *) buf = NULL;
|
2014-06-06 16:09:01 +00:00
|
|
|
|
|
|
|
if (page_avail) {
|
|
|
|
if (virNumaGetHugePageInfoPath(&path, node,
|
|
|
|
page_size, "nr_hugepages") < 0)
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-06-06 16:09:01 +00:00
|
|
|
|
|
|
|
if (virFileReadAll(path, 1024, &buf) < 0)
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-06-06 16:09:01 +00:00
|
|
|
|
2018-04-23 14:36:53 +00:00
|
|
|
if (virStrToLong_ull(buf, &end, 10, page_avail) < 0 ||
|
2014-06-06 16:09:01 +00:00
|
|
|
*end != '\n') {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
|
_("unable to parse: %s"),
|
|
|
|
buf);
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-06-06 16:09:01 +00:00
|
|
|
}
|
|
|
|
VIR_FREE(buf);
|
|
|
|
VIR_FREE(path);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (page_free) {
|
|
|
|
if (virNumaGetHugePageInfoPath(&path, node,
|
|
|
|
page_size, "free_hugepages") < 0)
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-06-06 16:09:01 +00:00
|
|
|
|
|
|
|
if (virFileReadAll(path, 1024, &buf) < 0)
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-06-06 16:09:01 +00:00
|
|
|
|
2018-04-23 14:36:53 +00:00
|
|
|
if (virStrToLong_ull(buf, &end, 10, page_free) < 0 ||
|
2014-06-06 16:09:01 +00:00
|
|
|
*end != '\n') {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
|
_("unable to parse: %s"),
|
|
|
|
buf);
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-06-06 16:09:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-28 18:01:39 +00:00
|
|
|
return 0;
|
2014-06-06 16:09:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * virNumaGetPageInfo:
 * @node: NUMA node id
 * @page_size: which page size are we interested in (in KiB)
 * @huge_page_sum: the sum of memory taken by huge pages (in
 * bytes)
 * @page_avail: total number of pages in the pool
 * @page_free: the number of free pages in the pool
 *
 * For given NUMA node and page size fetch information on
 * total number of pages in the pool (both free and taken)
 * and count for free pages in the pool. If @page_size matches the
 * system page size the numbers are derived from the node's (or the
 * host's) memory totals; any other size is looked up as a huge page
 * pool.
 *
 * The @huge_page_sum parameter exists due to the Linux kernel
 * limitation. The problem is, if there are some huge pages
 * allocated, they are accounted under the 'MemUsed' field in the
 * meminfo file instead of being subtracted from the 'MemTotal'.
 * We must do the subtraction ourselves. Should @huge_page_sum exceed
 * the memory total, the total is treated as 0 rather than wrapping
 * around.
 * If unsure, pass 0.
 *
 * If you're interested in just one bit, pass NULL to the other one.
 *
 * As a special case, if @node == -1, overall info is fetched
 * from the system.
 *
 * Returns 0 on success, -1 otherwise (with error reported).
 */
int
virNumaGetPageInfo(int node,
                   unsigned int page_size,
                   unsigned long long huge_page_sum,
                   unsigned long long *page_avail,
                   unsigned long long *page_free)
{
    long system_page_size = virGetSystemPageSize();
    unsigned long long memsize;
    unsigned long long memfree;

    /* sysconf() returns page size in bytes,
     * the @page_size is however in kibibytes */
    if (page_size != system_page_size / 1024) {
        if (virNumaGetHugePageInfo(node, page_size, page_avail, page_free) < 0)
            return -1;

        return 0;
    }

    /* TODO: come up with better algorithm that takes huge pages into
     * account. The problem is huge pages cut off regular memory. */
    if (node == -1) {
        if (virHostMemGetInfo(&memsize, &memfree) < 0)
            return -1;
    } else {
        if (virNumaGetNodeMemory(node, &memsize, &memfree) < 0)
            return -1;
    }

    /* Take the huge page pools out of the total; guard the unsigned
     * subtraction so an oversized @huge_page_sum cannot wrap around. */
    if (huge_page_sum > memsize)
        memsize = 0;
    else
        memsize -= huge_page_sum;

    if (page_avail)
        *page_avail = memsize / system_page_size;

    if (page_free)
        *page_free = memfree / system_page_size;

    return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virNumaGetPages:
|
|
|
|
* @node: NUMA node id
|
|
|
|
* @pages_size: list of pages supported on @node
|
|
|
|
* @pages_avail: list of the pool sizes on @node
|
|
|
|
* @pages_free: list of free pages on @node
|
|
|
|
* @npages: the lists size
|
|
|
|
*
|
|
|
|
* For given NUMA node fetch info on pages. The size of pages
|
|
|
|
* (e.g. 4K, 2M, 1G) is stored into @pages_size, the size of the
|
|
|
|
* pool is then stored into @pages_avail and the number of free
|
|
|
|
* pages in the pool is stored into @pages_free.
|
|
|
|
*
|
|
|
|
* If you're interested only in some lists, pass NULL to the
|
|
|
|
* other ones.
|
|
|
|
*
|
|
|
|
* As a special case, if @node == -1, overall info is fetched
|
|
|
|
* from the system.
|
|
|
|
*
|
|
|
|
* Returns 0 on success, -1 otherwise.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
virNumaGetPages(int node,
|
|
|
|
unsigned int **pages_size,
|
2018-04-23 14:36:53 +00:00
|
|
|
unsigned long long **pages_avail,
|
|
|
|
unsigned long long **pages_free,
|
2014-06-06 16:09:01 +00:00
|
|
|
size_t *npages)
|
|
|
|
{
|
|
|
|
int ret = -1;
|
|
|
|
DIR *dir = NULL;
|
2014-06-23 13:46:31 +00:00
|
|
|
int direrr = 0;
|
2014-06-06 16:09:01 +00:00
|
|
|
struct dirent *entry;
|
|
|
|
unsigned int ntmp = 0;
|
|
|
|
size_t i;
|
|
|
|
bool exchange;
|
|
|
|
long system_page_size;
|
2014-06-23 13:04:11 +00:00
|
|
|
unsigned long long huge_page_sum = 0;
|
2018-07-28 18:01:39 +00:00
|
|
|
VIR_AUTOFREE(char *) path = NULL;
|
|
|
|
VIR_AUTOFREE(unsigned int *) tmp_size = NULL;
|
|
|
|
VIR_AUTOFREE(unsigned long long *) tmp_avail = NULL;
|
|
|
|
VIR_AUTOFREE(unsigned long long *) tmp_free = NULL;
|
2014-06-06 16:09:01 +00:00
|
|
|
|
|
|
|
/* sysconf() returns page size in bytes,
|
|
|
|
* but we are storing the page size in kibibytes. */
|
2015-02-02 10:26:49 +00:00
|
|
|
system_page_size = virGetSystemPageSizeKB();
|
2014-06-06 16:09:01 +00:00
|
|
|
|
2014-06-23 13:04:11 +00:00
|
|
|
/* Query huge pages at first.
|
|
|
|
* On Linux systems, the huge pages pool cuts off the available memory and
|
|
|
|
* is always shown as used memory. Here, however, we want to report
|
|
|
|
* slightly different information. So we take the total memory on a node
|
|
|
|
* and subtract memory taken by the huge pages. */
|
2015-10-21 04:13:39 +00:00
|
|
|
if (virNumaGetHugePageInfoDir(&path, node) < 0)
|
2014-06-06 16:09:01 +00:00
|
|
|
goto cleanup;
|
|
|
|
|
2016-06-21 14:47:24 +00:00
|
|
|
/* It's okay if the @path doesn't exist. Maybe we are running on
|
|
|
|
* system without huge pages support where the path may not exist. */
|
|
|
|
if (virDirOpenIfExists(&dir, path) < 0)
|
|
|
|
goto cleanup;
|
2014-06-06 16:09:01 +00:00
|
|
|
|
2014-06-23 13:46:31 +00:00
|
|
|
while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) {
|
2014-06-06 16:09:01 +00:00
|
|
|
const char *page_name = entry->d_name;
|
2018-04-23 14:36:53 +00:00
|
|
|
unsigned int page_size;
|
|
|
|
unsigned long long page_avail = 0;
|
|
|
|
unsigned long long page_free = 0;
|
2014-06-06 16:09:01 +00:00
|
|
|
char *end;
|
|
|
|
|
|
|
|
/* Just to give you a hint, we're dealing with this:
|
|
|
|
* hugepages-2048kB/ or hugepages-1048576kB/ */
|
|
|
|
if (!STRPREFIX(entry->d_name, HUGEPAGES_PREFIX))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
page_name += strlen(HUGEPAGES_PREFIX);
|
|
|
|
|
|
|
|
if (virStrToLong_ui(page_name, &end, 10, &page_size) < 0 ||
|
|
|
|
STRCASENEQ(end, "kB")) {
|
|
|
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
|
|
_("unable to parse %s"),
|
|
|
|
entry->d_name);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2014-06-23 13:04:11 +00:00
|
|
|
if (virNumaGetHugePageInfo(node, page_size,
|
2014-06-06 16:09:01 +00:00
|
|
|
&page_avail, &page_free) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
if (VIR_REALLOC_N(tmp_size, ntmp + 1) < 0 ||
|
|
|
|
VIR_REALLOC_N(tmp_avail, ntmp + 1) < 0 ||
|
|
|
|
VIR_REALLOC_N(tmp_free, ntmp + 1) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
tmp_size[ntmp] = page_size;
|
|
|
|
tmp_avail[ntmp] = page_avail;
|
|
|
|
tmp_free[ntmp] = page_free;
|
|
|
|
ntmp++;
|
2014-06-23 13:04:11 +00:00
|
|
|
|
|
|
|
/* page_size is in kibibytes while we want huge_page_sum
|
|
|
|
* in just bytes. */
|
|
|
|
huge_page_sum += 1024 * page_size * page_avail;
|
2014-06-06 16:09:01 +00:00
|
|
|
}
|
|
|
|
|
2014-06-23 07:10:04 +00:00
|
|
|
if (direrr < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2014-06-23 13:04:11 +00:00
|
|
|
/* Now append the ordinary system pages */
|
|
|
|
if (VIR_REALLOC_N(tmp_size, ntmp + 1) < 0 ||
|
|
|
|
VIR_REALLOC_N(tmp_avail, ntmp + 1) < 0 ||
|
|
|
|
VIR_REALLOC_N(tmp_free, ntmp + 1) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
if (virNumaGetPageInfo(node, system_page_size, huge_page_sum,
|
|
|
|
&tmp_avail[ntmp], &tmp_free[ntmp]) < 0)
|
|
|
|
goto cleanup;
|
|
|
|
tmp_size[ntmp] = system_page_size;
|
|
|
|
ntmp++;
|
|
|
|
|
2014-06-06 16:09:01 +00:00
|
|
|
/* Just to produce nice output, sort the arrays by increasing page size */
|
|
|
|
do {
|
|
|
|
exchange = false;
|
|
|
|
for (i = 0; i < ntmp -1; i++) {
|
|
|
|
if (tmp_size[i] > tmp_size[i + 1]) {
|
|
|
|
exchange = true;
|
|
|
|
SWAP(tmp_size[i], tmp_size[i + 1]);
|
|
|
|
SWAP(tmp_avail[i], tmp_avail[i + 1]);
|
|
|
|
SWAP(tmp_free[i], tmp_free[i + 1]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} while (exchange);
|
|
|
|
|
|
|
|
if (pages_size) {
|
|
|
|
*pages_size = tmp_size;
|
|
|
|
tmp_size = NULL;
|
|
|
|
}
|
|
|
|
if (pages_avail) {
|
|
|
|
*pages_avail = tmp_avail;
|
|
|
|
tmp_avail = NULL;
|
|
|
|
}
|
|
|
|
if (pages_free) {
|
|
|
|
*pages_free = tmp_free;
|
|
|
|
tmp_free = NULL;
|
|
|
|
}
|
|
|
|
*npages = ntmp;
|
|
|
|
ret = 0;
|
|
|
|
cleanup:
|
2016-06-21 10:40:29 +00:00
|
|
|
VIR_DIR_CLOSE(dir);
|
2014-06-06 16:09:01 +00:00
|
|
|
return ret;
|
|
|
|
}
|
2014-06-23 07:25:04 +00:00
|
|
|
|
|
|
|
|
2014-09-18 07:24:16 +00:00
|
|
|
int
|
|
|
|
virNumaSetPagePoolSize(int node,
|
|
|
|
unsigned int page_size,
|
|
|
|
unsigned long long page_count,
|
|
|
|
bool add)
|
|
|
|
{
|
|
|
|
char *end;
|
|
|
|
unsigned long long nr_count;
|
2018-07-28 18:01:39 +00:00
|
|
|
VIR_AUTOFREE(char *) nr_path = NULL;
|
|
|
|
VIR_AUTOFREE(char *) nr_buf = NULL;
|
2014-09-18 07:24:16 +00:00
|
|
|
|
2015-02-02 10:26:49 +00:00
|
|
|
if (page_size == virGetSystemPageSizeKB()) {
|
2014-09-18 07:24:16 +00:00
|
|
|
/* Special case as kernel handles system pages
|
|
|
|
* differently to huge pages. */
|
|
|
|
virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
|
|
|
|
_("system pages pool can't be modified"));
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-09-18 07:24:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (virNumaGetHugePageInfoPath(&nr_path, node, page_size, "nr_hugepages") < 0)
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-09-18 07:24:16 +00:00
|
|
|
|
|
|
|
/* Firstly check, if there's anything for us to do */
|
|
|
|
if (virFileReadAll(nr_path, 1024, &nr_buf) < 0)
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-09-18 07:24:16 +00:00
|
|
|
|
|
|
|
if (virStrToLong_ull(nr_buf, &end, 10, &nr_count) < 0 ||
|
|
|
|
*end != '\n') {
|
|
|
|
virReportError(VIR_ERR_OPERATION_FAILED,
|
|
|
|
_("invalid number '%s' in '%s'"),
|
|
|
|
nr_buf, nr_path);
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-09-18 07:24:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (add) {
|
|
|
|
if (!page_count) {
|
|
|
|
VIR_DEBUG("Nothing left to do: add = true page_count = 0");
|
2018-07-28 18:01:39 +00:00
|
|
|
return 0;
|
2014-09-18 07:24:16 +00:00
|
|
|
}
|
|
|
|
page_count += nr_count;
|
|
|
|
} else {
|
|
|
|
if (nr_count == page_count) {
|
|
|
|
VIR_DEBUG("Nothing left to do: nr_count = page_count = %llu",
|
|
|
|
page_count);
|
2018-07-28 18:01:39 +00:00
|
|
|
return 0;
|
2014-09-18 07:24:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Okay, page pool adjustment must be done in two steps. In
|
|
|
|
* first we write the desired number into nr_hugepages file.
|
|
|
|
* Kernel then starts to allocate the pages (return from
|
|
|
|
* write should be postponed until the kernel is finished).
|
|
|
|
* However, kernel may have not been successful and reserved
|
|
|
|
* all the pages we wanted. So do the second read to check.
|
|
|
|
*/
|
|
|
|
VIR_FREE(nr_buf);
|
|
|
|
if (virAsprintf(&nr_buf, "%llu", page_count) < 0)
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-09-18 07:24:16 +00:00
|
|
|
|
|
|
|
if (virFileWriteStr(nr_path, nr_buf, 0) < 0) {
|
|
|
|
virReportSystemError(errno,
|
|
|
|
_("Unable to write to: %s"), nr_path);
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-09-18 07:24:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* And now do the check. */
|
|
|
|
|
|
|
|
VIR_FREE(nr_buf);
|
|
|
|
if (virFileReadAll(nr_path, 1024, &nr_buf) < 0)
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-09-18 07:24:16 +00:00
|
|
|
|
|
|
|
if (virStrToLong_ull(nr_buf, &end, 10, &nr_count) < 0 ||
|
|
|
|
*end != '\n') {
|
|
|
|
virReportError(VIR_ERR_OPERATION_FAILED,
|
|
|
|
_("invalid number '%s' in '%s'"),
|
|
|
|
nr_buf, nr_path);
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-09-18 07:24:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (nr_count != page_count) {
|
|
|
|
virReportError(VIR_ERR_OPERATION_FAILED,
|
|
|
|
_("Unable to allocate %llu pages. Allocated only %llu"),
|
|
|
|
page_count, nr_count);
|
2018-07-28 18:01:39 +00:00
|
|
|
return -1;
|
2014-09-18 07:24:16 +00:00
|
|
|
}
|
|
|
|
|
2018-07-28 18:01:39 +00:00
|
|
|
return 0;
|
2014-09-18 07:24:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-06-23 13:56:46 +00:00
|
|
|
#else /* #ifdef __linux__ */
|
2014-06-23 07:25:04 +00:00
|
|
|
int
|
|
|
|
virNumaGetPageInfo(int node ATTRIBUTE_UNUSED,
|
|
|
|
unsigned int page_size ATTRIBUTE_UNUSED,
|
2014-06-23 13:04:11 +00:00
|
|
|
unsigned long long huge_page_sum ATTRIBUTE_UNUSED,
|
2018-04-23 14:36:53 +00:00
|
|
|
unsigned long long *page_avail ATTRIBUTE_UNUSED,
|
|
|
|
unsigned long long *page_free ATTRIBUTE_UNUSED)
|
2014-06-23 07:25:04 +00:00
|
|
|
{
|
|
|
|
virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
|
|
|
|
_("page info is not supported on this platform"));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
virNumaGetPages(int node ATTRIBUTE_UNUSED,
|
|
|
|
unsigned int **pages_size ATTRIBUTE_UNUSED,
|
2018-04-23 14:36:53 +00:00
|
|
|
unsigned long long **pages_avail ATTRIBUTE_UNUSED,
|
|
|
|
unsigned long long **pages_free ATTRIBUTE_UNUSED,
|
2014-06-23 07:25:04 +00:00
|
|
|
size_t *npages ATTRIBUTE_UNUSED)
|
|
|
|
{
|
|
|
|
virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
|
|
|
|
_("page info is not supported on this platform"));
|
|
|
|
return -1;
|
|
|
|
}
|
2014-09-18 07:24:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
virNumaSetPagePoolSize(int node ATTRIBUTE_UNUSED,
|
|
|
|
unsigned int page_size ATTRIBUTE_UNUSED,
|
|
|
|
unsigned long long page_count ATTRIBUTE_UNUSED,
|
|
|
|
bool add ATTRIBUTE_UNUSED)
|
|
|
|
{
|
|
|
|
virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
|
|
|
|
_("page pool allocation is not supported on this platform"));
|
|
|
|
return -1;
|
|
|
|
}
|
2014-06-23 13:56:46 +00:00
|
|
|
#endif /* #ifdef __linux__ */
|
2014-11-06 11:16:54 +00:00
|
|
|
|
|
|
|
bool
|
|
|
|
virNumaNodesetIsAvailable(virBitmapPtr nodeset)
|
|
|
|
{
|
|
|
|
ssize_t bit = -1;
|
|
|
|
|
|
|
|
if (!nodeset)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
while ((bit = virBitmapNextSetBit(nodeset, bit)) >= 0) {
|
|
|
|
if (virNumaNodeIsAvailable(bit))
|
|
|
|
continue;
|
|
|
|
|
2015-03-25 09:05:58 +00:00
|
|
|
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
|
2014-11-06 11:16:54 +00:00
|
|
|
_("NUMA node %zd is unavailable"), bit);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
2014-12-13 08:57:00 +00:00
|
|
|
|
2016-09-13 13:55:06 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* virNumaGetHostMemoryNodeset:
|
|
|
|
*
|
|
|
|
* Returns a bitmap of guest numa node ids that contain memory.
|
|
|
|
*/
|
2014-12-13 08:57:00 +00:00
|
|
|
virBitmapPtr
|
2016-09-13 13:55:06 +00:00
|
|
|
virNumaGetHostMemoryNodeset(void)
|
2014-12-13 08:57:00 +00:00
|
|
|
{
|
|
|
|
int maxnode = virNumaGetMaxNode();
|
2016-09-13 13:55:06 +00:00
|
|
|
unsigned long long nodesize;
|
2014-12-13 08:57:00 +00:00
|
|
|
size_t i = 0;
|
|
|
|
virBitmapPtr nodeset = NULL;
|
|
|
|
|
|
|
|
if (maxnode < 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (!(nodeset = virBitmapNew(maxnode + 1)))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
for (i = 0; i <= maxnode; i++) {
|
|
|
|
if (!virNumaNodeIsAvailable(i))
|
|
|
|
continue;
|
|
|
|
|
2016-09-13 13:55:06 +00:00
|
|
|
/* if we can't detect NUMA node size assume that it's present */
|
|
|
|
if (virNumaGetNodeMemory(i, &nodesize, NULL) < 0 || nodesize > 0)
|
|
|
|
ignore_value(virBitmapSetBit(nodeset, i));
|
2014-12-13 08:57:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nodeset;
|
|
|
|
}
|