Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Xen: Devel

[PATCH 09 of 10 v2] libxl: have NUMA placement deal with cpupools

 

 

Xen devel RSS feed   Index | Next | Previous | View Threaded


raistlin at linux

Jun 15, 2012, 10:04 AM

Post #1 of 4 (63 views)
Permalink
[PATCH 09 of 10 v2] libxl: have NUMA placement deal with cpupools

In such a way that only the cpus belonging to the cpupool of the
domain being placed are considered for the placement itself.

This happens by filtering out, from the placement candidates, all the
nodes in which the cpupool does not have any cpu. After that -- as cpu
pooling does not necessarily happen at NUMA node boundaries -- we also
make sure only the actual cpus that are part of the pool are considered
when counting how many processors a placement candidate is able to provide.

Signed-off-by: Dario Faggioli <dario.faggioli [at] citrix>

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -198,15 +198,27 @@ static void comb_get_nodemap(comb_iter_t
libxl_bitmap_set(nodemap, it[i]);
}

+/* Retrieve how many nodes a nodemap spans. */
+static int nodemap_to_nr_nodes(const libxl_bitmap *nodemap)
+{
+ int i, nr_nodes = 0;
+
+ libxl_for_each_set_bit(i, *nodemap)
+ nr_nodes++;
+ return nr_nodes;
+}
+
/* Retrieve the number of cpus that the nodes that are part of the nodemap
- * span. */
+ * span and that are also set in suitable_cpumap. */
static int nodemap_to_nodes_cpus(libxl_cputopology *tinfo, int nr_cpus,
+ const libxl_bitmap *suitable_cpumap,
const libxl_bitmap *nodemap)
{
int i, nodes_cpus = 0;

for (i = 0; i < nr_cpus; i++) {
- if (libxl_bitmap_test(nodemap, tinfo[i].node))
+ if (libxl_bitmap_test(suitable_cpumap, i) &&
+ libxl_bitmap_test(nodemap, tinfo[i].node))
nodes_cpus++;
}
return nodes_cpus;
@@ -311,12 +323,13 @@ static int cpus_per_node_count(libxl_cpu
int libxl__get_numa_candidates(libxl__gc *gc,
uint32_t min_free_memkb, int min_cpus,
int min_nodes, int max_nodes,
+ const libxl_bitmap *suitable_cpumap,
libxl__numa_candidate *cndts[], int *nr_cndts)
{
libxl__numa_candidate *new_cndts = NULL;
libxl_cputopology *tinfo = NULL;
libxl_numainfo *ninfo = NULL;
- libxl_bitmap nodemap;
+ libxl_bitmap suitable_nodemap, nodemap;
int nr_nodes, nr_cpus;
int array_size, rc;

@@ -340,6 +353,15 @@ int libxl__get_numa_candidates(libxl__gc
if (rc)
goto out;

+ /* Allocate and prepare the map of the node that can be utilized for
+ * placement, basing on the map of suitable cpus. */
+ rc = libxl_node_bitmap_alloc(CTX, &suitable_nodemap);
+ if (rc)
+ goto out;
+ rc = libxl_cpumap_to_nodemap(CTX, suitable_cpumap, &suitable_nodemap);
+ if (rc)
+ goto out;
+
/*
* Round up and down some of the constraints. For instance, the minimum
* number of cpus a candidate should have must at least be non-negative.
@@ -391,9 +413,14 @@ int libxl__get_numa_candidates(libxl__gc
for (comb_ok = comb_init(gc, &comb_iter, nr_nodes, min_nodes); comb_ok;
comb_ok = comb_next(comb_iter, nr_nodes, min_nodes)) {
uint32_t nodes_free_memkb;
- int nodes_cpus;
+ int i, nodes_cpus;

+ /* Get the nodemap for the combination and filter unwnted nodes */
comb_get_nodemap(comb_iter, &nodemap, min_nodes);
+ libxl_for_each_set_bit(i, nodemap) {
+ if (!libxl_bitmap_test(&suitable_nodemap, i))
+ libxl_bitmap_reset(&nodemap, i);
+ }

/* If there is not enough memoy in this combination, skip it
* and go generating the next one... */
@@ -402,7 +429,8 @@ int libxl__get_numa_candidates(libxl__gc
continue;

/* And the same applies if this combination is short in cpus */
- nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus, &nodemap);
+ nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus, suitable_cpumap,
+ &nodemap);
if (min_cpus > 0 && nodes_cpus < min_cpus)
continue;

@@ -427,12 +455,13 @@ int libxl__get_numa_candidates(libxl__gc
new_cndts[*nr_cndts].nr_domains =
nodemap_to_nr_domains(gc, tinfo, &nodemap);
new_cndts[*nr_cndts].free_memkb = nodes_free_memkb;
- new_cndts[*nr_cndts].nr_nodes = min_nodes;
+ new_cndts[*nr_cndts].nr_nodes = nodemap_to_nr_nodes(&nodemap);
new_cndts[*nr_cndts].nr_cpus = nodes_cpus;

LOG(DEBUG, "NUMA placement candidate #%d found: nr_nodes=%d, "
"nr_cpus=%d, free_memkb=%"PRIu32"", *nr_cndts,
- min_nodes, new_cndts[*nr_cndts].nr_cpus,
+ new_cndts[*nr_cndts].nr_nodes,
+ new_cndts[*nr_cndts].nr_cpus,
new_cndts[*nr_cndts].free_memkb / 1024);

(*nr_cndts)++;
@@ -442,6 +471,7 @@ int libxl__get_numa_candidates(libxl__gc

*cndts = new_cndts;
out:
+ libxl_bitmap_dispose(&suitable_nodemap);
libxl_bitmap_dispose(&nodemap);
libxl_cputopology_list_free(tinfo, nr_cpus);
libxl_numainfo_list_free(ninfo, nr_nodes);
@@ -485,23 +515,27 @@ static int numa_cmpf(const void *v1, con
}

/* The actual automatic NUMA placement routine */
-static int numa_place_domain(libxl__gc *gc, libxl_domain_build_info *info)
+static int numa_place_domain(libxl__gc *gc, uint32_t domid,
+ libxl_domain_build_info *info)
{
int nr_candidates = 0;
libxl__numa_candidate *candidates = NULL;
libxl_bitmap candidate_nodemap;
- libxl_cpupoolinfo *pinfo;
- int nr_pools, rc = 0;
+ libxl_cpupoolinfo cpupool_info;
+ int i, cpupool, rc = 0;
uint32_t memkb;

- /* First of all, if cpupools are in use, better not to mess with them */
- pinfo = libxl_list_cpupool(CTX, &nr_pools);
- if (!pinfo)
- return ERROR_FAIL;
- if (nr_pools > 1) {
- LOG(NOTICE, "skipping NUMA placement as cpupools are in use");
- goto out;
- }
+ /*
+ * Extract the cpumap from the cpupool the domain belong to. In fact,
+ * it only makes sense to consider the cpus/nodes that are in there
+ * for placement.
+ */
+ rc = cpupool = libxl__domain_cpupool(gc, domid);
+ if (rc < 0)
+ return rc;
+ rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
+ if (rc)
+ return rc;

rc = libxl_domain_need_memory(CTX, info, &memkb);
if (rc)
@@ -513,7 +547,8 @@ static int numa_place_domain(libxl__gc *

/* Find all the candidates with enough free memory and at least
* as much pcpus as the domain has vcpus. */
- rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus, 0, 0,
+ rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus,
+ 0, 0, &cpupool_info.cpumap,
&candidates, &nr_candidates);
if (rc)
goto out;
@@ -538,13 +573,20 @@ static int numa_place_domain(libxl__gc *
if (rc)
goto out;

+ /* Avoid trying to set the affinity to cpus that might be in the
+ * nodemap but not in our cpupool. */
+ libxl_for_each_set_bit(i, info->cpumap) {
+ if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
+ libxl_bitmap_reset(&info->cpumap, i);
+ }
+
LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
"%"PRIu32" KB free selected", candidates[0].nr_nodes,
candidates[0].nr_cpus, candidates[0].free_memkb / 1024);

out:
libxl_bitmap_dispose(&candidate_nodemap);
- libxl_cpupoolinfo_list_free(pinfo, nr_pools);
+ libxl_cpupoolinfo_dispose(&cpupool_info);
return rc;
}

@@ -567,7 +609,7 @@ int libxl__build_pre(libxl__gc *gc, uint
* whatever that turns out to be.
*/
if (libxl_bitmap_is_full(&info->cpumap)) {
- int rc = numa_place_domain(gc, info);
+ int rc = numa_place_domain(gc, domid, info);
if (rc)
return rc;
}
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2094,14 +2094,17 @@ typedef struct {
* least that amount of free memory and that number of cpus, respectively. If
* min_free_memkb and/or min_cpus are 0, the candidates' free memory and number
* of cpus won't be checked at all, which means a candidate will always be
- * considered suitable wrt the specific constraint. cndts is where the list of
- * exactly nr_cndts candidates is returned. Note that, in case no candidates
- * are found at all, the function returns successfully, but with nr_cndts equal
- * to zero.
+ * considered suitable wrt the specific constraint. suitable_cpumap is useful
+ * for specifyin we want only the cpus in that mask to be considered while
+ * generating placement candidates (for example because of cpupools). cndts is
+ * where the list of exactly nr_cndts candidates is returned. Note that, in
+ * case no candidates are found at all, the function returns successfully, but
+ * with nr_cndts equal to zero.
*/
_hidden int libxl__get_numa_candidates(libxl__gc *gc,
uint32_t min_free_memkb, int min_cpus,
int min_nodes, int max_nodes,
+ const libxl_bitmap *suitable_cpumap,
libxl__numa_candidate *cndts[], int *nr_cndts);

/* allocation and deallocation for placement candidates */

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Jun 21, 2012, 6:31 AM

Post #2 of 4 (48 views)
Permalink
Re: [PATCH 09 of 10 v2] libxl: have NUMA placement deal with cpupools [In reply to]

On Fri, 2012-06-15 at 18:04 +0100, Dario Faggioli wrote:
> In such a way that only the cpus belonging to the cpupool of the
> domain being placed are considered for the placement itself.
>
> This happens by filtering out all the nodes in which the cpupool has
> not any cpu from the placement candidates. After that -- as a cpu pooling
> not necessarily happens at NUMA nodes boundaries -- we also make sure
> only the actual cpus that are part of the pool are considered when
> counting how much processors a placement candidate is able to provide.
>
> Signed-off-by: Dario Faggioli <dario.faggioli [at] citrix>
>
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -198,15 +198,27 @@ static void comb_get_nodemap(comb_iter_t
> libxl_bitmap_set(nodemap, it[i]);
> }
>
> +/* Retrieve how many nodes a nodemap spans. */
> +static int nodemap_to_nr_nodes(const libxl_bitmap *nodemap)
> +{
> + int i, nr_nodes = 0;
> +
> + libxl_for_each_set_bit(i, *nodemap)
> + nr_nodes++;
> + return nr_nodes;
> +}
> +
> /* Retrieve the number of cpus that the nodes that are part of the nodemap
> - * span. */
> + * span and that are also set in suitable_cpumap. */
> static int nodemap_to_nodes_cpus(libxl_cputopology *tinfo, int nr_cpus,
> + const libxl_bitmap *suitable_cpumap,
> const libxl_bitmap *nodemap)
> {
> int i, nodes_cpus = 0;
>
> for (i = 0; i < nr_cpus; i++) {
> - if (libxl_bitmap_test(nodemap, tinfo[i].node))
> + if (libxl_bitmap_test(suitable_cpumap, i) &&
> + libxl_bitmap_test(nodemap, tinfo[i].node))
> nodes_cpus++;
> }
> return nodes_cpus;
> @@ -311,12 +323,13 @@ static int cpus_per_node_count(libxl_cpu
> int libxl__get_numa_candidates(libxl__gc *gc,
> uint32_t min_free_memkb, int min_cpus,
> int min_nodes, int max_nodes,
> + const libxl_bitmap *suitable_cpumap,
> libxl__numa_candidate *cndts[], int *nr_cndts)
> {
> libxl__numa_candidate *new_cndts = NULL;
> libxl_cputopology *tinfo = NULL;
> libxl_numainfo *ninfo = NULL;
> - libxl_bitmap nodemap;
> + libxl_bitmap suitable_nodemap, nodemap;
> int nr_nodes, nr_cpus;
> int array_size, rc;
>
> @@ -340,6 +353,15 @@ int libxl__get_numa_candidates(libxl__gc
> if (rc)
> goto out;
>
> + /* Allocate and prepare the map of the node that can be utilized for
> + * placement, basing on the map of suitable cpus. */
> + rc = libxl_node_bitmap_alloc(CTX, &suitable_nodemap);
> + if (rc)
> + goto out;
> + rc = libxl_cpumap_to_nodemap(CTX, suitable_cpumap, &suitable_nodemap);
> + if (rc)
> + goto out;
> +
> /*
> * Round up and down some of the constraints. For instance, the minimum
> * number of cpus a candidate should have must at least be non-negative.
> @@ -391,9 +413,14 @@ int libxl__get_numa_candidates(libxl__gc
> for (comb_ok = comb_init(gc, &comb_iter, nr_nodes, min_nodes); comb_ok;
> comb_ok = comb_next(comb_iter, nr_nodes, min_nodes)) {
> uint32_t nodes_free_memkb;
> - int nodes_cpus;
> + int i, nodes_cpus;
>
> + /* Get the nodemap for the combination and filter unwnted nodes */

unwanted

> comb_get_nodemap(comb_iter, &nodemap, min_nodes);
> + libxl_for_each_set_bit(i, nodemap) {
> + if (!libxl_bitmap_test(&suitable_nodemap, i))
> + libxl_bitmap_reset(&nodemap, i);
> + }
>
> /* If there is not enough memoy in this combination, skip it
> * and go generating the next one... */
> @@ -402,7 +429,8 @@ int libxl__get_numa_candidates(libxl__gc
> continue;
>
> /* And the same applies if this combination is short in cpus */
> - nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus, &nodemap);
> + nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus, suitable_cpumap,
> + &nodemap);
> if (min_cpus > 0 && nodes_cpus < min_cpus)
> continue;
>
> @@ -427,12 +455,13 @@ int libxl__get_numa_candidates(libxl__gc
> new_cndts[*nr_cndts].nr_domains =
> nodemap_to_nr_domains(gc, tinfo, &nodemap);
> new_cndts[*nr_cndts].free_memkb = nodes_free_memkb;
> - new_cndts[*nr_cndts].nr_nodes = min_nodes;
> + new_cndts[*nr_cndts].nr_nodes = nodemap_to_nr_nodes(&nodemap);
> new_cndts[*nr_cndts].nr_cpus = nodes_cpus;
>
> LOG(DEBUG, "NUMA placement candidate #%d found: nr_nodes=%d, "
> "nr_cpus=%d, free_memkb=%"PRIu32"", *nr_cndts,
> - min_nodes, new_cndts[*nr_cndts].nr_cpus,
> + new_cndts[*nr_cndts].nr_nodes,
> + new_cndts[*nr_cndts].nr_cpus,
> new_cndts[*nr_cndts].free_memkb / 1024);
>
> (*nr_cndts)++;
> @@ -442,6 +471,7 @@ int libxl__get_numa_candidates(libxl__gc
>
> *cndts = new_cndts;
> out:
> + libxl_bitmap_dispose(&suitable_nodemap);
> libxl_bitmap_dispose(&nodemap);
> libxl_cputopology_list_free(tinfo, nr_cpus);
> libxl_numainfo_list_free(ninfo, nr_nodes);
> @@ -485,23 +515,27 @@ static int numa_cmpf(const void *v1, con
> }
>
> /* The actual automatic NUMA placement routine */
> -static int numa_place_domain(libxl__gc *gc, libxl_domain_build_info *info)
> +static int numa_place_domain(libxl__gc *gc, uint32_t domid,
> + libxl_domain_build_info *info)
> {
> int nr_candidates = 0;
> libxl__numa_candidate *candidates = NULL;
> libxl_bitmap candidate_nodemap;
> - libxl_cpupoolinfo *pinfo;
> - int nr_pools, rc = 0;
> + libxl_cpupoolinfo cpupool_info;
> + int i, cpupool, rc = 0;
> uint32_t memkb;
>
> - /* First of all, if cpupools are in use, better not to mess with them */
> - pinfo = libxl_list_cpupool(CTX, &nr_pools);
> - if (!pinfo)
> - return ERROR_FAIL;
> - if (nr_pools > 1) {
> - LOG(NOTICE, "skipping NUMA placement as cpupools are in use");
> - goto out;
> - }
> + /*
> + * Extract the cpumap from the cpupool the domain belong to. In fact,
> + * it only makes sense to consider the cpus/nodes that are in there
> + * for placement.
> + */
> + rc = cpupool = libxl__domain_cpupool(gc, domid);
> + if (rc < 0)
> + return rc;
> + rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
> + if (rc)
> + return rc;
>
> rc = libxl_domain_need_memory(CTX, info, &memkb);
> if (rc)
> @@ -513,7 +547,8 @@ static int numa_place_domain(libxl__gc *
>
> /* Find all the candidates with enough free memory and at least
> * as much pcpus as the domain has vcpus. */
> - rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus, 0, 0,
> + rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus,
> + 0, 0, &cpupool_info.cpumap,
> &candidates, &nr_candidates);
> if (rc)
> goto out;
> @@ -538,13 +573,20 @@ static int numa_place_domain(libxl__gc *
> if (rc)
> goto out;
>
> + /* Avoid trying to set the affinity to cpus that might be in the
> + * nodemap but not in our cpupool. */
> + libxl_for_each_set_bit(i, info->cpumap) {
> + if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
> + libxl_bitmap_reset(&info->cpumap, i);
> + }
> +
> LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
> "%"PRIu32" KB free selected", candidates[0].nr_nodes,
> candidates[0].nr_cpus, candidates[0].free_memkb / 1024);
>
> out:
> libxl_bitmap_dispose(&candidate_nodemap);
> - libxl_cpupoolinfo_list_free(pinfo, nr_pools);
> + libxl_cpupoolinfo_dispose(&cpupool_info);
> return rc;
> }
>
> @@ -567,7 +609,7 @@ int libxl__build_pre(libxl__gc *gc, uint
> * whatever that turns out to be.
> */
> if (libxl_bitmap_is_full(&info->cpumap)) {
> - int rc = numa_place_domain(gc, info);
> + int rc = numa_place_domain(gc, domid, info);
> if (rc)
> return rc;
> }
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -2094,14 +2094,17 @@ typedef struct {
> * least that amount of free memory and that number of cpus, respectively. If
> * min_free_memkb and/or min_cpus are 0, the candidates' free memory and number
> * of cpus won't be checked at all, which means a candidate will always be
> - * considered suitable wrt the specific constraint. cndts is where the list of
> - * exactly nr_cndts candidates is returned. Note that, in case no candidates
> - * are found at all, the function returns successfully, but with nr_cndts equal
> - * to zero.
> + * considered suitable wrt the specific constraint. suitable_cpumap is useful
> + * for specifyin we want only the cpus in that mask to be considered while

specifying

Apart from those two spelling errors:
Acked-by: Ian Campbell <ian.campbell [at] citrix>

> + * generating placement candidates (for example because of cpupools). cndts is
> + * where the list of exactly nr_cndts candidates is returned. Note that, in
> + * case no candidates are found at all, the function returns successfully, but
> + * with nr_cndts equal to zero.
> */
> _hidden int libxl__get_numa_candidates(libxl__gc *gc,
> uint32_t min_free_memkb, int min_cpus,
> int min_nodes, int max_nodes,
> + const libxl_bitmap *suitable_cpumap,
> libxl__numa_candidate *cndts[], int *nr_cndts);
>
> /* allocation and deallocation for placement candidates */



_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


raistlin at linux

Jun 21, 2012, 6:54 AM

Post #3 of 4 (51 views)
Permalink
Re: [PATCH 09 of 10 v2] libxl: have NUMA placement deal with cpupools [In reply to]

On Thu, 2012-06-21 at 14:31 +0100, Ian Campbell wrote:
> > + * considered suitable wrt the specific constraint. suitable_cpumap is useful
> > + * for specifyin we want only the cpus in that mask to be considered while
>
> specifying
>
> Apart from those two spelling errors:
>
Yeah, sorry for that. I'm very bad at spelling correctly when typing,
and this bloody keyboard is not helping! :-P

Isn't there any tool for spell checking code comments?

Thanks and Regards,
Dario

--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
Attachments: signature.asc (0.19 KB)


Ian.Campbell at citrix

Jun 21, 2012, 6:58 AM

Post #4 of 4 (52 views)
Permalink
Re: [PATCH 09 of 10 v2] libxl: have NUMA placement deal with cpupools [In reply to]

On Thu, 2012-06-21 at 14:54 +0100, Dario Faggioli wrote:
> On Thu, 2012-06-21 at 14:31 +0100, Ian Campbell wrote:
> > > + * considered suitable wrt the specific constraint. suitable_cpumap is useful
> > > + * for specifyin we want only the cpus in that mask to be considered while
> >
> > specifying
> >
> > Apart from those two spelling errors:
> >
> Yeah, sorry for that. I'm very bad in spelling correctly when typing,
> and this bloody keyboard is not helping! :-P
>
> Isn't there any tool for spell checking code comments?

I've often thought such a thing would be useful, but I don't know of
one.

TBH I only really spot the spelling errors because my mail client
happens to draw a squiggly red line under them in the quotes.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel

Xen devel RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.