cpumask_weight() is known to be expensive. In Credit2, we use it in
load-balancing, but only for knowing how many CPUs are active in a
runqueue.
Keeping such count in an integer field of the per-runqueue data
structure we have, completely avoids the need for cpumask_weight().
While there, remove as much other uses of it as we can, even if not in
hot-paths.
Signed-off-by: Dario Faggioli <dfaggioli@suse.com>
---
Cc: George Dunlap <george.dunlap@eu.citrix.com>
---
xen/common/sched_credit2.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 6958b265fc..7034325243 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -466,6 +466,7 @@ struct csched2_runqueue_data {
spinlock_t lock; /* Lock for this runqueue */
struct list_head runq; /* Ordered list of runnable vms */
+ int nr_cpus; /* How many CPUs are sharing this runqueue */
int id; /* ID of this runqueue (-1 if invalid) */
int load; /* Instantaneous load (num of non-idle vcpus) */
@@ -2613,8 +2614,8 @@ retry:
if ( st.orqd->b_avgload > load_max )
load_max = st.orqd->b_avgload;
- cpus_max = cpumask_weight(&st.lrqd->active);
- i = cpumask_weight(&st.orqd->active);
+ cpus_max = st.lrqd->nr_cpus;
+ i = st.orqd->nr_cpus;
if ( i > cpus_max )
cpus_max = i;
@@ -3697,7 +3698,7 @@ csched2_dump(const struct scheduler *ops)
"\tinstload = %d\n"
"\taveload = %"PRI_stime" (~%"PRI_stime"%%)\n",
i,
- cpumask_weight(&prv->rqd[i].active),
+ prv->rqd[i].nr_cpus,
nr_cpu_ids, cpumask_bits(&prv->rqd[i].active),
prv->rqd[i].max_weight,
prv->rqd[i].pick_bias,
@@ -3818,6 +3819,9 @@ init_pdata(struct csched2_private *prv, struct csched2_pcpu *spc,
__cpumask_set_cpu(cpu, &prv->initialized);
__cpumask_set_cpu(cpu, &rqd->smt_idle);
+ rqd->nr_cpus++;
+ ASSERT(cpumask_weight(&rqd->active) == rqd->nr_cpus);
+
/* On the boot cpu we are called before cpu_sibling_mask has been set up. */
if ( cpu == 0 && system_state < SYS_STATE_active )
__cpumask_set_cpu(cpu, &csched2_pcpu(cpu)->sibling_mask);
@@ -3829,8 +3833,11 @@ init_pdata(struct csched2_private *prv, struct csched2_pcpu *spc,
__cpumask_set_cpu(rcpu, &csched2_pcpu(cpu)->sibling_mask);
}
- if ( cpumask_weight(&rqd->active) == 1 )
+ if ( rqd->nr_cpus == 1 )
+ {
+ ASSERT(cpumask_weight(&rqd->active) == 1);
rqd->pick_bias = cpu;
+ }
return spc->runq_id;
}
@@ -3944,8 +3951,12 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
__cpumask_clear_cpu(cpu, &rqd->smt_idle);
__cpumask_clear_cpu(cpu, &rqd->active);
- if ( cpumask_empty(&rqd->active) )
+ rqd->nr_cpus--;
+ ASSERT(cpumask_weight(&rqd->active) == rqd->nr_cpus);
+
+ if ( rqd->nr_cpus == 0 )
{
+ ASSERT(cpumask_empty(&rqd->active));
printk(XENLOG_INFO " No cpus left on runqueue, disabling\n");
deactivate_runqueue(prv, spc->runq_id);
}
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Hello Dario,
On 20.04.19 18:24, Dario Faggioli wrote:
> cpumask_weight() is known to be expensive. In Credit2, we use it in
> load-balancing, but only for knowing how many CPUs are active in a
> runqueue.
>
> Keeping such count in an integer field of the per-runqueue data
> structure we have, completely avoids the need for cpumask_weight().
>
> While there, remove as much other uses of it as we can, even if not in
> hot-paths.
>
> Signed-off-by: Dario Faggioli <dfaggioli@suse.com>
> ---
> Cc: George Dunlap <george.dunlap@eu.citrix.com>
> ---
> xen/common/sched_credit2.c | 21 ++++++++++++++++-----
> 1 file changed, 16 insertions(+), 5 deletions(-)
>
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 6958b265fc..7034325243 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -466,6 +466,7 @@ struct csched2_runqueue_data {
> spinlock_t lock; /* Lock for this runqueue */
>
> struct list_head runq; /* Ordered list of runnable vms */
> + int nr_cpus; /* How many CPUs are sharing this runqueue */
> int id; /* ID of this runqueue (-1 if invalid) */
>
> int load; /* Instantaneous load (num of non-idle vcpus) */
> @@ -2613,8 +2614,8 @@ retry:
> if ( st.orqd->b_avgload > load_max )
> load_max = st.orqd->b_avgload;
>
> - cpus_max = cpumask_weight(&st.lrqd->active);
> - i = cpumask_weight(&st.orqd->active);
> + cpus_max = st.lrqd->nr_cpus;
> + i = st.orqd->nr_cpus;
> if ( i > cpus_max )
> cpus_max = i;
>
> @@ -3697,7 +3698,7 @@ csched2_dump(const struct scheduler *ops)
> "\tinstload = %d\n"
> "\taveload = %"PRI_stime" (~%"PRI_stime"%%)\n",
> i,
> - cpumask_weight(&prv->rqd[i].active),
> + prv->rqd[i].nr_cpus,
> nr_cpu_ids, cpumask_bits(&prv->rqd[i].active),
> prv->rqd[i].max_weight,
> prv->rqd[i].pick_bias,
> @@ -3818,6 +3819,9 @@ init_pdata(struct csched2_private *prv, struct csched2_pcpu *spc,
> __cpumask_set_cpu(cpu, &prv->initialized);
> __cpumask_set_cpu(cpu, &rqd->smt_idle);
>
> + rqd->nr_cpus++;
> + ASSERT(cpumask_weight(&rqd->active) == rqd->nr_cpus);
> +
> /* On the boot cpu we are called before cpu_sibling_mask has been set up. */
> if ( cpu == 0 && system_state < SYS_STATE_active )
> __cpumask_set_cpu(cpu, &csched2_pcpu(cpu)->sibling_mask);
> @@ -3829,8 +3833,11 @@ init_pdata(struct csched2_private *prv, struct csched2_pcpu *spc,
> __cpumask_set_cpu(rcpu, &csched2_pcpu(cpu)->sibling_mask);
> }
>
> - if ( cpumask_weight(&rqd->active) == 1 )
> + if ( rqd->nr_cpus == 1 )
> + {
> + ASSERT(cpumask_weight(&rqd->active) == 1);
Please do not use hard tabs.
> rqd->pick_bias = cpu;
> + }
>
> return spc->runq_id;
> }
> @@ -3944,8 +3951,12 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
> __cpumask_clear_cpu(cpu, &rqd->smt_idle);
> __cpumask_clear_cpu(cpu, &rqd->active);
>
> - if ( cpumask_empty(&rqd->active) )
> + rqd->nr_cpus--;
> + ASSERT(cpumask_weight(&rqd->active) == rqd->nr_cpus);
> +
> + if ( rqd->nr_cpus == 0 )
> {
> + ASSERT(cpumask_empty(&rqd->active));
Ditto.
> printk(XENLOG_INFO " No cpus left on runqueue, disabling\n");
> deactivate_runqueue(prv, spc->runq_id);
> }
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xenproject.org
> https://lists.xenproject.org/mailman/listinfo/xen-devel
>
With nits fixed:
Reviewed-by: Andrii Anisov <andrii_anisov@epam.com>
--
Sincerely,
Andrii Anisov.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
On 20/04/2019 16:24, Dario Faggioli wrote:
> cpumask_weight() is known to be expensive. In Credit2, we use it in
> load-balancing, but only for knowing how many CPUs are active in a
> runqueue.
With a few small changes it can be improved substantially (by using
POPCNT when available), but it is still fundamentally an O(n) operation.
Definitely +1 to algorithm changes which avoid its use entirely.
A few cosmetic observations.
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 6958b265fc..7034325243 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -466,6 +466,7 @@ struct csched2_runqueue_data {
> spinlock_t lock; /* Lock for this runqueue */
>
> struct list_head runq; /* Ordered list of runnable vms */
> + int nr_cpus; /* How many CPUs are sharing this runqueue */
Unsigned int.
> @@ -3829,8 +3833,11 @@ init_pdata(struct csched2_private *prv, struct csched2_pcpu *spc,
> __cpumask_set_cpu(rcpu, &csched2_pcpu(cpu)->sibling_mask);
> }
>
> - if ( cpumask_weight(&rqd->active) == 1 )
> + if ( rqd->nr_cpus == 1 )
> + {
> + ASSERT(cpumask_weight(&rqd->active) == 1);
Tab.
> rqd->pick_bias = cpu;
> + }
>
> return spc->runq_id;
> }
> @@ -3944,8 +3951,12 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
> __cpumask_clear_cpu(cpu, &rqd->smt_idle);
> __cpumask_clear_cpu(cpu, &rqd->active);
>
> - if ( cpumask_empty(&rqd->active) )
> + rqd->nr_cpus--;
> + ASSERT(cpumask_weight(&rqd->active) == rqd->nr_cpus);
> +
> + if ( rqd->nr_cpus == 0 )
> {
> + ASSERT(cpumask_empty(&rqd->active));
And here.
Is it really worth having both asserts? The second is redundant.
~Andrew
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
On Tue, 2019-04-23 at 10:44 +0100, Andrew Cooper wrote:
> On 20/04/2019 16:24, Dario Faggioli wrote:
>
> Definitely +1 to algorithm changes which avoid its use entirely.
>
> A few cosmetic observations.
>
Ok...
> > diff --git a/xen/common/sched_credit2.c
> > b/xen/common/sched_credit2.c
> > index 6958b265fc..7034325243 100644
> > --- a/xen/common/sched_credit2.c
> > +++ b/xen/common/sched_credit2.c
> > @@ -466,6 +466,7 @@ struct csched2_runqueue_data {
> > spinlock_t lock; /* Lock for this
> > runqueue */
> >
> > struct list_head runq; /* Ordered list of runnable
> > vms */
> > + int nr_cpus; /* How many CPUs are sharing this
> > runqueue */
>
> Unsigned int.
>
Yep.
> > @@ -3944,8 +3951,12 @@ csched2_deinit_pdata(const struct scheduler
> > *ops, void *pcpu, int cpu)
> > __cpumask_clear_cpu(cpu, &rqd->smt_idle);
> > __cpumask_clear_cpu(cpu, &rqd->active);
> >
> > - if ( cpumask_empty(&rqd->active) )
> > + rqd->nr_cpus--;
> > + ASSERT(cpumask_weight(&rqd->active) == rqd->nr_cpus);
> > +
> > + if ( rqd->nr_cpus == 0 )
> > {
> > + ASSERT(cpumask_empty(&rqd->active));
>
> And here.
>
Right. Not sure how they ended in there. Will remove them.
> Is it really worth having both asserts? The second is redundant.
>
I think I agree. I'll keep the first one only.
Thanks and Regards
--
Dario Faggioli, Ph.D
http://about.me/dario.faggioli
Virtualization Software Engineer
SUSE Labs, SUSE https://www.suse.com/
-------------------------------------------------------------------
<<This happens because _I_ choose it to happen!>> (Raistlin Majere)
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
© 2016 - 2025 Red Hat, Inc.