[patch V3 07/20] cpumask: Introduce cpumask_weighted_or()

Thomas Gleixner posted 20 patches 3 months, 1 week ago
There is a newer version of this series
[patch V3 07/20] cpumask: Introduce cpumask_weighted_or()
Posted by Thomas Gleixner 3 months, 1 week ago
CID management ORs two cpumasks and then calculates the weight of the
result. That's inefficient, as the same data has to be walked twice. As
this is done with the runqueue lock held, there is a real benefit in
speeding this up. Depending on the system, this results in 10-20% fewer
cycles spent with the runqueue lock held for a 4K cpumask.

Provide cpumask_weighted_or() and the corresponding bitmap functions which
return the weight of the OR result right away.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
---
V3: Rename again - Yury
V2: Rename and use the BITMAP_WEIGHT() macro - Yury
---
 include/linux/bitmap.h  |   15 +++++++++++++++
 include/linux/cpumask.h |   16 ++++++++++++++++
 lib/bitmap.c            |    6 ++++++
 3 files changed, 37 insertions(+)

--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -45,6 +45,7 @@ struct device;
  *  bitmap_copy(dst, src, nbits)                *dst = *src
  *  bitmap_and(dst, src1, src2, nbits)          *dst = *src1 & *src2
  *  bitmap_or(dst, src1, src2, nbits)           *dst = *src1 | *src2
+ *  bitmap_weighted_or(dst, src1, src2, nbits)	*dst = *src1 | *src2. Returns Hamming Weight of dst
  *  bitmap_xor(dst, src1, src2, nbits)          *dst = *src1 ^ *src2
  *  bitmap_andnot(dst, src1, src2, nbits)       *dst = *src1 & ~(*src2)
  *  bitmap_complement(dst, src, nbits)          *dst = ~(*src)
@@ -165,6 +166,8 @@ bool __bitmap_and(unsigned long *dst, co
 		 const unsigned long *bitmap2, unsigned int nbits);
 void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, unsigned int nbits);
+unsigned int __bitmap_weighted_or(unsigned long *dst, const unsigned long *bitmap1,
+				  const unsigned long *bitmap2, unsigned int nbits);
 void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 		  const unsigned long *bitmap2, unsigned int nbits);
 bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
@@ -338,6 +341,18 @@ void bitmap_or(unsigned long *dst, const
 }
 
 static __always_inline
+unsigned int bitmap_weighted_or(unsigned long *dst, const unsigned long *src1,
+				const unsigned long *src2, unsigned int nbits)
+{
+	if (small_const_nbits(nbits)) {
+		*dst = *src1 | *src2;
+		return hweight_long(*dst & BITMAP_LAST_WORD_MASK(nbits));
+	} else {
+		return __bitmap_weighted_or(dst, src1, src2, nbits);
+	}
+}
+
+static __always_inline
 void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 		const unsigned long *src2, unsigned int nbits)
 {
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -729,6 +729,22 @@ void cpumask_or(struct cpumask *dstp, co
 }
 
 /**
+ * cpumask_weighted_or - *dstp = *src1p | *src2p and return the weight of the result
+ * @dstp: the cpumask result
+ * @src1p: the first input
+ * @src2p: the second input
+ *
+ * Return: The number of bits set in the resulting cpumask @dstp
+ */
+static __always_inline
+unsigned int cpumask_weighted_or(struct cpumask *dstp, const struct cpumask *src1p,
+				 const struct cpumask *src2p)
+{
+	return bitmap_weighted_or(cpumask_bits(dstp), cpumask_bits(src1p),
+				  cpumask_bits(src2p), small_cpumask_bits);
+}
+
+/**
  * cpumask_xor - *dstp = *src1p ^ *src2p
  * @dstp: the cpumask result
  * @src1p: the first input
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -355,6 +355,12 @@ unsigned int __bitmap_weight_andnot(cons
 }
 EXPORT_SYMBOL(__bitmap_weight_andnot);
 
+unsigned int __bitmap_weighted_or(unsigned long *dst, const unsigned long *bitmap1,
+				  const unsigned long *bitmap2, unsigned int bits)
+{
+	return BITMAP_WEIGHT(({dst[idx] = bitmap1[idx] | bitmap2[idx]; dst[idx]; }), bits);
+}
+
 void __bitmap_set(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
Re: [patch V3 07/20] cpumask: Introduce cpumask_weighted_or()
Posted by Shrikanth Hegde 3 months, 1 week ago

On 10/29/25 6:39 PM, Thomas Gleixner wrote:
> CID management OR's two cpumasks and then calculates the weight on the
> result. That's inefficient as that has to walk the same stuff twice. As
> this is done with runqueue lock held, there is a real benefit of speeding
> this up. Depending on the system this results in 10-20% less cycles spent
> with runqueue lock held for a 4K cpumask.
> 
> Provide cpumask_weighted_or() and the corresponding bitmap functions which
> return the weight of the OR result right away.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Reviewed-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
> ---
> V3: Rename again - Yury
> V2: Rename and use the BITMAP_WEIGHT() macro - Yury
> ---
>   include/linux/bitmap.h  |   15 +++++++++++++++
>   include/linux/cpumask.h |   16 ++++++++++++++++
>   lib/bitmap.c            |    6 ++++++
>   3 files changed, 37 insertions(+)
> 
   
>   /**
> + * cpumask_weighted_or - *dstp = *src1p | *src2p and return the weight of the result
> + * @dstp: the cpumask result
> + * @src1p: the first input
> + * @src2p: the second input
> + *
> + * Return: The number of bits set in the resulting cpumask @dstp
> + */
> +static __always_inline
> +unsigned int cpumask_weighted_or(struct cpumask *dstp, const struct cpumask *src1p,
> +				 const struct cpumask *src2p)
> +{
> +	return bitmap_weighted_or(cpumask_bits(dstp), cpumask_bits(src1p),
> +				  cpumask_bits(src2p), small_cpumask_bits);
> +}

nit:

We currently have cpumask_weight_and() and its variants.
Wouldn't it be better to name it cpumask_weight_or()?
Re: [patch V3 07/20] cpumask: Introduce cpumask_weighted_or()
Posted by Thomas Gleixner 3 months, 1 week ago
On Mon, Nov 03 2025 at 14:45, Shrikanth Hegde wrote:
> On 10/29/25 6:39 PM, Thomas Gleixner wrote:
>> +static __always_inline
>> +unsigned int cpumask_weighted_or(struct cpumask *dstp, const struct cpumask *src1p,
>> +				 const struct cpumask *src2p)
>> +{
>> +	return bitmap_weighted_or(cpumask_bits(dstp), cpumask_bits(src1p),
>> +				  cpumask_bits(src2p), small_cpumask_bits);
>> +}
>
> nit:
>
> We have currently cpumask_weight_and & variants.
> Wouldn't it be better to name it cpumask_weight_or ?

No. cpumask_weight_and() does weight(mask1 & mask2) but this does

    mask3 = mask1 | mask2;
    weight(mask3);

Those are two very different things.
Re: [patch V3 07/20] cpumask: Introduce cpumask_weighted_or()
Posted by Peter Zijlstra 3 months ago
On Mon, Nov 03, 2025 at 02:29:59PM +0100, Thomas Gleixner wrote:
> On Mon, Nov 03 2025 at 14:45, Shrikanth Hegde wrote:
> > On 10/29/25 6:39 PM, Thomas Gleixner wrote:
> >> +static __always_inline
> >> +unsigned int cpumask_weighted_or(struct cpumask *dstp, const struct cpumask *src1p,
> >> +				 const struct cpumask *src2p)
> >> +{
> >> +	return bitmap_weighted_or(cpumask_bits(dstp), cpumask_bits(src1p),
> >> +				  cpumask_bits(src2p), small_cpumask_bits);
> >> +}
> >
> > nit:
> >
> > We have currently cpumask_weight_and & variants.
> > Wouldn't it be better to name it cpumask_weight_or ?
> 
> No. cpumask_weight_and() does weight(mask1 & mask2) but this does

The comment was about naming, notably: s/_weighted_or/_weight_or/g to
better match the existing _weight_and().
Re: [patch V3 07/20] cpumask: Introduce cpumask_weighted_or()
Posted by Mathieu Desnoyers 3 months ago
On 2025-11-10 11:11, Peter Zijlstra wrote:
> On Mon, Nov 03, 2025 at 02:29:59PM +0100, Thomas Gleixner wrote:
>> On Mon, Nov 03 2025 at 14:45, Shrikanth Hegde wrote:
>>> On 10/29/25 6:39 PM, Thomas Gleixner wrote:
>>>> +static __always_inline
>>>> +unsigned int cpumask_weighted_or(struct cpumask *dstp, const struct cpumask *src1p,
>>>> +				 const struct cpumask *src2p)
>>>> +{
>>>> +	return bitmap_weighted_or(cpumask_bits(dstp), cpumask_bits(src1p),
>>>> +				  cpumask_bits(src2p), small_cpumask_bits);
>>>> +}
>>>
>>> nit:
>>>
>>> We have currently cpumask_weight_and & variants.
>>> Wouldn't it be better to name it cpumask_weight_or ?
>>
>> No. cpumask_weight_and() does weight(mask1 & mask2) but this does
> 
> The comment was about naming, notable: s/_weighted_or/_weight_or/g to
> better match the existing _weight_and().

But if we go for "_weight_or" to match "_weight_and", we end up with
the following different semantics between "or" and "and":

cpumask_weight_and():
     inputs: mask1, mask2
     outputs: none

     return weight(mask1 & mask2);

cpumask_weight_or():
     inputs: mask1, mask2
     outputs: mask3

     mask3 = mask1 | mask2;
     return weight(mask3);

What we are trying to do here is apply a bitwise operation on two
inputs, write the resulting mask into mask3, *and* calculate the weight
as well, which is different from just calculating the weight.

Naming things is hard. I agree that the distinction between "weight" and
"weighted" is subtle.

Perhaps something along the lines of cpumask_eval_weight_or(),
which would state the two operations performed (evaluate and calculate
the weight), could work?

Thanks,

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Re: [patch V3 07/20] cpumask: Introduce cpumask_weighted_or()
Posted by Mathieu Desnoyers 3 months, 1 week ago
On 2025-10-29 09:09, Thomas Gleixner wrote:
[...]
> Provide cpumask_weighted_or() and the corresponding bitmap functions which
> return the weight of the OR result right away.

Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com