bitmap: introduce bitmap_weight_from()

[PATCH 1/2] bitmap: add bitmap_weight_from()

Posted by Yury Norov 2 months, 2 weeks ago

From: Yury Norov (NVIDIA) <yury.norov@gmail.com>

bitmap_weight_from() returns the Hamming weight of a bitmap, counting
from bit @start. It is useful in topo_unit_count() and potentially in
more spots.

Signed-off-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
---
 include/linux/bitmap.h | 11 +++++++++++
 lib/bitmap.c           | 28 ++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 595217b7a6e7..3cde3bd766b7 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -56,6 +56,7 @@ struct device;
  *  bitmap_weight(src, nbits)                   Hamming Weight: number set bits
  *  bitmap_weight_and(src1, src2, nbits)        Hamming Weight of and'ed bitmap
  *  bitmap_weight_andnot(src1, src2, nbits)     Hamming Weight of andnot'ed bitmap
+ *  bitmap_weight_from(src, start, nbits)       Hamming Weight starting from @start
  *  bitmap_set(dst, pos, nbits)                 Set specified bit area
  *  bitmap_clear(dst, pos, nbits)               Clear specified bit area
  *  bitmap_find_next_zero_area(buf, len, pos, n, mask)  Find bit free area
@@ -181,6 +182,8 @@ unsigned int __bitmap_weight_and(const unsigned long *bitmap1,
 				 const unsigned long *bitmap2, unsigned int nbits);
 unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1,
 				    const unsigned long *bitmap2, unsigned int nbits);
+unsigned int __bitmap_weight_from(const unsigned long *bitmap,
+					unsigned int start, unsigned int nbits);
 void __bitmap_set(unsigned long *map, unsigned int start, int len);
 void __bitmap_clear(unsigned long *map, unsigned int start, int len);
 
@@ -446,6 +449,14 @@ unsigned int bitmap_weight(const unsigned long *src, unsigned int nbits)
 	return __bitmap_weight(src, nbits);
 }
 
+static __always_inline
+unsigned int bitmap_weight_from(const unsigned long *src, unsigned int start, unsigned int nbits)
+{
+	if (small_const_nbits(start + nbits - 1))
+		return hweight_long(*src & GENMASK(start + nbits - 1, start));
+	return __bitmap_weight_from(src, start, nbits);
+}
+
 static __always_inline
 unsigned long bitmap_weight_and(const unsigned long *src1,
 				const unsigned long *src2, unsigned int nbits)
diff --git a/lib/bitmap.c b/lib/bitmap.c
index b97692854966..eb9905071e3b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -335,12 +335,40 @@ EXPORT_SYMBOL(__bitmap_subset);
 	w;									\
 })
 
+#define BITMAP_WEIGHT_FROM(FETCH, start, bits)					\
+({										\
+	unsigned long __start = (start), __bits = (bits);			\
+	unsigned int idx, w = 0;						\
+										\
+	if (unlikely(__start >= bits))						\
+		goto out;							\
+										\
+	idx = __start / BITS_PER_LONG;						\
+	w = (FETCH) & BITMAP_FIRST_WORD_MASK(__start);				\
+										\
+	for (++idx; idx < __bits / BITS_PER_LONG; idx++)			\
+		w += hweight_long(FETCH);					\
+										\
+	if (__bits % BITS_PER_LONG)						\
+		w += hweight_long((FETCH) & BITMAP_LAST_WORD_MASK(__bits));	\
+										\
+out:										\
+	w;									\
+})
+
 unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 {
 	return BITMAP_WEIGHT(bitmap[idx], bits);
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
+unsigned int __bitmap_weight_from(const unsigned long *bitmap,
+					unsigned int start, unsigned int bits)
+{
+	return BITMAP_WEIGHT_FROM(bitmap[idx], start, bits);
+}
+EXPORT_SYMBOL(__bitmap_weight_from);
+
 unsigned int __bitmap_weight_and(const unsigned long *bitmap1,
 				const unsigned long *bitmap2, unsigned int bits)
 {
-- 
2.43.0

Re: [PATCH 1/2] bitmap: add bitmap_weight_from()

Posted by Thomas Gleixner 2 months, 2 weeks ago

On Sat, Jul 19 2025 at 21:41, Yury Norov wrote:
>  
> +#define BITMAP_WEIGHT_FROM(FETCH, start, bits)				\
> +({										\
> +	unsigned long __start = (start), __bits = (bits);			\
> +	unsigned int idx, w = 0;						\
> +										\
> +	if (unlikely(__start >= bits))						\
> +		goto out;							\
> +										\
> +	idx = __start / BITS_PER_LONG;						\
> +	w = (FETCH) & BITMAP_FIRST_WORD_MASK(__start);				\

So this expands to

        w = bitmap[idx] & (~0UL << ((start) & (BITS_PER_LONG - 1)));

This means @w contains the raw content of the first bitmap word, except
for the masked-off bits, rather than its Hamming weight. Let's assume
@start is 0 and @bits is 32. Therefore @idx is 0.

Further assume bitmap[idx] is all ones, which means 64 bits set on a
64-bit system. That results in

      w = bitmap[0] & (~0UL << ((0) & (BITS_PER_LONG - 1)));
-->   w = 0xFFFFFFFFFFFFFFFF & (0xFFFFFFFFFFFFFFFF << (0 & 0x3F));
-->   w = 0xFFFFFFFFFFFFFFFF;

which is obviously bogus.

> +	for (++idx; idx < __bits / BITS_PER_LONG; idx++)			\
> +		w += hweight_long(FETCH);					\

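The loop condition evaluates to false (@idx is 1 after the increment and
__bits / BITS_PER_LONG is 0), so the loop body is never executed.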

> +	if (__bits % BITS_PER_LONG)						\

Evaluates to true.

> +		w += hweight_long((FETCH) & BITMAP_LAST_WORD_MASK(__bits));	\

So this is executed and evaluates to:

      w += hweight_long(bitmap[1] & (~0UL >> (-(32UL) & (BITS_PER_LONG - 1))));

Let's assume the second word contains all ones as well.

-->   w += hweight_long(0xFFFFFFFFFFFFFFFF & (0xFFFFFFFFFFFFFFFF >> (0xFFFFFFFFFFFFFFE0 & 0x3F)));
-->   w += hweight_long(0xFFFFFFFFFFFFFFFF & (0xFFFFFFFFFFFFFFFF >> (0x20)));
-->   w += hweight_long(0xFFFFFFFFFFFFFFFF & 0xFFFFFFFF);

-->   w += 32;

Since @w is an unsigned int, the bogus initial value above is truncated
to 0xFFFFFFFF, and due to the wraparound of the addition it results in

      w = 31

which is not making the bogosity above more correct. And no, you can't
just fix up the initial assignment to @w:

	w = hweight_long((FETCH) & BITMAP_FIRST_WORD_MASK(__start));

because then the result is 64 + 32 == 96, as the first word is counted
in full and the final clause is executed on top of it.

Something like this should work:

        unsigned int idx, maxidx, w = 0;

	idx = start / BITS_PER_LONG;
	w = hweight_long((FETCH) & BITMAP_FIRST_WORD_MASK((unsigned long)start));

        maxidx = bits / BITS_PER_LONG;
        for (idx++; idx < maxidx; idx++)
        	w += hweight_long((FETCH));

        if (maxidx * BITS_PER_LONG < bits)
        	w += hweight_long((FETCH) & BITMAP_LAST_WORD_MASK((unsigned long)bits));
        
No?

Thanks,

        tglx

[PATCH 1/2] bitmap: add bitmap_weight_from()
[PATCH 2/2] x86: topology: simplify topo_unit_count()