[v3] xen/bitops: hweight() cleanup/improvements

[PATCH v3 4/5] xen/bitops: Implement hweight64() in terms of hweight{l,32}()

Posted by Andrew Cooper 2 months, 2 weeks ago

... and drop generic_hweight{32,64}().

This is identical on all architectures except ARM32.  Add one extra SELF_TEST
to check that hweight64() works when the input is split in half.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>
CC: Oleksii Kurochko <oleksii.kurochko@gmail.com>
CC: Shawn Anastasio <sanastasio@raptorengineering.com>

v3:
 * Use >=, even if it does differ from ffs()
v2:
 * Reorder with respect to the hweight32() patch
 * s/__pure/attr_const/
---
 xen/arch/arm/include/asm/bitops.h |  8 ------
 xen/arch/ppc/include/asm/bitops.h |  8 ------
 xen/arch/x86/include/asm/bitops.h |  8 ------
 xen/common/bitops.c               |  3 +++
 xen/include/xen/bitops.h          | 45 ++++++-------------------------
 5 files changed, 11 insertions(+), 61 deletions(-)

diff --git a/xen/arch/arm/include/asm/bitops.h b/xen/arch/arm/include/asm/bitops.h
index b28c25b3d52d..f163d9bb4578 100644
--- a/xen/arch/arm/include/asm/bitops.h
+++ b/xen/arch/arm/include/asm/bitops.h
@@ -78,14 +78,6 @@ bool clear_mask16_timeout(uint16_t mask, volatile void *p,
 #define arch_fls(x)  ((x) ? 32 - __builtin_clz(x) : 0)
 #define arch_flsl(x) ((x) ? BITS_PER_LONG - __builtin_clzl(x) : 0)
 
-/**
- * hweightN - returns the hamming weight of a N-bit word
- * @x: the word to weigh
- *
- * The Hamming Weight of a number is the total number of bits set in it.
- */
-#define hweight64(x) generic_hweight64(x)
-
 #endif /* _ARM_BITOPS_H */
 /*
  * Local variables:
diff --git a/xen/arch/ppc/include/asm/bitops.h b/xen/arch/ppc/include/asm/bitops.h
index f488a7c03425..c942e9432e20 100644
--- a/xen/arch/ppc/include/asm/bitops.h
+++ b/xen/arch/ppc/include/asm/bitops.h
@@ -126,12 +126,4 @@ static inline int test_and_set_bit(unsigned int nr, volatile void *addr)
 
 #define arch_hweightl(x) __builtin_popcountl(x)
 
-/**
- * hweightN - returns the hamming weight of a N-bit word
- * @x: the word to weigh
- *
- * The Hamming Weight of a number is the total number of bits set in it.
- */
-#define hweight64(x) __builtin_popcountll(x)
-
 #endif /* _ASM_PPC_BITOPS_H */
diff --git a/xen/arch/x86/include/asm/bitops.h b/xen/arch/x86/include/asm/bitops.h
index 507b043b8a86..642d8e58b288 100644
--- a/xen/arch/x86/include/asm/bitops.h
+++ b/xen/arch/x86/include/asm/bitops.h
@@ -475,12 +475,4 @@ static always_inline unsigned int arch_flsl(unsigned long x)
 }
 #define arch_flsl arch_flsl
 
-/**
- * hweightN - returns the hamming weight of a N-bit word
- * @x: the word to weigh
- *
- * The Hamming Weight of a number is the total number of bits set in it.
- */
-#define hweight64(x) generic_hweight64(x)
-
 #endif /* _X86_BITOPS_H */
diff --git a/xen/common/bitops.c b/xen/common/bitops.c
index 5e5d20d225d7..91ae961440af 100644
--- a/xen/common/bitops.c
+++ b/xen/common/bitops.c
@@ -144,6 +144,9 @@ static void __init test_hweight(void)
 
     CHECK(hweightl, 1 | (1UL << (BITS_PER_LONG - 1)), 2);
     CHECK(hweightl, -1UL, BITS_PER_LONG);
+
+    /* unsigned int hweight64(uint64_t) */
+    CHECK(hweight64, -1ULL, 64);
 }
 
 static void __init __constructor test_bitops(void)
diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h
index a462c3065158..12eb88fb1255 100644
--- a/xen/include/xen/bitops.h
+++ b/xen/include/xen/bitops.h
@@ -331,6 +331,14 @@ static always_inline attr_const unsigned int hweight32(uint32_t x)
     return hweightl(x);
 }
 
+static always_inline attr_const unsigned int hweight64(uint64_t x)
+{
+    if ( BITS_PER_LONG >= 64 )
+        return hweightl(x);
+    else
+        return hweight32(x >> 32) + hweight32(x);
+}
+
 /* --------------------- Please tidy below here --------------------- */
 
 #ifndef find_next_bit
@@ -399,43 +407,6 @@ static inline int get_count_order(unsigned int count)
     return order;
 }
 
-/*
- * hweightN: returns the hamming weight (i.e. the number
- * of bits set) of a N-bit word
- */
-
-static inline unsigned int generic_hweight32(unsigned int w)
-{
-    w -= (w >> 1) & 0x55555555;
-    w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
-    w =  (w + (w >> 4)) & 0x0f0f0f0f;
-
-    if ( IS_ENABLED(CONFIG_HAS_FAST_MULTIPLY) )
-        return (w * 0x01010101) >> 24;
-
-    w += w >> 8;
-
-    return (w + (w >> 16)) & 0xff;
-}
-
-static inline unsigned int generic_hweight64(uint64_t w)
-{
-    if ( BITS_PER_LONG < 64 )
-        return generic_hweight32(w >> 32) + generic_hweight32(w);
-
-    w -= (w >> 1) & 0x5555555555555555UL;
-    w =  (w & 0x3333333333333333UL) + ((w >> 2) & 0x3333333333333333UL);
-    w =  (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0fUL;
-
-    if ( IS_ENABLED(CONFIG_HAS_FAST_MULTIPLY) )
-        return (w * 0x0101010101010101UL) >> 56;
-
-    w += w >> 8;
-    w += w >> 16;
-
-    return (w + (w >> 32)) & 0xFF;
-}
-
 /*
  * rol32 - rotate a 32-bit value left
  *
-- 
2.39.2

Re: [PATCH v3 4/5] xen/bitops: Implement hweight64() in terms of hweight{l,32}()

Posted by Stefano Stabellini 2 months, 2 weeks ago

On Wed, 4 Sep 2024, Andrew Cooper wrote:
> ... and drop generic_hweight{32,64}().
> 
> This is identical on all architectures except ARM32.  Add one extra SELF_TEST
> to check that hweight64() works when the input is split in half.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Stefano Stabellini <sstabellini@kernel.org>

[PATCH v3 1/5] xen/bitops: Reinstate the please tidy message
[PATCH v3 2/5] xen/bitops: Drop the remnants of hweight{8,16}()
[PATCH v3 3/5] xen/bitops: Implement hweight32() in terms of hweightl()
[PATCH v3 4/5] xen/bitops: Implement hweight64() in terms of hweight{l,32}()
[PATCH v3 5/5] x86/bitops: Use the POPCNT instruction when available