... and drop generic_hweight64().

This is identical on all architectures except ARM32. Add one extra SELF_TEST
to check that hweight64() works when the input is split in half.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>
CC: Oleksii Kurochko <oleksii.kurochko@gmail.com>
CC: Shawn Anastasio <sanastasio@raptorengineering.com>
---
 xen/arch/arm/include/asm/bitops.h |  1 -
 xen/arch/ppc/include/asm/bitops.h |  1 -
 xen/arch/x86/include/asm/bitops.h |  1 -
 xen/common/bitops.c               |  3 +++
 xen/include/xen/bitops.h          | 26 ++++++++------------------
 5 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/xen/arch/arm/include/asm/bitops.h b/xen/arch/arm/include/asm/bitops.h
index 91cd167b6bbb..bed6b3b98e08 100644
--- a/xen/arch/arm/include/asm/bitops.h
+++ b/xen/arch/arm/include/asm/bitops.h
@@ -84,7 +84,6 @@ bool clear_mask16_timeout(uint16_t mask, volatile void *p,
  *
  * The Hamming Weight of a number is the total number of bits set in it.
  */
-#define hweight64(x) generic_hweight64(x)
 #define hweight32(x) generic_hweight32(x)
 
 #endif /* _ARM_BITOPS_H */
diff --git a/xen/arch/ppc/include/asm/bitops.h b/xen/arch/ppc/include/asm/bitops.h
index 64512e949530..24dc35ef644d 100644
--- a/xen/arch/ppc/include/asm/bitops.h
+++ b/xen/arch/ppc/include/asm/bitops.h
@@ -132,7 +132,6 @@ static inline int test_and_set_bit(unsigned int nr, volatile void *addr)
  *
  * The Hamming Weight of a number is the total number of bits set in it.
  */
-#define hweight64(x) __builtin_popcountll(x)
 #define hweight32(x) __builtin_popcount(x)
 
 #endif /* _ASM_PPC_BITOPS_H */
diff --git a/xen/arch/x86/include/asm/bitops.h b/xen/arch/x86/include/asm/bitops.h
index 4c5b21907a64..9d3a2448036e 100644
--- a/xen/arch/x86/include/asm/bitops.h
+++ b/xen/arch/x86/include/asm/bitops.h
@@ -481,7 +481,6 @@ static always_inline unsigned int arch_flsl(unsigned long x)
  *
  * The Hamming Weight of a number is the total number of bits set in it.
  */
-#define hweight64(x) generic_hweight64(x)
 #define hweight32(x) generic_hweight32(x)
 
 #endif /* _X86_BITOPS_H */
diff --git a/xen/common/bitops.c b/xen/common/bitops.c
index d0c268b4994a..f6a3eb5c9daf 100644
--- a/xen/common/bitops.c
+++ b/xen/common/bitops.c
@@ -117,6 +117,9 @@ static void __init test_hweight(void)
     CHECK(hweightl, 1 | (1UL << (BITS_PER_LONG - 1)), 2);
     CHECK(hweightl, -1UL, BITS_PER_LONG);
+
+    /* unsigned int hweight64(uint64_t) */
+    CHECK(hweight64, -1ULL, 64);
 }
 
 static void __init __constructor test_bitops(void)
diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h
index 11a1c9130722..e97516552a2e 100644
--- a/xen/include/xen/bitops.h
+++ b/xen/include/xen/bitops.h
@@ -302,6 +302,14 @@ static always_inline __pure unsigned int hweightl(unsigned long x)
 #endif
 }
 
+static always_inline __pure unsigned int hweight64(uint64_t x)
+{
+    if ( BITS_PER_LONG == 64 )
+        return hweightl(x);
+    else
+        return hweightl(x >> 32) + hweightl(x);
+}
+
 /* --------------------- Please tidy below here --------------------- */
 
 #ifndef find_next_bit
@@ -389,24 +397,6 @@ static inline unsigned int generic_hweight32(unsigned int w)
     return (w + (w >> 16)) & 0xff;
 }
 
-static inline unsigned int generic_hweight64(uint64_t w)
-{
-    if ( BITS_PER_LONG < 64 )
-        return generic_hweight32(w >> 32) + generic_hweight32(w);
-
-    w -= (w >> 1) & 0x5555555555555555UL;
-    w = (w & 0x3333333333333333UL) + ((w >> 2) & 0x3333333333333333UL);
-    w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0fUL;
-
-    if ( IS_ENABLED(CONFIG_HAS_FAST_MULTIPLY) )
-        return (w * 0x0101010101010101UL) >> 56;
-
-    w += w >> 8;
-    w += w >> 16;
-
-    return (w + (w >> 32)) & 0xFF;
-}
-
 /*
  * rol32 - rotate a 32-bit value left
  *
--
2.39.2
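
As a standalone illustration of the half-split behaviour the extra SELF_TEST
exercises (plain C, not Xen code; the helper names below are made up for the
example): on a 32-bit build a 64-bit input is counted as two 32-bit halves,
so an all-ones value only reports 64 if neither half is dropped.

    #include <assert.h>
    #include <stdint.h>

    /* Kernighan's trick: each iteration clears the lowest set bit. */
    static unsigned int popcount32_demo(uint32_t w)
    {
        unsigned int n = 0;

        for ( ; w; w &= w - 1 )
            n++;

        return n;
    }

    /* The same half-split shape the patch uses when BITS_PER_LONG != 64. */
    static unsigned int hweight64_demo(uint64_t x)
    {
        return popcount32_demo((uint32_t)(x >> 32)) + popcount32_demo((uint32_t)x);
    }

    int main(void)
    {
        assert(hweight64_demo(0) == 0);
        assert(hweight64_demo(1ULL | (1ULL << 63)) == 2); /* one bit in each half */
        assert(hweight64_demo(-1ULL) == 64);              /* the new SELF_TEST case */
        return 0;
    }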
On 23.08.2024 01:06, Andrew Cooper wrote:
> ... and drop generic_hweight64().
>
> This is identical on all architectures except ARM32. Add one extra SELF_TEST
> to check that hweight64() works when the input is split in half.
>
> No functional change.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
with one remark:

> --- a/xen/include/xen/bitops.h
> +++ b/xen/include/xen/bitops.h
> @@ -302,6 +302,14 @@ static always_inline __pure unsigned int hweightl(unsigned long x)
>  #endif
>  }
>
> +static always_inline __pure unsigned int hweight64(uint64_t x)
> +{
> +    if ( BITS_PER_LONG == 64 )
> +        return hweightl(x);
> +    else
> +        return hweightl(x >> 32) + hweightl(x);

This assumes BITS_PER_LONG == 32, which of course is true right now, but
doesn't need to be in general. Better add an explicit cast to uint32_t
(or masking by 0xffffffffU)?

Jan
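
For concreteness, the adjustment being asked about would only touch the
low-half term; a minimal sketch (not taken from the posted series) with the
explicit truncation, so the split no longer relies on BITS_PER_LONG being
exactly 32:

    static always_inline __pure unsigned int hweight64(uint64_t x)
    {
        if ( BITS_PER_LONG == 64 )
            return hweightl(x);
        else
            /* Truncate explicitly rather than relying on BITS_PER_LONG == 32. */
            return hweightl(x >> 32) + hweightl((uint32_t)x);
    }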
On 26/08/2024 12:55 pm, Jan Beulich wrote:
> On 23.08.2024 01:06, Andrew Cooper wrote:
>> ... and drop generic_hweight64().
>>
>> This is identical on all architectures except ARM32. Add one extra SELF_TEST
>> to check that hweight64() works when the input is split in half.
>>
>> No functional change.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

Thanks.

> with one remark:
>
>> --- a/xen/include/xen/bitops.h
>> +++ b/xen/include/xen/bitops.h
>> @@ -302,6 +302,14 @@ static always_inline __pure unsigned int hweightl(unsigned long x)
>>  #endif
>>  }
>>
>> +static always_inline __pure unsigned int hweight64(uint64_t x)
>> +{
>> +    if ( BITS_PER_LONG == 64 )
>> +        return hweightl(x);
>> +    else
>> +        return hweightl(x >> 32) + hweightl(x);
> This assumes BITS_PER_LONG == 32, which of course is true right now, but
> doesn't need to be in general. Better add an explicit cast to uint32_t
> (or masking by 0xffffffffU)?

This is part of the point of putting in the self-tests. They're
intended to catch things like this in new build environments.

Although, I think we've got enough cases which will #error on
BITS_PER_LONG not being 32 or 64.

Again, this is modelled after f[fl]s64() which have the same
expectations about the BITS_PER_LONG != 64 case.

~Andrew
On 27.08.2024 13:50, Andrew Cooper wrote:
> On 26/08/2024 12:55 pm, Jan Beulich wrote:
>> On 23.08.2024 01:06, Andrew Cooper wrote:
>>> --- a/xen/include/xen/bitops.h
>>> +++ b/xen/include/xen/bitops.h
>>> @@ -302,6 +302,14 @@ static always_inline __pure unsigned int hweightl(unsigned long x)
>>>  #endif
>>>  }
>>>
>>> +static always_inline __pure unsigned int hweight64(uint64_t x)
>>> +{
>>> +    if ( BITS_PER_LONG == 64 )
>>> +        return hweightl(x);
>>> +    else
>>> +        return hweightl(x >> 32) + hweightl(x);
>> This assumes BITS_PER_LONG == 32, which of course is true right now, but
>> doesn't need to be in general. Better add an explicit cast to uint32_t
>> (or masking by 0xffffffffU)?
>
> This is part of the point of putting in the self-tests. They're
> intended to catch things like this in new build environments.

I don't think I saw any testcase where the result would be wrong if
this split didn't truncate x to the low 32 bits on the rhs of the +.

> Although, I think we've got enough cases which will #error on
> BITS_PER_LONG not being 32 or 64.

My take on this is: Such checks (#error or whatever else precaution)
should live in every single place where violating the assumptions
made would matter. Or else - how do you locate all the places that
need changing?

> Again, this is modelled after f[fl]s64() which have the same
> expectations about the BITS_PER_LONG != 64 case.

Both of them are fine afaict. fls64() has an explicit intermediate
variable of type uint32_t, and ffs64() has a (uint32_t)x as part
of the conditional expression achieving the intended effect.

Anyway, why not use hweight32() instead of hweightl() here? That'll
make things very explicit.

Jan
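
A sketch of the hweight32()-based form suggested at the end; it assumes an
hweight32() helper taking a uint32_t is already available, which the reply
below notes is only introduced by the next patch in the series:

    static always_inline __pure unsigned int hweight64(uint64_t x)
    {
        if ( BITS_PER_LONG == 64 )
            return hweightl(x);
        else
            /* Assumes hweight32() exists and takes a uint32_t, truncating each half. */
            return hweight32(x >> 32) + hweight32(x);
    }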
On 27/08/2024 2:00 pm, Jan Beulich wrote:
> On 27.08.2024 13:50, Andrew Cooper wrote:
>> On 26/08/2024 12:55 pm, Jan Beulich wrote:
>>> On 23.08.2024 01:06, Andrew Cooper wrote:
>>>> --- a/xen/include/xen/bitops.h
>>>> +++ b/xen/include/xen/bitops.h
>>>> @@ -302,6 +302,14 @@ static always_inline __pure unsigned int hweightl(unsigned long x)
>>>>  #endif
>>>>  }
>>>>
>>>> +static always_inline __pure unsigned int hweight64(uint64_t x)
>>>> +{
>>>> +    if ( BITS_PER_LONG == 64 )
>>>> +        return hweightl(x);
>>>> +    else
>>>> +        return hweightl(x >> 32) + hweightl(x);
>>> This assumes BITS_PER_LONG == 32, which of course is true right now, but
>>> doesn't need to be in general. Better add an explicit cast to uint32_t
>>> (or masking by 0xffffffffU)?
>> This is part of the point of putting in the self-tests. They're
>> intended to catch things like this in new build environments.
> I don't think I saw any testcase where the result would be wrong if
> this split didn't truncate x to the low 32 bits on the rhs of the +.

That's arguably an error in the choice of test cases. Although, they're
just my best guesses at some

>
>> Although, I think we've got enough cases which will #error on
>> BITS_PER_LONG not being 32 or 64.
> My take on this is: Such checks (#error or whatever else precaution)
> should live in every single place where violating the assumptions
> made would matter. Or else - how do you locate all the places that
> need changing?

Whoever gets to add RISCV-128 support will have to inspect every use of
BITS_PER_LONG, irrespective of #error/BUILD_BUG_ON()/etc. So, the
answer is `grep`.

I'm not advocating that we stop helping out with #error, but it's
unrealistic to expect that only addressing the build errors will result
in a working Xen for BITS_PER_LONG==128.

>
>> Again, this is modelled after f[fl]s64() which have the same
>> expectations about the BITS_PER_LONG != 64 case.
> Both of them are fine afaict. fls64() has an explicit intermediate
> variable of type uint32_t, and ffs64() has a (uint32_t)x as part
> of the conditional expression achieving the intended effect.
>
> Anyway, why not use hweight32() instead of hweightl() here? That'll
> make things very explicit.

hweight32() doesn't exist until the next patch in the series.

Although looking at the end result, I can't figure out why I thought it
was necessary to transform hweight64 first.

I'll swap this patch and the next one, and then use hweight32().

~Andrew
On 27/08/2024 2:25 pm, Andrew Cooper wrote:
> On 27/08/2024 2:00 pm, Jan Beulich wrote:
>> On 27.08.2024 13:50, Andrew Cooper wrote:
>>> On 26/08/2024 12:55 pm, Jan Beulich wrote:
>>>> On 23.08.2024 01:06, Andrew Cooper wrote:
>>> Again, this is modelled after f[fl]s64() which have the same
>>> expectations about the BITS_PER_LONG != 64 case.
>> Both of them are fine afaict. fls64() has an explicit intermediate
>> variable of type uint32_t, and ffs64() has a (uint32_t)x as part
>> of the conditional expression achieving the intended effect.
>>
>> Anyway, why not use hweight32() instead of hweightl() here? That'll
>> make things very explicit.
> hweight32() doesn't exist until the next patch in the series.
>
> Although looking at the end result, I can't figure out why I thought it
> was necessary to transform hweight64 first.
>
> I'll swap this patch and the next one, and then use hweight32().

I've found out why.

The hweight32() patch is the one that deletes generic_hweight32(), but
generic_hweight64() uses it.

I can work around this, but it means keeping generic_hweight32() around
and deleting it in the hweight64() patch.

~Andrew
On 27.08.2024 16:32, Andrew Cooper wrote:
> On 27/08/2024 2:25 pm, Andrew Cooper wrote:
>> On 27/08/2024 2:00 pm, Jan Beulich wrote:
>>> On 27.08.2024 13:50, Andrew Cooper wrote:
>>>> On 26/08/2024 12:55 pm, Jan Beulich wrote:
>>>>> On 23.08.2024 01:06, Andrew Cooper wrote:
>>>> Again, this is modelled after f[fl]s64() which have the same
>>>> expectations about the BITS_PER_LONG != 64 case.
>>> Both of them are fine afaict. fls64() has an explicit intermediate
>>> variable of type uint32_t, and ffs64() has a (uint32_t)x as part
>>> of the conditional expression achieving the intended effect.
>>>
>>> Anyway, why not use hweight32() instead of hweightl() here? That'll
>>> make things very explicit.
>> hweight32() doesn't exist until the next patch in the series.
>>
>> Although looking at the end result, I can't figure out why I thought it
>> was necessary to transform hweight64 first.
>>
>> I'll swap this patch and the next one, and then use hweight32().
> I've found out why.
>
> The hweight32() patch is the one that deletes generic_hweight32(), but
> generic_hweight64() uses it.
>
> I can work around this, but it means keeping generic_hweight32() around
> and deleting it in the hweight64() patch.

Or simply fold both patches?

Jan