From: Oleksii Kurochko <oleksii.kurochko@gmail.com>
This is most easily done together because of how arm32 is currently
structured, but it does just mirror the existing ffs()/ffsl() work.
Introduce compile and boot time testing.
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>
CC: Oleksii Kurochko <oleksii.kurochko@gmail.com>
CC: Shawn Anastasio <sanastasio@raptorengineering.com>
CC: consulting@bugseng.com <consulting@bugseng.com>
CC: Simone Ballarin <simone.ballarin@bugseng.com>
CC: Federico Serafini <federico.serafini@bugseng.com>
CC: Nicola Vetrini <nicola.vetrini@bugseng.com>
v2:
 * New, incorporated from Oleksii's RISC-V series and adjusted.
for x86:
add/remove: 0/0 grow/shrink: 3/17 up/down: 28/-153 (-125)
Function old new delta
pci_enable_msi 1033 1049 +16
vlapic_lowest_prio 330 338 +8
kexec_early_calculations 53 57 +4
pci_restore_msi_state 1159 1157 -2
arch_hwdom_irqs 61 59 -2
control_read 132 129 -3
pci_enable_msi.cold 121 117 -4
arch_get_dma_bitsize 173 169 -4
xmem_pool_alloc 1039 1032 -7
xenheap_max_mfn 49 42 -7
mba_sanitize_thrtl 83 76 -7
xstate_init 807 799 -8
offline_page 965 957 -8
apicid_to_socket 160 152 -8
vlapic_find_highest_vector 61 48 -13
xmem_pool_free 983 967 -16
iommu_alloc 935 919 -16
free_heap_pages 1512 1496 -16
detect_ht 318 302 -16
alloc_heap_pages 1569 1553 -16
showing that the optimiser can now do a better job in most cases.
---
xen/arch/arm/include/asm/arm32/bitops.h | 2 --
xen/arch/arm/include/asm/arm64/bitops.h | 12 -------
xen/arch/arm/include/asm/bitops.h | 19 ++--------
xen/arch/ppc/include/asm/bitops.h | 4 +--
xen/arch/x86/include/asm/bitops.h | 46 +++++++++++++++----------
xen/common/bitops.c | 25 ++++++++++++++
xen/include/xen/bitops.h | 24 +++++++++++++
7 files changed, 80 insertions(+), 52 deletions(-)
diff --git a/xen/arch/arm/include/asm/arm32/bitops.h b/xen/arch/arm/include/asm/arm32/bitops.h
index d0309d47c188..0d7bb12d5c19 100644
--- a/xen/arch/arm/include/asm/arm32/bitops.h
+++ b/xen/arch/arm/include/asm/arm32/bitops.h
@@ -1,8 +1,6 @@
#ifndef _ARM_ARM32_BITOPS_H
#define _ARM_ARM32_BITOPS_H
-#define flsl fls
-
/*
* Little endian assembly bitops. nr = 0 -> byte 0 bit 0.
*/
diff --git a/xen/arch/arm/include/asm/arm64/bitops.h b/xen/arch/arm/include/asm/arm64/bitops.h
index 906d84e5f295..a6135838dcfa 100644
--- a/xen/arch/arm/include/asm/arm64/bitops.h
+++ b/xen/arch/arm/include/asm/arm64/bitops.h
@@ -1,18 +1,6 @@
#ifndef _ARM_ARM64_BITOPS_H
#define _ARM_ARM64_BITOPS_H
-static inline int flsl(unsigned long x)
-{
- uint64_t ret;
-
- if (__builtin_constant_p(x))
- return generic_flsl(x);
-
- asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
-
- return BITS_PER_LONG - ret;
-}
-
/* Based on linux/include/asm-generic/bitops/find.h */
#ifndef CONFIG_GENERIC_FIND_FIRST_BIT
diff --git a/xen/arch/arm/include/asm/bitops.h b/xen/arch/arm/include/asm/bitops.h
index d30ba44598e3..8f4bdc09d128 100644
--- a/xen/arch/arm/include/asm/bitops.h
+++ b/xen/arch/arm/include/asm/bitops.h
@@ -140,25 +140,10 @@ static inline int test_bit(int nr, const volatile void *addr)
return 1UL & (p[BITOP_WORD(nr)] >> (nr & (BITOP_BITS_PER_WORD-1)));
}
-/*
- * On ARMv5 and above those functions can be implemented around
- * the clz instruction for much better code efficiency.
- */
-
-static inline int fls(unsigned int x)
-{
- int ret;
-
- if (__builtin_constant_p(x))
- return generic_flsl(x);
-
- asm("clz\t%"__OP32"0, %"__OP32"1" : "=r" (ret) : "r" (x));
- return 32 - ret;
-}
-
-
#define arch_ffs(x) ((x) ? 1 + __builtin_ctz(x) : 0)
#define arch_ffsl(x) ((x) ? 1 + __builtin_ctzl(x) : 0)
+#define arch_fls(x) ((x) ? 32 - __builtin_clz(x) : 0)
+#define arch_flsl(x) ((x) ? BITS_PER_LONG - __builtin_clzl(x) : 0)
/**
* hweightN - returns the hamming weight of a N-bit word
diff --git a/xen/arch/ppc/include/asm/bitops.h b/xen/arch/ppc/include/asm/bitops.h
index 761361291e6f..8119b5ace877 100644
--- a/xen/arch/ppc/include/asm/bitops.h
+++ b/xen/arch/ppc/include/asm/bitops.h
@@ -171,10 +171,10 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr)
return (old & mask) != 0;
}
-#define flsl(x) generic_flsl(x)
-#define fls(x) generic_flsl(x)
#define arch_ffs(x) ((x) ? 1 + __builtin_ctz(x) : 0)
#define arch_ffsl(x) ((x) ? 1 + __builtin_ctzl(x) : 0)
+#define arch_fls(x) ((x) ? 32 - __builtin_clz(x) : 0)
+#define arch_flsl(x) ((x) ? BITS_PER_LONG - __builtin_clzl(x) : 0)
/**
* hweightN - returns the hamming weight of a N-bit word
diff --git a/xen/arch/x86/include/asm/bitops.h b/xen/arch/x86/include/asm/bitops.h
index 830e488f33a0..fc9fe73ad5ba 100644
--- a/xen/arch/x86/include/asm/bitops.h
+++ b/xen/arch/x86/include/asm/bitops.h
@@ -447,33 +447,41 @@ static always_inline unsigned int arch_ffsl(unsigned long x)
}
#define arch_ffsl arch_ffsl
-/**
- * fls - find last bit set
- * @x: the word to search
- *
- * This is defined the same way as ffs.
- */
-static inline int flsl(unsigned long x)
+static always_inline unsigned int arch_fls(unsigned int x)
{
- long r;
+ unsigned int r;
+
+ /* See arch_ffs() for safety discussions. */
+ if ( __builtin_constant_p(x > 0) && x > 0 )
+ asm ( "bsr %[val], %[res]"
+ : [res] "=r" (r)
+ : [val] "rm" (x) );
+ else
+ asm ( "bsr %[val], %[res]"
+ : [res] "=r" (r)
+ : [val] "rm" (x), "[res]" (-1) );
- asm ( "bsr %1,%0\n\t"
- "jnz 1f\n\t"
- "mov $-1,%0\n"
- "1:" : "=r" (r) : "rm" (x));
- return (int)r+1;
+ return r + 1;
}
+#define arch_fls arch_fls
-static inline int fls(unsigned int x)
+static always_inline unsigned int arch_flsl(unsigned long x)
{
- int r;
+ unsigned int r;
+
+ /* See arch_ffs() for safety discussions. */
+ if ( __builtin_constant_p(x > 0) && x > 0 )
+ asm ( "bsr %[val], %q[res]"
+ : [res] "=r" (r)
+ : [val] "rm" (x) );
+ else
+ asm ( "bsr %[val], %q[res]"
+ : [res] "=r" (r)
+ : [val] "rm" (x), "[res]" (-1) );
- asm ( "bsr %1,%0\n\t"
- "jnz 1f\n\t"
- "mov $-1,%0\n"
- "1:" : "=r" (r) : "rm" (x));
return r + 1;
}
+#define arch_flsl arch_flsl
/**
* hweightN - returns the hamming weight of a N-bit word
diff --git a/xen/common/bitops.c b/xen/common/bitops.c
index b3813f818198..b4845d9e84d1 100644
--- a/xen/common/bitops.c
+++ b/xen/common/bitops.c
@@ -26,7 +26,32 @@ static void __init test_ffs(void)
#endif
}
+static void __init test_fls(void)
+{
+ /* unsigned int fls(unsigned int) */
+ CHECK(fls, 0, 0);
+ CHECK(fls, 1, 1);
+ CHECK(fls, 3, 2);
+ CHECK(fls, 7, 3);
+ CHECK(fls, 6, 3);
+ CHECK(fls, 0x80000000U, 32);
+
+ /* unsigned int flsl(unsigned long) */
+ CHECK(flsl, 0, 0);
+ CHECK(flsl, 1, 1);
+ CHECK(flsl, 3, 2);
+ CHECK(flsl, 7, 3);
+ CHECK(flsl, 6, 3);
+
+ CHECK(flsl, 1 | (1UL << (BITS_PER_LONG - 1)), BITS_PER_LONG);
+#if BITS_PER_LONG > 32
+ CHECK(flsl, 1 | (1UL << 32), 33);
+ CHECK(flsl, 1 | (1UL << 63), 64);
+#endif
+}
+
static void __init __constructor test_bitops(void)
{
test_ffs();
+ test_fls();
}
diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h
index 88cf27a88bcf..e7df6377372d 100644
--- a/xen/include/xen/bitops.h
+++ b/xen/include/xen/bitops.h
@@ -60,6 +60,30 @@ static always_inline __pure unsigned int ffsl(unsigned long x)
#endif
}
+static always_inline __pure unsigned int fls(unsigned int x)
+{
+ if ( __builtin_constant_p(x) )
+ return x ? 32 - __builtin_clz(x) : 0;
+
+#ifdef arch_fls
+ return arch_fls(x);
+#else
+ return generic_flsl(x);
+#endif
+}
+
+static always_inline __pure unsigned int flsl(unsigned long x)
+{
+ if ( __builtin_constant_p(x) )
+ return x ? BITS_PER_LONG - __builtin_clzl(x) : 0;
+
+#ifdef arch_flsl
+ return arch_flsl(x);
+#else
+ return generic_flsl(x);
+#endif
+}
+
/* --------------------- Please tidy below here --------------------- */
#ifndef find_next_bit
--
2.30.2
On 24.05.2024 22:03, Andrew Cooper wrote:
> From: Oleksii Kurochko <oleksii.kurochko@gmail.com>
>
> This is most easily done together because of how arm32 is currently
> structured, but it does just mirror the existing ffs()/ffsl() work.
>
> Introduce compile and boot time testing.
>
> Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
with small adjustments possibly to be done on the earlier similar
patches also done here.

Jan
© 2016 - 2024 Red Hat, Inc.