arch/x86/include/asm/bitops.h | 9 +++++++++ 1 file changed, 9 insertions(+)
Similar to commit fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl() to
evaluate constant expressions") and commit 2fcff790dcb4 ("powerpc: Use
builtin functions for fls()/__fls()/fls64()").
From a recent discussion, I noticed that x86 is lacking an optimization
that appears in arch/powerpc/include/asm/bitops.h related to constant
folding. If you add a BUILD_BUG_ON(__builtin_constant_p(param)) to
these functions, you'll find that there were cases where the use of
inline asm pessimized the compiler's ability to perform constant folding
resulting in runtime calculation of a value that could have been
computed at compile time.
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/llvm/CAKwvOdm_y6UOnxFrAiDxou2jc8CRUvyhfH9kAdc3PG0=bEvduw@mail.gmail.com/
---
arch/x86/include/asm/bitops.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 2edf68475fec..50e5ebf9d0a0 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -293,6 +293,9 @@ static __always_inline unsigned long variable_ffz(unsigned long word)
*/
static __always_inline unsigned long __fls(unsigned long word)
{
+ if (__builtin_constant_p(word))
+ return BITS_PER_LONG - 1 - __builtin_clzl(word);
+
asm("bsr %1,%0"
: "=r" (word)
: "rm" (word));
@@ -360,6 +363,9 @@ static __always_inline int fls(unsigned int x)
{
int r;
+ if (__builtin_constant_p(x))
+ return x ? 32 - __builtin_clz(x) : 0;
+
#ifdef CONFIG_X86_64
/*
* AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
@@ -401,6 +407,9 @@ static __always_inline int fls(unsigned int x)
static __always_inline int fls64(__u64 x)
{
int bitpos = -1;
+
+ if (__builtin_constant_p(x))
+ return x ? 64 - __builtin_clzll(x) : 0;
/*
* AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
* dest reg is undefined if x==0, but their CPU architect says its
---
base-commit: 2dde18cd1d8fac735875f2e4987f11817cc0bc2c
change-id: 20230828-x86_fls-cf2cc49f40f3
Best regards,
--
Nick Desaulniers <ndesaulniers@google.com>
The following commit has been merged into the x86/asm branch of tip:
Commit-ID: 3dae5c43badf285e22f6d88388e8a232a83bdfec
Gitweb: https://git.kernel.org/tip/3dae5c43badf285e22f6d88388e8a232a83bdfec
Author: Nick Desaulniers <ndesaulniers@google.com>
AuthorDate: Mon, 28 Aug 2023 11:53:57 -07:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 07 Sep 2023 00:05:50 +02:00
x86/asm/bitops: Use __builtin_clz{l|ll} to evaluate constant expressions
Micro-optimize the bitops code some more, similar to commits:
fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl() to evaluate constant expressions")
2fcff790dcb4 ("powerpc: Use builtin functions for fls()/__fls()/fls64()")
>From a recent discussion, I noticed that x86 is lacking an optimization
that appears in arch/powerpc/include/asm/bitops.h related to constant
folding. If you add a BUILD_BUG_ON(__builtin_constant_p(param)) to
these functions, you'll find that there were cases where the use of
inline asm pessimized the compiler's ability to perform constant folding
resulting in runtime calculation of a value that could have been
computed at compile time.
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230828-x86_fls-v1-1-e6a31b9f79c3@google.com
---
arch/x86/include/asm/bitops.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 2edf684..50e5ebf 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -293,6 +293,9 @@ static __always_inline unsigned long variable_ffz(unsigned long word)
*/
static __always_inline unsigned long __fls(unsigned long word)
{
+ if (__builtin_constant_p(word))
+ return BITS_PER_LONG - 1 - __builtin_clzl(word);
+
asm("bsr %1,%0"
: "=r" (word)
: "rm" (word));
@@ -360,6 +363,9 @@ static __always_inline int fls(unsigned int x)
{
int r;
+ if (__builtin_constant_p(x))
+ return x ? 32 - __builtin_clz(x) : 0;
+
#ifdef CONFIG_X86_64
/*
* AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
@@ -401,6 +407,9 @@ static __always_inline int fls(unsigned int x)
static __always_inline int fls64(__u64 x)
{
int bitpos = -1;
+
+ if (__builtin_constant_p(x))
+ return x ? 64 - __builtin_clzll(x) : 0;
/*
* AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
* dest reg is undefined if x==0, but their CPU architect says its
© 2016 - 2025 Red Hat, Inc.