Introduce convenience helpers use_alternative_likely() and
use_alternative_unlikely() to implement the pattern of using asm goto to
check if an alternative is selected. Existing code will be converted in
subsequent patches.
This is similar to arm64's alternative_has_cap_{likely,unlikely}(), except
that on riscv not every alternative corresponds to a CPU capability.
Suggested-by: Aydın Mercan <aydin@mercan.dev>
Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
---
arch/riscv/include/asm/alternative-macros.h | 73 +++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)
diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index 231d777d936c2d29c858decaa9a3fa5f172efbb8..be9835b5e4eba03d76db3a73da19ac9e2981c4db 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -158,4 +158,77 @@
_ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \
new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2)
+/*
+ * use_alternative_{likely,unlikely}() returns true if the alternative is
+ * applied and false otherwise, but in a way where the compiler can optimize
+ * this check down to a nop instruction that's patched into a jump, or vice
+ * versa.
+ *
+ * Always returns false if the alternatives mechanism is not available.
+ *
+ * Usage example:
+ * if (use_alternative_likely(0, RISCV_ISA_ZBB))
+ *
+ * Similar to static keys, "likely" means use a nop if the alternative is
+ * selected, and jump if unselected; "unlikely" is the other way around.
+ */
+
+#ifndef __ASSEMBLER__
+
+#include <linux/types.h>
+
+#ifdef CONFIG_RISCV_ALTERNATIVE
+
+static __always_inline bool use_alternative_likely(u16 vendor_id, u32 patch_id)
+{ /* Returns true if the (vendor_id, patch_id) alternative is applied; the applied case is the nop fall-through. */
+ BUILD_BUG_ON(!__builtin_constant_p(vendor_id)); /* both IDs are passed as "i" (immediate) asm operands below */
+ BUILD_BUG_ON(!__builtin_constant_p(patch_id));
+
+ asm goto(ALTERNATIVE("j %l[no_alt]", "nop", %[vendor_id], %[patch_id], 1) /* default content jumps to no_alt; replacement content is a nop */
+ :
+ : [vendor_id] "i"(vendor_id),
+ [patch_id] "i"(patch_id)
+ :
+ : no_alt);
+
+ return true; /* fell through the asm statement: alternative applied */
+
+no_alt:
+ return false; /* reached through the default jump: alternative not applied */
+}
+
+static __always_inline bool use_alternative_unlikely(u16 vendor_id, u32 patch_id)
+{ /* Returns true if the (vendor_id, patch_id) alternative is applied; the not-applied case is the nop fall-through. */
+ BUILD_BUG_ON(!__builtin_constant_p(vendor_id)); /* both IDs are passed as "i" (immediate) asm operands below */
+ BUILD_BUG_ON(!__builtin_constant_p(patch_id));
+
+ asm goto(ALTERNATIVE("nop", "j %l[alt]", %[vendor_id], %[patch_id], 1) /* default content is a nop; replacement content jumps to alt */
+ :
+ : [vendor_id] "i"(vendor_id),
+ [patch_id] "i"(patch_id)
+ :
+ : alt);
+
+ return false; /* fell through the default nop: alternative not applied */
+
+alt:
+ return true; /* reached through the replacement jump: alternative applied */
+}
+
+#else
+
+static inline bool use_alternative_likely(u16 vendor_id, u32 patch_id)
+{
+ return false; /* CONFIG_RISCV_ALTERNATIVE is off: no alternative is ever reported as applied */
+}
+
+static inline bool use_alternative_unlikely(u16 vendor_id, u32 patch_id)
+{
+ return false; /* CONFIG_RISCV_ALTERNATIVE is off: no alternative is ever reported as applied */
+}
+
+#endif /* CONFIG_RISCV_ALTERNATIVE */
+
+#endif /* __ASSEMBLER__ */
+
#endif
--
2.50.1
On Wed, Aug 20, 2025 at 09:44:45PM +0800, Vivian Wang wrote: > Introduce convenience helpers use_alternative_likely() and > use_alternative_unlikely() to implement the pattern of using asm goto to > check if an alternative is selected. Existing code will be converted in > subsequent patches. > > Similar to arm64 alternative_has_cap_{likely,unlikely}, but for riscv, > alternatives are not all CPU capabilities. > > Suggested-by: Aydın Mercan <aydin@mercan.dev> > Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn> > --- > arch/riscv/include/asm/alternative-macros.h | 73 +++++++++++++++++++++++++++++ > 1 file changed, 73 insertions(+) > > diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h > index 231d777d936c2d29c858decaa9a3fa5f172efbb8..be9835b5e4eba03d76db3a73da19ac9e2981c4db 100644 > --- a/arch/riscv/include/asm/alternative-macros.h > +++ b/arch/riscv/include/asm/alternative-macros.h > @@ -158,4 +158,77 @@ > _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ > new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2) > > +/* > + * use_alternative_{likely,unlikely}() returns true if the alternative is > + * applied and false otherwise, but in a way where the compiler can optimize > + * this check down to a nop instruction that's patched into a jump, or vice > + * versa. > + * > + * Always returns false if the alternatives mechanism is not available. > + * > + * Usage example: > + * if (use_alternative_likely(0, RISCV_ISA_ZBB)) > + * > + * Similar to static keys, "likely" means use a nop if the alternative is > + * selected, and jump if unselected; "unlikely" is the other way around. 
> + */ > + > +#ifndef __ASSEMBLER__ > + > +#include <linux/types.h> > + > +#ifdef CONFIG_RISCV_ALTERNATIVE > + > +static __always_inline bool use_alternative_likely(u16 vendor_id, u32 patch_id) > +{ > + BUILD_BUG_ON(!__builtin_constant_p(vendor_id)); > + BUILD_BUG_ON(!__builtin_constant_p(patch_id)); > + > + asm goto(ALTERNATIVE("j %l[no_alt]", "nop", %[vendor_id], %[patch_id], 1) > + : > + : [vendor_id] "i"(vendor_id), > + [patch_id] "i"(patch_id) > + : > + : no_alt); > + > + return true; > + > +no_alt: > + return false; > +} Apart from those BUILD_BUG_ON()s, it looks similar to __riscv_has_extension_likely(). Can you make sure you don't duplicate it? If so, can you describe what's the difference between those two in the commit message? > +static __always_inline bool use_alternative_unlikely(u16 vendor_id, u32 patch_id) > +{ > + BUILD_BUG_ON(!__builtin_constant_p(vendor_id)); > + BUILD_BUG_ON(!__builtin_constant_p(patch_id)); > + > + asm goto(ALTERNATIVE("nop", "j %l[alt]", %[vendor_id], %[patch_id], 1) > + : > + : [vendor_id] "i"(vendor_id), > + [patch_id] "i"(patch_id) > + : > + : alt); > + > + return false; > + > +alt: > + return true; > +} This 'unlikely' version is just an negation of 'likely' one, and it looks like an attempt to save on one negation. On the other hand, the function is __always_inline, which means that compiler should normally take care of it. Can you prove with objdump that it really works as intended? I mean that if (use_alternative_unlikely()) do_something(); generates a better code than if (!use_alternative_likely()) do_something(); Thanks, Yury
Hi Yury, Thanks for the review. On 8/20/25 22:56, Yury Norov wrote: > On Wed, Aug 20, 2025 at 09:44:45PM +0800, Vivian Wang wrote: >> Introduce convenience helpers use_alternative_likely() and >> use_alternative_unlikely() to implement the pattern of using asm goto to >> check if an alternative is selected. Existing code will be converted in >> subsequent patches. >> >> Similar to arm64 alternative_has_cap_{likely,unlikely}, but for riscv, >> alternatives are not all CPU capabilities. >> >> Suggested-by: Aydın Mercan <aydin@mercan.dev> >> Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn> >> --- >> arch/riscv/include/asm/alternative-macros.h | 73 +++++++++++++++++++++++++++++ >> 1 file changed, 73 insertions(+) >> >> diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h >> index 231d777d936c2d29c858decaa9a3fa5f172efbb8..be9835b5e4eba03d76db3a73da19ac9e2981c4db 100644 >> --- a/arch/riscv/include/asm/alternative-macros.h >> +++ b/arch/riscv/include/asm/alternative-macros.h >> @@ -158,4 +158,77 @@ >> _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ >> new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2) >> >> +/* >> + * use_alternative_{likely,unlikely}() returns true if the alternative is >> + * applied and false otherwise, but in a way where the compiler can optimize >> + * this check down to a nop instruction that's patched into a jump, or vice >> + * versa. >> + * >> + * Always returns false if the alternatives mechanism is not available. >> + * >> + * Usage example: >> + * if (use_alternative_likely(0, RISCV_ISA_ZBB)) >> + * >> + * Similar to static keys, "likely" means use a nop if the alternative is >> + * selected, and jump if unselected; "unlikely" is the other way around. 
>> + */ >> + >> +#ifndef __ASSEMBLER__ >> + >> +#include <linux/types.h> >> + >> +#ifdef CONFIG_RISCV_ALTERNATIVE >> + >> +static __always_inline bool use_alternative_likely(u16 vendor_id, u32 patch_id) >> +{ >> + BUILD_BUG_ON(!__builtin_constant_p(vendor_id)); >> + BUILD_BUG_ON(!__builtin_constant_p(patch_id)); >> + >> + asm goto(ALTERNATIVE("j %l[no_alt]", "nop", %[vendor_id], %[patch_id], 1) >> + : >> + : [vendor_id] "i"(vendor_id), >> + [patch_id] "i"(patch_id) >> + : >> + : no_alt); >> + >> + return true; >> + >> +no_alt: >> + return false; >> +} > Apart from those BUILD_BUG_ON()s, it looks similar to > __riscv_has_extension_likely(). Can you make sure you don't duplicate > it? > > If so, can you describe what's the difference between those two in the > commit message? Whoops, *completely* missed that. Thanks for the catch. It turns out I was trying to find uses of this pattern by searching for "j<space>%l[...]". The block in __riscv_has_extension_{likely,unlikely} uses "j<tab>%l[...]". I'll just use __riscv_has_extension_{likely,unlikely} in v2 and drop this. >> +static __always_inline bool use_alternative_unlikely(u16 vendor_id, u32 patch_id) >> +{ >> + BUILD_BUG_ON(!__builtin_constant_p(vendor_id)); >> + BUILD_BUG_ON(!__builtin_constant_p(patch_id)); >> + >> + asm goto(ALTERNATIVE("nop", "j %l[alt]", %[vendor_id], %[patch_id], 1) >> + : >> + : [vendor_id] "i"(vendor_id), >> + [patch_id] "i"(patch_id) >> + : >> + : alt); >> + >> + return false; >> + >> +alt: >> + return true; >> +} > This 'unlikely' version is just an negation of 'likely' one, and it > looks like an attempt to save on one negation. On the other hand, the > function is __always_inline, which means that compiler should normally > take care of it. Can you prove with objdump that it really works as > intended? 
I mean that > > if (use_alternative_unlikely()) > do_something(); > > generates a better code than > > if (!use_alternative_likely()) > do_something(); use_alternative_likely() and use_alternative_unlikely() are not negations of each other and in fact should be functionally equivalent. I also briefly explained the difference in the comment, but the difference is which case is nop i.e. fallthrough, and which case requires a jump instruction. The likely case should get a "nop", and the unlikely case should get a "j %l[...]". This choice does work as intended [1]. I don't think it is possible to give both options to the compiler, so at least for now AIUI users have to pick one. The same applies to __riscv_has_extension_{likely,unlikely}. Vivian "dramforever" Wang [1]: https://godbolt.org/z/v8zTEhzTx
> > This 'unlikely' version is just a negation of the 'likely' one, and it > > looks like an attempt to save on one negation. On the other hand, the > > function is __always_inline, which means that compiler should normally > > take care of it. Can you prove with objdump that it really works as > > intended? I mean that > > > > if (use_alternative_unlikely()) > > do_something(); > > > > generates better code than > > > > if (!use_alternative_likely()) > > do_something(); > > use_alternative_likely() and use_alternative_unlikely() are not > negations of each other and in fact should be functionally equivalent. I > also briefly explained the difference in the comment, but the difference > is which case is nop i.e. fallthrough, and which case requires a jump > instruction. The likely case should get a "nop", and the unlikely case > should get a "j %l[...]". This choice does work as intended [1]. > > I don't think it is possible to give both options to the compiler, so at > least for now AIUI users have to pick one. > > The same applies to __riscv_has_extension_{likely,unlikely}. > > Vivian "dramforever" Wang > > [1]: https://godbolt.org/z/v8zTEhzTx I realize that the likely and unlikely versions generate different code, I'm just not convinced that 1. it works in a real kernel as intended, not only on godbolt; and 2. it has any measurable impact. That's why I asked you to share objdump and possibly perf tests.
On 8/20/25 23:43, Yury Norov wrote: >>> This 'unlikely' version is just an negation of 'likely' one, and it >>> looks like an attempt to save on one negation. On the other hand, the >>> function is __always_inline, which means that compiler should normally >>> take care of it. Can you prove with objdump that it really works as >>> intended? I mean that >>> >>> if (use_alternative_unlikely()) >>> do_something(); >>> >>> generates a better code than >>> >>> if (!use_alternative_likely()) >>> do_something(); >> use_alternative_likely() and use_alternative_unlikely() are not >> negations of each other and in fact should be functionally equivalent. I >> also briefly explained the difference in the comment, but the difference >> is which case is nop i.e. fallthrough, and which case requires a jump >> instruction. The likely case should get a "nop", and the unlikely case >> should get a "j %l[...]". This choice does work as intended [1]. >> >> I don't think it is possible to give both options to the compiler, so at >> least for now AIUI users have to pick one. >> >> The same applies to __riscv_has_extension_{likely,unlikely}. >> >> Vivian "dramforever" Wang >> >> [1]: https://godbolt.org/z/v8zTEhzTx > I realize that likely and unlikely versions generate different code, > I'm just not convinced that > > 1. it works in real kernel as intended, not only in the godbold; and > 2. has any measurable impact. > > That's why I asked you to share objdump and possibly perf tests. Ah, that makes sense. I had considered my patch to only be refactoring, so I only sought to preserve the original logic rather than to achieve an optimization. I don't have concrete performance benchmark results, but since it is a mere refactoring, the performance should not be worse than what's already in v6.17-rc1. Having said that, I am also fairly certain that the selection works in a real kernel. I have put two objdump examples at the bottom of this message. 
Vivian "dramforever" Wang ------------------------------ I grabbed v6.17-rc1 with this series applied, and built a defconfig then mod2noconfig then DEBUG_INFO_DWARF5=y kernel. The compiler is riscv64-unknown-linux-gnu-gcc (GCC) 14.3.0. Then I looked for random uses of Zbb instructions. Here is an example in register_pidns_sysctls(), where it calls num_possible_cpus(), which uses hweight_long(), which can use a cpop instruction with Zbb extension, and falls back to __sw_hweight64() otherwise. Here's the code: pidns->pid_max = min(pid_max_max, max_t(int, pidns->pid_max, ffffffff8004ee38: 892a mv s2,a0 ffffffff8004ee3a: 0444aa83 lw s5,68(s1) ffffffff8004ee3e: 9781aa03 lw s4,-1672(gp) # ffffffff81814258 <pid_max_max> return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); ffffffff8004ee42: 012ef517 auipc a0,0x12ef ffffffff8004ee46: e2653503 ld a0,-474(a0) # ffffffff8133dc68 <__cpu_possible_mask> ffffffff8004ee4a: 05c0006f j ffffffff8004eea6 <register_pidns_sysctls+0xc2> ^~~~~~~~~~~~~~~~~~~~~~~~ Jump to "unlikely" non-Zbb fallback "Has Zbb" is "likely" here, and in that case this jump gets patched into a nop and falls through to the cpop here: asm (".option push\n" ffffffff8004ee4e: 60251793 cpop a5,a0 ^~~~~~~~~~~~~ Zbb implementation ffffffff8004ee52: 00a7979b slliw a5,a5,0xa ffffffff8004ee56: 873e mv a4,a5 ffffffff8004ee58: 0157d363 bge a5,s5,ffffffff8004ee5e <register_pidns_sysctls+0x7a> ffffffff8004ee5c: 8756 mv a4,s5 ffffffff8004ee5e: 2701 sext.w a4,a4 ffffffff8004ee60: 87ba mv a5,a4 ffffffff8004ee62: 00ea5363 bge s4,a4,ffffffff8004ee68 <register_pidns_sysctls+0x84> ffffffff8004ee66: 87d2 mv a5,s4 ffffffff8004ee68: c0fc sw a5,68(s1) PIDS_PER_CPU_DEFAULT * num_possible_cpus())); ... 
Later comes the fallback code that calls __sw_hweight64() and jumps back: return __sw_hweight64(w); ffffffff8004eea6: 004f2097 auipc ra,0x4f2 ffffffff8004eeaa: 006080e7 jalr 6(ra) # ffffffff80540eac <__sw_hweight64> ffffffff8004eeae: 87aa mv a5,a0 ffffffff8004eeb0: b74d j ffffffff8004ee52 <register_pidns_sysctls+0x6e> ------------------------------ Here's another example ip_fast_csum() which has a Zbb implementation and a non-Zbb one. The asm goto line seems to have been preserved in debug information more nicely: static __always_inline bool use_alternative_likely(u16 vendor_id, u32 patch_id) { BUILD_BUG_ON(!__builtin_constant_p(vendor_id)); BUILD_BUG_ON(!__builtin_constant_p(patch_id)); asm goto(ALTERNATIVE("j %l[no_alt]", "nop", %[vendor_id], %[patch_id], 1) ffffffff8000f952: 01e0006f j ffffffff8000f970 <ip_fast_csum+0x40> ^~~~~~~~~~~~~~~~~~~~~~~~ Jump to "unlikely" non-Zbb fallback rori %[csum], %[csum], 16 \n\ sub %[csum], %[fold_temp], %[csum] \n\ .option pop" : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)); } else { asm(".option push \n\ ffffffff8000f956: 6207d713 rori a4,a5,0x20 ffffffff8000f95a: 97ba add a5,a5,a4 ffffffff8000f95c: 9381 srli a5,a5,0x20 ffffffff8000f95e: fff7c713 not a4,a5 ffffffff8000f962: 6107d79b roriw a5,a5,0x10 ffffffff8000f966: 40f707bb subw a5,a4,a5 ^~~~~~~~~~~~~~~~~~ This block is the Zbb implementation roriw %[csum], %[csum], 16 \n\ subw %[csum], %[fold_temp], %[csum] \n\ .option pop" : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)); } return (__force __sum16)(csum >> 16); ffffffff8000f96a: 0107d51b srliw a0,a5,0x10 ffffffff8000f96e: a015 j ffffffff8000f992 <ip_fast_csum+0x62> ... and then it jumps further to more code. 
Then comes the non-Zbb implementation, which starts with a rotate operation as well but has to use three instructions for it * @word: value to rotate * @shift: bits to roll */ static inline __u64 ror64(__u64 word, unsigned int shift) { return (word >> (shift & 63)) | (word << ((-shift) & 63)); ffffffff8000f970: 0207d693 srli a3,a5,0x20 ffffffff8000f974: 02079713 slli a4,a5,0x20 ffffffff8000f978: 8f55 or a4,a4,a3 ... And the non-Zbb implementation goes on...
© 2016 - 2025 Red Hat, Inc.