Introduce convenience helpers use_alternative_likely() and
use_alternative_unlikely() to implement the pattern of using asm goto to
check if an alternative is selected. Existing code will be converted in
subsequent patches.
Similar to arm64 alternative_has_cap_{likely,unlikely}, but for riscv,
alternatives are not all CPU capabilities.
Suggested-by: Aydın Mercan <aydin@mercan.dev>
Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
---
arch/riscv/include/asm/alternative-macros.h | 73 +++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)
diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index 231d777d936c2d29c858decaa9a3fa5f172efbb8..be9835b5e4eba03d76db3a73da19ac9e2981c4db 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -158,4 +158,77 @@
_ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \
new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2)
+/*
+ * use_alternative_{likely,unlikely}() returns true if the alternative is
+ * applied and false otherwise, but in a way where the compiler can optimize
+ * this check down to a nop instruction that's patched into a jump, or vice
+ * versa.
+ *
+ * Always returns false if the alternatives mechanism is not available.
+ *
+ * Usage example:
+ * if (use_alternative_likely(0, RISCV_ISA_ZBB))
+ *
+ * Similar to static keys, "likely" means use a nop if the alternative is
+ * selected, and jump if unselected; "unlikely" is the other way around.
+ */
+
+#ifndef __ASSEMBLER__
+
+#include <linux/types.h>
+
+#ifdef CONFIG_RISCV_ALTERNATIVE
+
+static __always_inline bool use_alternative_likely(u16 vendor_id, u32 patch_id)
+{
+ BUILD_BUG_ON(!__builtin_constant_p(vendor_id));
+ BUILD_BUG_ON(!__builtin_constant_p(patch_id));
+
+ asm goto(ALTERNATIVE("j %l[no_alt]", "nop", %[vendor_id], %[patch_id], 1)
+ :
+ : [vendor_id] "i"(vendor_id),
+ [patch_id] "i"(patch_id)
+ :
+ : no_alt);
+
+ return true;
+
+no_alt:
+ return false;
+}
+
+static __always_inline bool use_alternative_unlikely(u16 vendor_id, u32 patch_id)
+{
+ BUILD_BUG_ON(!__builtin_constant_p(vendor_id));
+ BUILD_BUG_ON(!__builtin_constant_p(patch_id));
+
+ asm goto(ALTERNATIVE("nop", "j %l[alt]", %[vendor_id], %[patch_id], 1)
+ :
+ : [vendor_id] "i"(vendor_id),
+ [patch_id] "i"(patch_id)
+ :
+ : alt);
+
+ return false;
+
+alt:
+ return true;
+}
+
+#else
+
+static inline bool use_alternative_likely(u16 vendor_id, u32 patch_id)
+{
+ return false;
+}
+
+static inline bool use_alternative_unlikely(u16 vendor_id, u32 patch_id)
+{
+ return false;
+}
+
+#endif /* CONFIG_RISCV_ALTERNATIVE */
+
+#endif /* __ASSEMBLER__ */
+
#endif
--
2.50.1
On Wed, Aug 20, 2025 at 09:44:45PM +0800, Vivian Wang wrote:
> Introduce convenience helpers use_alternative_likely() and
> use_alternative_unlikely() to implement the pattern of using asm goto to
> check if an alternative is selected. Existing code will be converted in
> subsequent patches.
>
> Similar to arm64 alternative_has_cap_{likely,unlikely}, but for riscv,
> alternatives are not all CPU capabilities.
>
> Suggested-by: Aydın Mercan <aydin@mercan.dev>
> Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
> ---
> arch/riscv/include/asm/alternative-macros.h | 73 +++++++++++++++++++++++++++++
> 1 file changed, 73 insertions(+)
>
> diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
> index 231d777d936c2d29c858decaa9a3fa5f172efbb8..be9835b5e4eba03d76db3a73da19ac9e2981c4db 100644
> --- a/arch/riscv/include/asm/alternative-macros.h
> +++ b/arch/riscv/include/asm/alternative-macros.h
> @@ -158,4 +158,77 @@
> _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \
> new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2)
>
> +/*
> + * use_alternative_{likely,unlikely}() returns true if the alternative is
> + * applied and false otherwise, but in a way where the compiler can optimize
> + * this check down to a nop instruction that's patched into a jump, or vice
> + * versa.
> + *
> + * Always returns false if the alternatives mechanism is not available.
> + *
> + * Usage example:
> + * if (use_alternative_likely(0, RISCV_ISA_ZBB))
> + *
> + * Similar to static keys, "likely" means use a nop if the alternative is
> + * selected, and jump if unselected; "unlikely" is the other way around.
> + */
> +
> +#ifndef __ASSEMBLER__
> +
> +#include <linux/types.h>
> +
> +#ifdef CONFIG_RISCV_ALTERNATIVE
> +
> +static __always_inline bool use_alternative_likely(u16 vendor_id, u32 patch_id)
> +{
> + BUILD_BUG_ON(!__builtin_constant_p(vendor_id));
> + BUILD_BUG_ON(!__builtin_constant_p(patch_id));
> +
> + asm goto(ALTERNATIVE("j %l[no_alt]", "nop", %[vendor_id], %[patch_id], 1)
> + :
> + : [vendor_id] "i"(vendor_id),
> + [patch_id] "i"(patch_id)
> + :
> + : no_alt);
> +
> + return true;
> +
> +no_alt:
> + return false;
> +}
Apart from those BUILD_BUG_ON()s, it looks similar to
__riscv_has_extension_likely(). Can you make sure you don't duplicate
it?
If the two do differ, can you describe the difference between them in the
commit message?
> +static __always_inline bool use_alternative_unlikely(u16 vendor_id, u32 patch_id)
> +{
> + BUILD_BUG_ON(!__builtin_constant_p(vendor_id));
> + BUILD_BUG_ON(!__builtin_constant_p(patch_id));
> +
> + asm goto(ALTERNATIVE("nop", "j %l[alt]", %[vendor_id], %[patch_id], 1)
> + :
> + : [vendor_id] "i"(vendor_id),
> + [patch_id] "i"(patch_id)
> + :
> + : alt);
> +
> + return false;
> +
> +alt:
> + return true;
> +}
This 'unlikely' version is just a negation of the 'likely' one, and it
looks like an attempt to save on one negation. On the other hand, the
function is __always_inline, which means that the compiler should normally
take care of it. Can you prove with objdump that it really works as
intended? I mean that
if (use_alternative_unlikely())
do_something();
generates a better code than
if (!use_alternative_likely())
do_something();
Thanks,
Yury
Hi Yury,
Thanks for the review.
On 8/20/25 22:56, Yury Norov wrote:
> On Wed, Aug 20, 2025 at 09:44:45PM +0800, Vivian Wang wrote:
>> Introduce convenience helpers use_alternative_likely() and
>> use_alternative_unlikely() to implement the pattern of using asm goto to
>> check if an alternative is selected. Existing code will be converted in
>> subsequent patches.
>>
>> Similar to arm64 alternative_has_cap_{likely,unlikely}, but for riscv,
>> alternatives are not all CPU capabilities.
>>
>> Suggested-by: Aydın Mercan <aydin@mercan.dev>
>> Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
>> ---
>> arch/riscv/include/asm/alternative-macros.h | 73 +++++++++++++++++++++++++++++
>> 1 file changed, 73 insertions(+)
>>
>> diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
>> index 231d777d936c2d29c858decaa9a3fa5f172efbb8..be9835b5e4eba03d76db3a73da19ac9e2981c4db 100644
>> --- a/arch/riscv/include/asm/alternative-macros.h
>> +++ b/arch/riscv/include/asm/alternative-macros.h
>> @@ -158,4 +158,77 @@
>> _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \
>> new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2)
>>
>> +/*
>> + * use_alternative_{likely,unlikely}() returns true if the alternative is
>> + * applied and false otherwise, but in a way where the compiler can optimize
>> + * this check down to a nop instruction that's patched into a jump, or vice
>> + * versa.
>> + *
>> + * Always returns false if the alternatives mechanism is not available.
>> + *
>> + * Usage example:
>> + * if (use_alternative_likely(0, RISCV_ISA_ZBB))
>> + *
>> + * Similar to static keys, "likely" means use a nop if the alternative is
>> + * selected, and jump if unselected; "unlikely" is the other way around.
>> + */
>> +
>> +#ifndef __ASSEMBLER__
>> +
>> +#include <linux/types.h>
>> +
>> +#ifdef CONFIG_RISCV_ALTERNATIVE
>> +
>> +static __always_inline bool use_alternative_likely(u16 vendor_id, u32 patch_id)
>> +{
>> + BUILD_BUG_ON(!__builtin_constant_p(vendor_id));
>> + BUILD_BUG_ON(!__builtin_constant_p(patch_id));
>> +
>> + asm goto(ALTERNATIVE("j %l[no_alt]", "nop", %[vendor_id], %[patch_id], 1)
>> + :
>> + : [vendor_id] "i"(vendor_id),
>> + [patch_id] "i"(patch_id)
>> + :
>> + : no_alt);
>> +
>> + return true;
>> +
>> +no_alt:
>> + return false;
>> +}
> Apart from those BUILD_BUG_ON()s, it looks similar to
> __riscv_has_extension_likely(). Can you make sure you don't duplicate
> it?
>
> If so, can you describe what's the difference between those two in the
> commit message?
Whoops, *completely* missed that. Thanks for the catch.
It turns out I was trying to find uses of this pattern by searching for
"j<space>%l[...]". The block in __riscv_has_extension_{likely,unlikely}
uses "j<tab>%l[...]".
I'll just use __riscv_has_extension_{likely,unlikely} in v2 and drop this.
>> +static __always_inline bool use_alternative_unlikely(u16 vendor_id, u32 patch_id)
>> +{
>> + BUILD_BUG_ON(!__builtin_constant_p(vendor_id));
>> + BUILD_BUG_ON(!__builtin_constant_p(patch_id));
>> +
>> + asm goto(ALTERNATIVE("nop", "j %l[alt]", %[vendor_id], %[patch_id], 1)
>> + :
>> + : [vendor_id] "i"(vendor_id),
>> + [patch_id] "i"(patch_id)
>> + :
>> + : alt);
>> +
>> + return false;
>> +
>> +alt:
>> + return true;
>> +}
> This 'unlikely' version is just an negation of 'likely' one, and it
> looks like an attempt to save on one negation. On the other hand, the
> function is __always_inline, which means that compiler should normally
> take care of it. Can you prove with objdump that it really works as
> intended? I mean that
>
> if (use_alternative_unlikely())
> do_something();
>
> generates a better code than
>
> if (!use_alternative_likely())
> do_something();
use_alternative_likely() and use_alternative_unlikely() are not
negations of each other and in fact should be functionally equivalent. I
also briefly explained the difference in the comment, but the difference
is which case is nop i.e. fallthrough, and which case requires a jump
instruction. The likely case should get a "nop", and the unlikely case
should get a "j %l[...]". This choice does work as intended [1].
I don't think it is possible to give both options to the compiler, so at
least for now AIUI users have to pick one.
The same applies to __riscv_has_extension_{likely,unlikely}.
Vivian "dramforever" Wang
[1]: https://godbolt.org/z/v8zTEhzTx
> > This 'unlikely' version is just an negation of 'likely' one, and it
> > looks like an attempt to save on one negation. On the other hand, the
> > function is __always_inline, which means that compiler should normally
> > take care of it. Can you prove with objdump that it really works as
> > intended? I mean that
> >
> > if (use_alternative_unlikely())
> > do_something();
> >
> > generates a better code than
> >
> > if (!use_alternative_likely())
> > do_something();
>
> use_alternative_likely() and use_alternative_unlikely() are not
> negations of each other and in fact should be functionally equivalent. I
> also briefly explained the difference in the comment, but the difference
> is which case is nop i.e. fallthrough, and which case requires a jump
> instruction. The likely case should get a "nop", and the unlikely case
> should get a "j %l[...]". This choice does work as intended [1].
>
> I don't think it is possible to give both options to the compiler, so at
> least for now AIUI users have to pick one.
>
> The same applies to __riscv_has_extension_{likely,unlikely}.
>
> Vivian "dramforever" Wang
>
> [1]: https://godbolt.org/z/v8zTEhzTx
I realize that likely and unlikely versions generate different code,
I'm just not convinced that
1. it works in a real kernel as intended, not only on godbolt; and
2. has any measurable impact.
That's why I asked you to share objdump and possibly perf tests.
On 8/20/25 23:43, Yury Norov wrote:
>>> This 'unlikely' version is just an negation of 'likely' one, and it
>>> looks like an attempt to save on one negation. On the other hand, the
>>> function is __always_inline, which means that compiler should normally
>>> take care of it. Can you prove with objdump that it really works as
>>> intended? I mean that
>>>
>>> if (use_alternative_unlikely())
>>> do_something();
>>>
>>> generates a better code than
>>>
>>> if (!use_alternative_likely())
>>> do_something();
>> use_alternative_likely() and use_alternative_unlikely() are not
>> negations of each other and in fact should be functionally equivalent. I
>> also briefly explained the difference in the comment, but the difference
>> is which case is nop i.e. fallthrough, and which case requires a jump
>> instruction. The likely case should get a "nop", and the unlikely case
>> should get a "j %l[...]". This choice does work as intended [1].
>>
>> I don't think it is possible to give both options to the compiler, so at
>> least for now AIUI users have to pick one.
>>
>> The same applies to __riscv_has_extension_{likely,unlikely}.
>>
>> Vivian "dramforever" Wang
>>
>> [1]: https://godbolt.org/z/v8zTEhzTx
> I realize that likely and unlikely versions generate different code,
> I'm just not convinced that
>
> 1. it works in real kernel as intended, not only in the godbold; and
> 2. has any measurable impact.
>
> That's why I asked you to share objdump and possibly perf tests.
Ah, that makes sense. I had considered my patch to only be refactoring,
so I only sought to preserve the original logic rather than to achieve
an optimization.
I don't have concrete performance benchmark results, but since it is a
mere refactoring, the performance should not be worse than what's
already in v6.17-rc1.
Having said that, I am also fairly certain that the selection works in a
real kernel. I have put two objdump examples at the bottom of this message.
Vivian "dramforever" Wang
------------------------------
I grabbed v6.17-rc1 with this series applied, and built a kernel configured
with defconfig, then mod2noconfig, then DEBUG_INFO_DWARF5=y. The compiler is
riscv64-unknown-linux-gnu-gcc (GCC) 14.3.0. Then I looked for random
uses of Zbb instructions.
Here is an example in register_pidns_sysctls(), where it
calls num_possible_cpus(), which uses hweight_long(), which can use a
cpop instruction with the Zbb extension, and falls back to __sw_hweight64()
otherwise. Here's the code:
pidns->pid_max = min(pid_max_max, max_t(int, pidns->pid_max,
ffffffff8004ee38: 892a mv s2,a0
ffffffff8004ee3a: 0444aa83 lw s5,68(s1)
ffffffff8004ee3e: 9781aa03 lw s4,-1672(gp) # ffffffff81814258 <pid_max_max>
return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
ffffffff8004ee42: 012ef517 auipc a0,0x12ef
ffffffff8004ee46: e2653503 ld a0,-474(a0) # ffffffff8133dc68 <__cpu_possible_mask>
ffffffff8004ee4a: 05c0006f j ffffffff8004eea6 <register_pidns_sysctls+0xc2>
^~~~~~~~~~~~~~~~~~~~~~~~ Jump to "unlikely" non-Zbb fallback
"Has Zbb" is "likely" here, and in that case this jump gets patched into
a nop and falls through to the cpop here:
asm (".option push\n"
ffffffff8004ee4e: 60251793 cpop a5,a0
^~~~~~~~~~~~~ Zbb implementation
ffffffff8004ee52: 00a7979b slliw a5,a5,0xa
ffffffff8004ee56: 873e mv a4,a5
ffffffff8004ee58: 0157d363 bge a5,s5,ffffffff8004ee5e <register_pidns_sysctls+0x7a>
ffffffff8004ee5c: 8756 mv a4,s5
ffffffff8004ee5e: 2701 sext.w a4,a4
ffffffff8004ee60: 87ba mv a5,a4
ffffffff8004ee62: 00ea5363 bge s4,a4,ffffffff8004ee68 <register_pidns_sysctls+0x84>
ffffffff8004ee66: 87d2 mv a5,s4
ffffffff8004ee68: c0fc sw a5,68(s1)
PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
...
Later comes the fallback code that calls __sw_hweight64() and jumps back:
return __sw_hweight64(w);
ffffffff8004eea6: 004f2097 auipc ra,0x4f2
ffffffff8004eeaa: 006080e7 jalr 6(ra) # ffffffff80540eac <__sw_hweight64>
ffffffff8004eeae: 87aa mv a5,a0
ffffffff8004eeb0: b74d j ffffffff8004ee52 <register_pidns_sysctls+0x6e>
------------------------------
Here's another example ip_fast_csum() which has a Zbb implementation and
a non-Zbb one. The asm goto line seems to have been preserved in debug
information more nicely:
static __always_inline bool use_alternative_likely(u16 vendor_id, u32 patch_id)
{
BUILD_BUG_ON(!__builtin_constant_p(vendor_id));
BUILD_BUG_ON(!__builtin_constant_p(patch_id));
asm goto(ALTERNATIVE("j %l[no_alt]", "nop", %[vendor_id], %[patch_id], 1)
ffffffff8000f952: 01e0006f j ffffffff8000f970 <ip_fast_csum+0x40>
^~~~~~~~~~~~~~~~~~~~~~~~ Jump to "unlikely" non-Zbb fallback
rori %[csum], %[csum], 16 \n\
sub %[csum], %[fold_temp], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
} else {
asm(".option push \n\
ffffffff8000f956: 6207d713 rori a4,a5,0x20
ffffffff8000f95a: 97ba add a5,a5,a4
ffffffff8000f95c: 9381 srli a5,a5,0x20
ffffffff8000f95e: fff7c713 not a4,a5
ffffffff8000f962: 6107d79b roriw a5,a5,0x10
ffffffff8000f966: 40f707bb subw a5,a4,a5
^~~~~~~~~~~~~~~~~~ This block is the Zbb implementation
roriw %[csum], %[csum], 16 \n\
subw %[csum], %[fold_temp], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
}
return (__force __sum16)(csum >> 16);
ffffffff8000f96a: 0107d51b srliw a0,a5,0x10
ffffffff8000f96e: a015 j ffffffff8000f992 <ip_fast_csum+0x62>
... and then it jumps further to more code. Then comes the non-Zbb
implementation, which starts with a rotate operation as well but has to
use three instructions for it:
* @word: value to rotate
* @shift: bits to roll
*/
static inline __u64 ror64(__u64 word, unsigned int shift)
{
return (word >> (shift & 63)) | (word << ((-shift) & 63));
ffffffff8000f970: 0207d693 srli a3,a5,0x20
ffffffff8000f974: 02079713 slli a4,a5,0x20
ffffffff8000f978: 8f55 or a4,a4,a3
...
And the non-Zbb implementation goes on...
© 2016 - 2026 Red Hat, Inc.