Use __riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB,
replacing the use of asm goto with ALTERNATIVE.
The "likely" variant is used to match the behavior of the original
implementation using ALTERNATIVE("j %l[legacy]", "nop", ...).
Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
---
arch/riscv/include/asm/bitops.h | 32 ++++++++------------------------
1 file changed, 8 insertions(+), 24 deletions(-)
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index d59310f74c2ba70caeb7b9b0e9221882117583f5..f70ccc0c2ffb86a6fda3bc373504143d0c6a1093 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -47,9 +47,8 @@
static __always_inline unsigned long variable__ffs(unsigned long word)
{
- asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
- RISCV_ISA_EXT_ZBB, 1)
- : : : : legacy);
+ if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
+ return generic___ffs(word);
asm volatile (".option push\n"
".option arch,+zbb\n"
@@ -58,9 +57,6 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
: "=r" (word) : "r" (word) :);
return word;
-
-legacy:
- return generic___ffs(word);
}
/**
@@ -76,9 +72,8 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
static __always_inline unsigned long variable__fls(unsigned long word)
{
- asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
- RISCV_ISA_EXT_ZBB, 1)
- : : : : legacy);
+ if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
+ return generic___fls(word);
asm volatile (".option push\n"
".option arch,+zbb\n"
@@ -87,9 +82,6 @@ static __always_inline unsigned long variable__fls(unsigned long word)
: "=r" (word) : "r" (word) :);
return BITS_PER_LONG - 1 - word;
-
-legacy:
- return generic___fls(word);
}
/**
@@ -105,9 +97,8 @@ static __always_inline unsigned long variable__fls(unsigned long word)
static __always_inline int variable_ffs(int x)
{
- asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
- RISCV_ISA_EXT_ZBB, 1)
- : : : : legacy);
+ if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
+ return generic_ffs(x);
if (!x)
return 0;
@@ -119,9 +110,6 @@ static __always_inline int variable_ffs(int x)
: "=r" (x) : "r" (x) :);
return x + 1;
-
-legacy:
- return generic_ffs(x);
}
/**
@@ -137,9 +125,8 @@ static __always_inline int variable_ffs(int x)
static __always_inline int variable_fls(unsigned int x)
{
- asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
- RISCV_ISA_EXT_ZBB, 1)
- : : : : legacy);
+ if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
+ return generic_fls(x);
if (!x)
return 0;
@@ -151,9 +138,6 @@ static __always_inline int variable_fls(unsigned int x)
: "=r" (x) : "r" (x) :);
return 32 - x;
-
-legacy:
- return generic_fls(x);
}
/**
--
2.50.1
On Thu, Aug 21, 2025 at 05:16:34PM +0800, Vivian Wang wrote: > Use __riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB, > replacing the use of asm goto with ALTERNATIVE. > > The "likely" variant is used to match the behavior of the original > implementation using ALTERNATIVE("j %l[legacy]", "nop", ...). > > Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn> > --- > arch/riscv/include/asm/bitops.h | 32 ++++++++------------------------ > 1 file changed, 8 insertions(+), 24 deletions(-) > > diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h > index d59310f74c2ba70caeb7b9b0e9221882117583f5..f70ccc0c2ffb86a6fda3bc373504143d0c6a1093 100644 > --- a/arch/riscv/include/asm/bitops.h > +++ b/arch/riscv/include/asm/bitops.h > @@ -47,9 +47,8 @@ > > static __always_inline unsigned long variable__ffs(unsigned long word) > { > - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, > - RISCV_ISA_EXT_ZBB, 1) > - : : : : legacy); > + if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB)) > + return generic___ffs(word); So, on the previous round you spent quite a lot of time explaining how 'unlikely()' version is handy over '!likely()', and now use just the latter. I feel really lost about how the code generation should look. Can you please share bloat-o-meter report against this patch? Can you also show an example of code generation before and after? Have you tried the 'unlikely()` one? How the output looks? > asm volatile (".option push\n" > ".option arch,+zbb\n" Yeah, now the diff is much cleaner. Thanks.
On 8/21/25 22:44, Yury Norov wrote: > On Thu, Aug 21, 2025 at 05:16:34PM +0800, Vivian Wang wrote: >> Use __riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB, >> replacing the use of asm goto with ALTERNATIVE. >> >> The "likely" variant is used to match the behavior of the original >> implementation using ALTERNATIVE("j %l[legacy]", "nop", ...). >> >> Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn> >> --- >> arch/riscv/include/asm/bitops.h | 32 ++++++++------------------------ >> 1 file changed, 8 insertions(+), 24 deletions(-) >> >> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h >> index d59310f74c2ba70caeb7b9b0e9221882117583f5..f70ccc0c2ffb86a6fda3bc373504143d0c6a1093 100644 >> --- a/arch/riscv/include/asm/bitops.h >> +++ b/arch/riscv/include/asm/bitops.h >> @@ -47,9 +47,8 @@ >> >> static __always_inline unsigned long variable__ffs(unsigned long word) >> { >> - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, >> - RISCV_ISA_EXT_ZBB, 1) >> - : : : : legacy); >> + if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB)) >> + return generic___ffs(word); > So, on the previous round you spent quite a lot of time explaining how > 'unlikely()' version is handy over '!likely()', and now use just the > latter. I feel really lost about how the code generation should look. It's not "handy". The operative part is "has_extension", and both functions return true if the extension is available and false if not. Functionally: if (likely()) <-- equivalent --> if (unlikely()) if (!likely()) <-- equivalent --> if (!unlikely()) Whereas: if (likely()) <-- **opposite of** --> if (!unlikely()) if (unlikely()) <-- **opposite of** --> if (!likely()) All the asm goto dispatch stuff work like this: static_branch_{likely,unlikely}, (arm64) alternative_has_cap_{likely,unlikely}, __riscv_has_extension_{likely,unlikely}. Maybe it's confusing, but it seems to be the convention. And, codegen-wise: ALTERNATIVE("j %l[no_alt]", "nop", ...) -> likely() ALTERNATIVE("nop", "j %l[has_alt]", ...) -> unlikely() Since the original code has the "likely" pattern, using "if (likely())" preserves it. Whatever the codegen was, it's still the same. > Can you please share bloat-o-meter report against this patch? Can you > also show an example of code generation before and after? Have you > tried the 'unlikely()` one? How the output looks? Thanks for the tip on bloat-o-meter. I'll take a look tomorrow. >> asm volatile (".option push\n" >> ".option arch,+zbb\n" > Yeah, now the diff is much cleaner. Thanks. This is why the condition at the top needed to be "!has_extension". So the structure can be: if (!has_extension) return sw_version; multi_line zbb_version; If I used "if (has_extension)" the code would need be if (has_extension) { multi_line zbb_version; } else { sw_version; } And whether it was "likely" or "unlikely" had no bearing on the structure of the code. Vivian "dramforever" Wang
On 8/22/25 01:46, Vivian Wang wrote: > [...] >> Can you please share bloat-o-meter report against this patch? Can you >> also show an example of code generation before and after? Have you >> tried the 'unlikely()` one? How the output looks? > Thanks for the tip on bloat-o-meter. I'll take a look tomorrow. That "tomorrow" took a while. This is what it looks like, old being v6.17-rc1 and new being this patch series. It's not as identical as I had hoped originally, but I had went into each plus and a few minuses and confirmed that the actual asm goto part seems to have been recreated as expected. The rest of the differences appear to be explainable by unpredictable factors in the compiler (GCC 14.3.0 in my case). For example, bpf_lru_populate seems to have got worse register allocation. It uses one more callee-saved register. Moreover, RISC-V compressed instructions has shorter encodings when used with some registers, so for example sd a1,32(s1) is encodable as 2 bytes, but sd a1,32(s2) is only encodable as 4 bytes. This appears to explain the +16 in code size. As far as I can tell, which is basically me staring at objdump and seeing "yeah looks normal", all of these are caused by random factors due to changes in how now we write the control structures: add/remove: 0/0 grow/shrink: 14/24 up/down: 72/-234 (-162) Function old new delta bpf_lru_populate 450 466 +16 spi_nor_scan 3506 3516 +10 wants_mount_setattr 688 696 +8 regulator_irq_map_event_simple 202 208 +6 idling_boosts_thr_without_issues 198 204 +6 trie_lookup_elem 704 708 +4 ethnl_set_tsconfig 1694 1698 +4 dev_xdp_attach 1142 1146 +4 add_mtd_device 1468 1472 +4 xhci_count_num_new_endpoints.isra 104 106 +2 rtl_init_one 4360 4362 +2 queued_read_lock_slowpath 414 416 +2 osq_lock 262 264 +2 cpufreq_dbs_governor_start 520 522 +2 thaw_super_locked 622 620 -2 stop_machine_from_inactive_cpu 372 370 -2 objpool_init 962 960 -2 memweight 168 166 -2 irq_destroy_ipi 248 246 -2 fat_fill_super 3408 3406 -2 create_boot_cache 292 290 -2 snd_soc_dapm_get_volsw 588 584 -4 ip_rcv_core 770 766 -4 ip_mc_check_igmp 736 732 -4 tmigr_quick_check 224 218 -6 nvdimm_security_flags 152 146 -6 inode_switch_wbs_work_fn 1934 1928 -6 sd_uhs2_power_up 176 168 -8 mmc_power_up.part 402 394 -8 __alloc_bucket_spinlocks 190 182 -8 __clk_hw_register_mux 624 612 -12 bfq_bfqq_expire 872 858 -14 perf_prepare_sample 1810 1794 -16 wq_update_node_max_active 308 288 -20 blk_mq_num_queues 94 74 -20 register_pidns_sysctls 248 226 -22 dw8250_setup_port 1212 1182 -30 build_sched_domains 4748 4716 -32 Total: Before=16029885, After=16029723, chg -0.00% That's all I can figure out. I hope this is satisfactory, to anyone reading. Vivian "dramforever" Wang
On Fri, Aug 22, 2025 at 01:46:19AM +0800, Vivian Wang wrote: > On 8/21/25 22:44, Yury Norov wrote: > > > On Thu, Aug 21, 2025 at 05:16:34PM +0800, Vivian Wang wrote: > >> Use __riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB, > >> replacing the use of asm goto with ALTERNATIVE. > >> > >> The "likely" variant is used to match the behavior of the original > >> implementation using ALTERNATIVE("j %l[legacy]", "nop", ...). > >> > >> Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn> > >> --- > >> arch/riscv/include/asm/bitops.h | 32 ++++++++------------------------ > >> 1 file changed, 8 insertions(+), 24 deletions(-) > >> > >> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h > >> index d59310f74c2ba70caeb7b9b0e9221882117583f5..f70ccc0c2ffb86a6fda3bc373504143d0c6a1093 100644 > >> --- a/arch/riscv/include/asm/bitops.h > >> +++ b/arch/riscv/include/asm/bitops.h > >> @@ -47,9 +47,8 @@ > >> > >> static __always_inline unsigned long variable__ffs(unsigned long word) > >> { > >> - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, > >> - RISCV_ISA_EXT_ZBB, 1) > >> - : : : : legacy); > >> + if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB)) > >> + return generic___ffs(word); > > So, on the previous round you spent quite a lot of time explaining how > > 'unlikely()' version is handy over '!likely()', and now use just the > > latter. I feel really lost about how the code generation should look. > > It's not "handy". The operative part is "has_extension", and both > functions return true if the extension is available and false if not. > Functionally: > > if (likely()) <-- equivalent --> if (unlikely()) > if (!likely()) <-- equivalent --> if (!unlikely()) > > Whereas: > > if (likely()) <-- **opposite of** --> if (!unlikely()) > if (unlikely()) <-- **opposite of** --> if (!likely()) > > All the asm goto dispatch stuff work like this: > static_branch_{likely,unlikely}, (arm64) > alternative_has_cap_{likely,unlikely}, > __riscv_has_extension_{likely,unlikely}. Maybe it's confusing, but it > seems to be the convention. > > And, codegen-wise: > > ALTERNATIVE("j %l[no_alt]", "nop", ...) -> likely() ALTERNATIVE("nop", > "j %l[has_alt]", ...) -> unlikely() > > Since the original code has the "likely" pattern, using "if (likely())" > preserves it. Whatever the codegen was, it's still the same. > > > Can you please share bloat-o-meter report against this patch? Can you > > also show an example of code generation before and after? Have you > > tried the 'unlikely()` one? How the output looks? > > Thanks for the tip on bloat-o-meter. I'll take a look tomorrow. > > >> asm volatile (".option push\n" > >> ".option arch,+zbb\n" > > Yeah, now the diff is much cleaner. Thanks. > > This is why the condition at the top needed to be "!has_extension". So > the structure can be: > > if (!has_extension) > return sw_version; > > multi_line > zbb_version; > > If I used "if (has_extension)" the code would need be > > if (has_extension) { > multi_line > zbb_version; > } else { > sw_version; > } > > And whether it was "likely" or "unlikely" had no bearing on the > structure of the code. OK, I see. Sorry for confusion. Acked-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
On 8/22/25 01:46, Vivian Wang wrote: > [...] > > And, codegen-wise: > > ALTERNATIVE("j %l[no_alt]", "nop", ...) -> likely() ALTERNATIVE("nop", > "j %l[has_alt]", ...) -> unlikely() > I messed up the formatting, should be: ALTERNATIVE("j %l[no_alt]", "nop", ...) -> likely() ALTERNATIVE("nop", "j %l[has_alt]", ...) -> unlikely() Vivian "dramforever" Wang
© 2016 - 2025 Red Hat, Inc.