[PATCH v2 4/5] riscv: bitops: Use __riscv_has_extension_likely

Vivian Wang posted 5 patches 1 month, 1 week ago
[PATCH v2 4/5] riscv: bitops: Use __riscv_has_extension_likely
Posted by Vivian Wang 1 month, 1 week ago
Use __riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB,
replacing the use of asm goto with ALTERNATIVE.

The "likely" variant is used to match the behavior of the original
implementation using ALTERNATIVE("j %l[legacy]", "nop", ...).

Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
---
 arch/riscv/include/asm/bitops.h | 32 ++++++++------------------------
 1 file changed, 8 insertions(+), 24 deletions(-)

diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index d59310f74c2ba70caeb7b9b0e9221882117583f5..f70ccc0c2ffb86a6fda3bc373504143d0c6a1093 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -47,9 +47,8 @@
 
 static __always_inline unsigned long variable__ffs(unsigned long word)
 {
-	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
-				      RISCV_ISA_EXT_ZBB, 1)
-			  : : : : legacy);
+	if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
+		return generic___ffs(word);
 
 	asm volatile (".option push\n"
 		      ".option arch,+zbb\n"
@@ -58,9 +57,6 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
 		      : "=r" (word) : "r" (word) :);
 
 	return word;
-
-legacy:
-	return generic___ffs(word);
 }
 
 /**
@@ -76,9 +72,8 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
 
 static __always_inline unsigned long variable__fls(unsigned long word)
 {
-	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
-				      RISCV_ISA_EXT_ZBB, 1)
-			  : : : : legacy);
+	if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
+		return generic___fls(word);
 
 	asm volatile (".option push\n"
 		      ".option arch,+zbb\n"
@@ -87,9 +82,6 @@ static __always_inline unsigned long variable__fls(unsigned long word)
 		      : "=r" (word) : "r" (word) :);
 
 	return BITS_PER_LONG - 1 - word;
-
-legacy:
-	return generic___fls(word);
 }
 
 /**
@@ -105,9 +97,8 @@ static __always_inline unsigned long variable__fls(unsigned long word)
 
 static __always_inline int variable_ffs(int x)
 {
-	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
-				      RISCV_ISA_EXT_ZBB, 1)
-			  : : : : legacy);
+	if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
+		return generic_ffs(x);
 
 	if (!x)
 		return 0;
@@ -119,9 +110,6 @@ static __always_inline int variable_ffs(int x)
 		      : "=r" (x) : "r" (x) :);
 
 	return x + 1;
-
-legacy:
-	return generic_ffs(x);
 }
 
 /**
@@ -137,9 +125,8 @@ static __always_inline int variable_ffs(int x)
 
 static __always_inline int variable_fls(unsigned int x)
 {
-	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
-				      RISCV_ISA_EXT_ZBB, 1)
-			  : : : : legacy);
+	if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
+		return generic_fls(x);
 
 	if (!x)
 		return 0;
@@ -151,9 +138,6 @@ static __always_inline int variable_fls(unsigned int x)
 		      : "=r" (x) : "r" (x) :);
 
 	return 32 - x;
-
-legacy:
-	return generic_fls(x);
 }
 
 /**

-- 
2.50.1
Re: [PATCH v2 4/5] riscv: bitops: Use __riscv_has_extension_likely
Posted by Yury Norov 1 month, 1 week ago
On Thu, Aug 21, 2025 at 05:16:34PM +0800, Vivian Wang wrote:
> Use __riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB,
> replacing the use of asm goto with ALTERNATIVE.
> 
> The "likely" variant is used to match the behavior of the original
> implementation using ALTERNATIVE("j %l[legacy]", "nop", ...).
> 
> Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
> ---
>  arch/riscv/include/asm/bitops.h | 32 ++++++++------------------------
>  1 file changed, 8 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> index d59310f74c2ba70caeb7b9b0e9221882117583f5..f70ccc0c2ffb86a6fda3bc373504143d0c6a1093 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -47,9 +47,8 @@
>  
>  static __always_inline unsigned long variable__ffs(unsigned long word)
>  {
> -	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
> -				      RISCV_ISA_EXT_ZBB, 1)
> -			  : : : : legacy);
> +	if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
> +		return generic___ffs(word);

So, on the previous round you spent quite a lot of time explaining how
'unlikely()' version is handy over '!likely()', and now use just the
latter. I feel really lost about how the code generation should look.

Can you please share bloat-o-meter report against this patch? Can you
also show an example of code generation before and after? Have you
tried the 'unlikely()` one? How the output looks?

>  	asm volatile (".option push\n"
>  		      ".option arch,+zbb\n"

Yeah, now the diff is much cleaner. Thanks.
Re: [PATCH v2 4/5] riscv: bitops: Use __riscv_has_extension_likely
Posted by Vivian Wang 1 month, 1 week ago
On 8/21/25 22:44, Yury Norov wrote:

> On Thu, Aug 21, 2025 at 05:16:34PM +0800, Vivian Wang wrote:
>> Use __riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB,
>> replacing the use of asm goto with ALTERNATIVE.
>>
>> The "likely" variant is used to match the behavior of the original
>> implementation using ALTERNATIVE("j %l[legacy]", "nop", ...).
>>
>> Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
>> ---
>>  arch/riscv/include/asm/bitops.h | 32 ++++++++------------------------
>>  1 file changed, 8 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
>> index d59310f74c2ba70caeb7b9b0e9221882117583f5..f70ccc0c2ffb86a6fda3bc373504143d0c6a1093 100644
>> --- a/arch/riscv/include/asm/bitops.h
>> +++ b/arch/riscv/include/asm/bitops.h
>> @@ -47,9 +47,8 @@
>>  
>>  static __always_inline unsigned long variable__ffs(unsigned long word)
>>  {
>> -	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
>> -				      RISCV_ISA_EXT_ZBB, 1)
>> -			  : : : : legacy);
>> +	if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
>> +		return generic___ffs(word);
> So, on the previous round you spent quite a lot of time explaining how
> 'unlikely()' version is handy over '!likely()', and now use just the
> latter. I feel really lost about how the code generation should look.

It's not "handy". The operative part is "has_extension", and both
functions return true if the extension is available and false if not.
Functionally:

    if (likely()) <-- equivalent --> if (unlikely())
    if (!likely()) <-- equivalent --> if (!unlikely())

Whereas:

    if (likely()) <-- **opposite of** --> if (!unlikely())
    if (unlikely()) <-- **opposite of** --> if (!likely())

All the asm goto dispatch stuff work like this:
static_branch_{likely,unlikely}, (arm64)
alternative_has_cap_{likely,unlikely},
__riscv_has_extension_{likely,unlikely}. Maybe it's confusing, but it
seems to be the convention.

And, codegen-wise:

ALTERNATIVE("j %l[no_alt]", "nop", ...) -> likely() ALTERNATIVE("nop",
"j %l[has_alt]", ...) -> unlikely()

Since the original code has the "likely" pattern, using "if (likely())"
preserves it. Whatever the codegen was, it's still the same.

> Can you please share bloat-o-meter report against this patch? Can you
> also show an example of code generation before and after? Have you
> tried the 'unlikely()` one? How the output looks?

Thanks for the tip on bloat-o-meter. I'll take a look tomorrow.

>>  	asm volatile (".option push\n"
>>  		      ".option arch,+zbb\n"
> Yeah, now the diff is much cleaner. Thanks.

This is why the condition at the top needed to be "!has_extension". So
the structure can be:

    if (!has_extension)
        return sw_version;

    multi_line
    zbb_version;

If I used "if (has_extension)" the code would need be

    if (has_extension) {
        multi_line
        zbb_version;
    } else {
        sw_version;
    }

And whether it was "likely" or "unlikely" had no bearing on the
structure of the code.

Vivian "dramforever" Wang

Re: [PATCH v2 4/5] riscv: bitops: Use __riscv_has_extension_likely
Posted by Vivian Wang 1 month, 1 week ago
On 8/22/25 01:46, Vivian Wang wrote:

> [...]
>> Can you please share bloat-o-meter report against this patch? Can you
>> also show an example of code generation before and after? Have you
>> tried the 'unlikely()` one? How the output looks?
> Thanks for the tip on bloat-o-meter. I'll take a look tomorrow.

That "tomorrow" took a while.

This is what it looks like, old being v6.17-rc1 and new being this patch
series.

It's not as identical as I had hoped originally, but I had went into
each plus and a few minuses and confirmed that the actual asm goto part
seems to have been recreated as expected. The rest of the differences
appear to be explainable by unpredictable factors in the compiler (GCC
14.3.0 in my case).

For example, bpf_lru_populate seems to have got worse register
allocation. It uses one more callee-saved register. Moreover, RISC-V
compressed instructions has shorter encodings when used with some
registers, so for example sd a1,32(s1) is encodable as 2 bytes, but sd
a1,32(s2) is only encodable as 4 bytes. This appears to explain the +16
in code size.

As far as I can tell, which is basically me staring at objdump and
seeing "yeah looks normal", all of these are caused by random factors
due to changes in how now we write the control structures:

add/remove: 0/0 grow/shrink: 14/24 up/down: 72/-234 (-162)
Function                                     old     new   delta
bpf_lru_populate                             450     466     +16
spi_nor_scan                                3506    3516     +10
wants_mount_setattr                          688     696      +8
regulator_irq_map_event_simple               202     208      +6
idling_boosts_thr_without_issues             198     204      +6
trie_lookup_elem                             704     708      +4
ethnl_set_tsconfig                          1694    1698      +4
dev_xdp_attach                              1142    1146      +4
add_mtd_device                              1468    1472      +4
xhci_count_num_new_endpoints.isra            104     106      +2
rtl_init_one                                4360    4362      +2
queued_read_lock_slowpath                    414     416      +2
osq_lock                                     262     264      +2
cpufreq_dbs_governor_start                   520     522      +2
thaw_super_locked                            622     620      -2
stop_machine_from_inactive_cpu               372     370      -2
objpool_init                                 962     960      -2
memweight                                    168     166      -2
irq_destroy_ipi                              248     246      -2
fat_fill_super                              3408    3406      -2
create_boot_cache                            292     290      -2
snd_soc_dapm_get_volsw                       588     584      -4
ip_rcv_core                                  770     766      -4
ip_mc_check_igmp                             736     732      -4
tmigr_quick_check                            224     218      -6
nvdimm_security_flags                        152     146      -6
inode_switch_wbs_work_fn                    1934    1928      -6
sd_uhs2_power_up                             176     168      -8
mmc_power_up.part                            402     394      -8
__alloc_bucket_spinlocks                     190     182      -8
__clk_hw_register_mux                        624     612     -12
bfq_bfqq_expire                              872     858     -14
perf_prepare_sample                         1810    1794     -16
wq_update_node_max_active                    308     288     -20
blk_mq_num_queues                             94      74     -20
register_pidns_sysctls                       248     226     -22
dw8250_setup_port                           1212    1182     -30
build_sched_domains                         4748    4716     -32
Total: Before=16029885, After=16029723, chg -0.00%

That's all I can figure out. I hope this is satisfactory, to anyone reading.

Vivian "dramforever" Wang

Re: [PATCH v2 4/5] riscv: bitops: Use __riscv_has_extension_likely
Posted by Yury Norov 1 month, 1 week ago
On Fri, Aug 22, 2025 at 01:46:19AM +0800, Vivian Wang wrote:
> On 8/21/25 22:44, Yury Norov wrote:
> 
> > On Thu, Aug 21, 2025 at 05:16:34PM +0800, Vivian Wang wrote:
> >> Use __riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB,
> >> replacing the use of asm goto with ALTERNATIVE.
> >>
> >> The "likely" variant is used to match the behavior of the original
> >> implementation using ALTERNATIVE("j %l[legacy]", "nop", ...).
> >>
> >> Signed-off-by: Vivian Wang <wangruikang@iscas.ac.cn>
> >> ---
> >>  arch/riscv/include/asm/bitops.h | 32 ++++++++------------------------
> >>  1 file changed, 8 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> >> index d59310f74c2ba70caeb7b9b0e9221882117583f5..f70ccc0c2ffb86a6fda3bc373504143d0c6a1093 100644
> >> --- a/arch/riscv/include/asm/bitops.h
> >> +++ b/arch/riscv/include/asm/bitops.h
> >> @@ -47,9 +47,8 @@
> >>  
> >>  static __always_inline unsigned long variable__ffs(unsigned long word)
> >>  {
> >> -	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
> >> -				      RISCV_ISA_EXT_ZBB, 1)
> >> -			  : : : : legacy);
> >> +	if (!__riscv_has_extension_likely(0, RISCV_ISA_EXT_ZBB))
> >> +		return generic___ffs(word);
> > So, on the previous round you spent quite a lot of time explaining how
> > 'unlikely()' version is handy over '!likely()', and now use just the
> > latter. I feel really lost about how the code generation should look.
> 
> It's not "handy". The operative part is "has_extension", and both
> functions return true if the extension is available and false if not.
> Functionally:
> 
>     if (likely()) <-- equivalent --> if (unlikely())
>     if (!likely()) <-- equivalent --> if (!unlikely())
> 
> Whereas:
> 
>     if (likely()) <-- **opposite of** --> if (!unlikely())
>     if (unlikely()) <-- **opposite of** --> if (!likely())
> 
> All the asm goto dispatch stuff work like this:
> static_branch_{likely,unlikely}, (arm64)
> alternative_has_cap_{likely,unlikely},
> __riscv_has_extension_{likely,unlikely}. Maybe it's confusing, but it
> seems to be the convention.
> 
> And, codegen-wise:
> 
> ALTERNATIVE("j %l[no_alt]", "nop", ...) -> likely() ALTERNATIVE("nop",
> "j %l[has_alt]", ...) -> unlikely()
> 
> Since the original code has the "likely" pattern, using "if (likely())"
> preserves it. Whatever the codegen was, it's still the same.
> 
> > Can you please share bloat-o-meter report against this patch? Can you
> > also show an example of code generation before and after? Have you
> > tried the 'unlikely()` one? How the output looks?
> 
> Thanks for the tip on bloat-o-meter. I'll take a look tomorrow.
> 
> >>  	asm volatile (".option push\n"
> >>  		      ".option arch,+zbb\n"
> > Yeah, now the diff is much cleaner. Thanks.
> 
> This is why the condition at the top needed to be "!has_extension". So
> the structure can be:
> 
>     if (!has_extension)
>         return sw_version;
> 
>     multi_line
>     zbb_version;
> 
> If I used "if (has_extension)" the code would need be
> 
>     if (has_extension) {
>         multi_line
>         zbb_version;
>     } else {
>         sw_version;
>     }
> 
> And whether it was "likely" or "unlikely" had no bearing on the
> structure of the code.

OK, I see. Sorry for confusion.

Acked-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Re: [PATCH v2 4/5] riscv: bitops: Use __riscv_has_extension_likely
Posted by Vivian Wang 1 month, 1 week ago
On 8/22/25 01:46, Vivian Wang wrote:

> [...]
>
> And, codegen-wise:
>
> ALTERNATIVE("j %l[no_alt]", "nop", ...) -> likely() ALTERNATIVE("nop",
> "j %l[has_alt]", ...) -> unlikely()
>
I messed up the formatting, should be:

    ALTERNATIVE("j %l[no_alt]", "nop", ...) -> likely()
    ALTERNATIVE("nop", "j %l[has_alt]", ...) -> unlikely()

Vivian "dramforever" Wang