[PATCH] riscv: Improve sbi_ecall() code generation by reordering arguments

Alexandre Ghiti posted 1 patch 1 year, 10 months ago
arch/riscv/include/asm/sbi.h | 10 ++++++----
arch/riscv/kernel/sbi.c      | 10 +++++-----
2 files changed, 11 insertions(+), 9 deletions(-)
[PATCH] riscv: Improve sbi_ecall() code generation by reordering arguments
Posted by Alexandre Ghiti 1 year, 10 months ago
The sbi_ecall() function arguments are not in the same order as the
ecall arguments, so we end up re-ordering the registers before the
ecall which is useless and costly.

So simply reorder the arguments in the same way as expected by ecall.
Instead of directly reordering the arguments of sbi_ecall(), use a proxy
macro, since the current ordering is more natural for callers.

Before:

Dump of assembler code for function sbi_ecall:
   0xffffffff800085e0 <+0>: add sp,sp,-32
   0xffffffff800085e2 <+2>: sd s0,24(sp)
   0xffffffff800085e4 <+4>: mv t1,a0
   0xffffffff800085e6 <+6>: add s0,sp,32
   0xffffffff800085e8 <+8>: mv t3,a1
   0xffffffff800085ea <+10>: mv a0,a2
   0xffffffff800085ec <+12>: mv a1,a3
   0xffffffff800085ee <+14>: mv a2,a4
   0xffffffff800085f0 <+16>: mv a3,a5
   0xffffffff800085f2 <+18>: mv a4,a6
   0xffffffff800085f4 <+20>: mv a5,a7
   0xffffffff800085f6 <+22>: mv a6,t3
   0xffffffff800085f8 <+24>: mv a7,t1
   0xffffffff800085fa <+26>: ecall
   0xffffffff800085fe <+30>: ld s0,24(sp)
   0xffffffff80008600 <+32>: add sp,sp,32
   0xffffffff80008602 <+34>: ret

After:

Dump of assembler code for function __sbi_ecall:
   0xffffffff8000b6b2 <+0>:	add	sp,sp,-32
   0xffffffff8000b6b4 <+2>:	sd	s0,24(sp)
   0xffffffff8000b6b6 <+4>:	add	s0,sp,32
   0xffffffff8000b6b8 <+6>:	ecall
   0xffffffff8000b6bc <+10>:	ld	s0,24(sp)
   0xffffffff8000b6be <+12>:	add	sp,sp,32
   0xffffffff8000b6c0 <+14>:	ret

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/riscv/include/asm/sbi.h | 10 ++++++----
 arch/riscv/kernel/sbi.c      | 10 +++++-----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 6e68f8dff76b..9041b009d3b5 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -292,10 +292,12 @@ struct sbiret {
 };
 
 void sbi_init(void);
-struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
-			unsigned long arg1, unsigned long arg2,
-			unsigned long arg3, unsigned long arg4,
-			unsigned long arg5);
+struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1,
+			  unsigned long arg2, unsigned long arg3,
+			  unsigned long arg4, unsigned long arg5,
+			  int fid, int ext);
+#define sbi_ecall(e, f, a0, a1, a2, a3, a4, a5)	\
+		__sbi_ecall(a0, a1, a2, a3, a4, a5, f, e)
 
 #ifdef CONFIG_RISCV_SBI_V01
 void sbi_console_putchar(int ch);
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index e66e0999a800..5719fa03c3d1 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -24,10 +24,10 @@ static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
 			   unsigned long start, unsigned long size,
 			   unsigned long arg4, unsigned long arg5) __ro_after_init;
 
-struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
-			unsigned long arg1, unsigned long arg2,
-			unsigned long arg3, unsigned long arg4,
-			unsigned long arg5)
+struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1,
+			  unsigned long arg2, unsigned long arg3,
+			  unsigned long arg4, unsigned long arg5,
+			  int fid, int ext)
 {
 	struct sbiret ret;
 
@@ -48,7 +48,7 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
 
 	return ret;
 }
-EXPORT_SYMBOL(sbi_ecall);
+EXPORT_SYMBOL(__sbi_ecall);
 
 int sbi_err_map_linux_errno(int err)
 {
-- 
2.39.2
Re: [PATCH] riscv: Improve sbi_ecall() code generation by reordering arguments
Posted by Qingfang Deng 1 year, 10 months ago
Hi Alexandre,

You can simply make sbi_ecall `__always_inline`, so the C function call
overhead can be fully avoided.
Re: [PATCH] riscv: Improve sbi_ecall() code generation by reordering arguments
Posted by Alexandre Ghiti 1 year, 10 months ago
Hi Qingfang,

On Sat, Mar 23, 2024 at 7:26 AM Qingfang Deng <dqfext@gmail.com> wrote:
>
> Hi Alexandre,
>
> You can simply make sbi_ecall `__always_inline`, so the C function call
> overhead can be fully avoided.

I understand your point, but I don't think we need to mark
sbi_ecall() as inline: it's not in any hot path, so it's not worth it.
This patch simply gets rid of some really useless overhead, but it does
not visibly accelerate anything.

I hope it makes sense,

Thanks,

Alex
Re: [External] Re: [PATCH] riscv: Improve sbi_ecall() code generation by reordering arguments
Posted by yunhui cui 1 year, 10 months ago
Hi Alex,

On Mon, Mar 25, 2024 at 2:20 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Qingfang,
>
> On Sat, Mar 23, 2024 at 7:26 AM Qingfang Deng <dqfext@gmail.com> wrote:
> >
> > Hi Alexandre,
> >
> > You can simply make sbi_ecall `__always_inline`, so the C function call
> > overhead can be fully avoided.
>
> I understand your point, though I don't think we need to mark
> sbi_ecall() as inline, it's not in any hot path so it's not worth it.
> This patch simply gets rid of a really useless overhead, but it does
> not visibly accelerate anything.
>
> I hope it makes sense,
>
> Thanks,
>
> Alex
>

The compiler imposes inline size limits, so I agree with the approach
taken in this patch rather than inlining.
Please refer to:
https://github.com/gcc-mirror/gcc/blob/master/gcc/common.opt,
"finline-limit-"

So:
Reviewed-by: Yunhui Cui <cuiyunhui@bytedance.com>

Thanks,
Yunhui
Re: [PATCH] riscv: Improve sbi_ecall() code generation by reordering arguments
Posted by Atish Patra 1 year, 10 months ago
On 3/22/24 04:26, Alexandre Ghiti wrote:
> The sbi_ecall() function arguments are not in the same order as the
> ecall arguments, so we end up re-ordering the registers before the
> ecall which is useless and costly.
> 
> So simply reorder the arguments in the same way as expected by ecall.
> Instead of reordering directly the arguments of sbi_ecall(), use a proxy
> macro since the current ordering is more natural.
> 
> Before:
> 
> Dump of assembler code for function sbi_ecall:
>     0xffffffff800085e0 <+0>: add sp,sp,-32
>     0xffffffff800085e2 <+2>: sd s0,24(sp)
>     0xffffffff800085e4 <+4>: mv t1,a0
>     0xffffffff800085e6 <+6>: add s0,sp,32
>     0xffffffff800085e8 <+8>: mv t3,a1
>     0xffffffff800085ea <+10>: mv a0,a2
>     0xffffffff800085ec <+12>: mv a1,a3
>     0xffffffff800085ee <+14>: mv a2,a4
>     0xffffffff800085f0 <+16>: mv a3,a5
>     0xffffffff800085f2 <+18>: mv a4,a6
>     0xffffffff800085f4 <+20>: mv a5,a7
>     0xffffffff800085f6 <+22>: mv a6,t3
>     0xffffffff800085f8 <+24>: mv a7,t1
>     0xffffffff800085fa <+26>: ecall
>     0xffffffff800085fe <+30>: ld s0,24(sp)
>     0xffffffff80008600 <+32>: add sp,sp,32
>     0xffffffff80008602 <+34>: ret
> 
> After:
> 
> Dump of assembler code for function __sbi_ecall:
>     0xffffffff8000b6b2 <+0>:	add	sp,sp,-32
>     0xffffffff8000b6b4 <+2>:	sd	s0,24(sp)
>     0xffffffff8000b6b6 <+4>:	add	s0,sp,32
>     0xffffffff8000b6b8 <+6>:	ecall
>     0xffffffff8000b6bc <+10>:	ld	s0,24(sp)
>     0xffffffff8000b6be <+12>:	add	sp,sp,32
>     0xffffffff8000b6c0 <+14>:	ret
> 

Nice!

> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
>   arch/riscv/include/asm/sbi.h | 10 ++++++----
>   arch/riscv/kernel/sbi.c      | 10 +++++-----
>   2 files changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
> index 6e68f8dff76b..9041b009d3b5 100644
> --- a/arch/riscv/include/asm/sbi.h
> +++ b/arch/riscv/include/asm/sbi.h
> @@ -292,10 +292,12 @@ struct sbiret {
>   };
>   
>   void sbi_init(void);
> -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
> -			unsigned long arg1, unsigned long arg2,
> -			unsigned long arg3, unsigned long arg4,
> -			unsigned long arg5);
> +struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1,
> +			  unsigned long arg2, unsigned long arg3,
> +			  unsigned long arg4, unsigned long arg5,
> +			  int fid, int ext);
> +#define sbi_ecall(e, f, a0, a1, a2, a3, a4, a5)	\
> +		__sbi_ecall(a0, a1, a2, a3, a4, a5, f, e)
>   
>   #ifdef CONFIG_RISCV_SBI_V01
>   void sbi_console_putchar(int ch);
> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
> index e66e0999a800..5719fa03c3d1 100644
> --- a/arch/riscv/kernel/sbi.c
> +++ b/arch/riscv/kernel/sbi.c
> @@ -24,10 +24,10 @@ static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
>   			   unsigned long start, unsigned long size,
>   			   unsigned long arg4, unsigned long arg5) __ro_after_init;
>   
> -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
> -			unsigned long arg1, unsigned long arg2,
> -			unsigned long arg3, unsigned long arg4,
> -			unsigned long arg5)
> +struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1,
> +			  unsigned long arg2, unsigned long arg3,
> +			  unsigned long arg4, unsigned long arg5,
> +			  int fid, int ext)
>   {
>   	struct sbiret ret;
>   
> @@ -48,7 +48,7 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>   
>   	return ret;
>   }
> -EXPORT_SYMBOL(sbi_ecall);
> +EXPORT_SYMBOL(__sbi_ecall);
>   
>   int sbi_err_map_linux_errno(int err)
>   {

Reviewed-by: Atish Patra <atishp@rivosinc.com>