arch/riscv/include/asm/trace.h | 54 ++++++++++++++++++++++++++++++++++ arch/riscv/kernel/sbi.c | 7 +++++ 2 files changed, 61 insertions(+) create mode 100644 arch/riscv/include/asm/trace.h
These are useful for measuring the latency of SBI calls. The SBI HSM
extension is excluded because those functions are called from contexts
such as cpuidle where instrumentation is not allowed.
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
Changes in v2:
- Remove some blank lines
arch/riscv/include/asm/trace.h | 54 ++++++++++++++++++++++++++++++++++
arch/riscv/kernel/sbi.c | 7 +++++
2 files changed, 61 insertions(+)
create mode 100644 arch/riscv/include/asm/trace.h
diff --git a/arch/riscv/include/asm/trace.h b/arch/riscv/include/asm/trace.h
new file mode 100644
index 000000000000..6151cee5450c
--- /dev/null
+++ b/arch/riscv/include/asm/trace.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM riscv
+
+#if !defined(_TRACE_RISCV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RISCV_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT_CONDITION(sbi_call,
+ TP_PROTO(int ext, int fid),
+ TP_ARGS(ext, fid),
+ TP_CONDITION(ext != SBI_EXT_HSM),
+
+ TP_STRUCT__entry(
+ __field(int, ext)
+ __field(int, fid)
+ ),
+
+ TP_fast_assign(
+ __entry->ext = ext;
+ __entry->fid = fid;
+ ),
+
+ TP_printk("ext=0x%x fid=%d", __entry->ext, __entry->fid)
+);
+
+TRACE_EVENT_CONDITION(sbi_return,
+ TP_PROTO(int ext, long error, long value),
+ TP_ARGS(ext, error, value),
+ TP_CONDITION(ext != SBI_EXT_HSM),
+
+ TP_STRUCT__entry(
+ __field(long, error)
+ __field(long, value)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->value = value;
+ ),
+
+ TP_printk("error=%ld value=0x%lx", __entry->error, __entry->value)
+);
+
+#endif /* _TRACE_RISCV_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index e66e0999a800..a1d21d8f5293 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -14,6 +14,9 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
+
/* default SBI version is 0.1 */
unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
EXPORT_SYMBOL(sbi_spec_version);
@@ -31,6 +34,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
{
struct sbiret ret;
+ trace_sbi_call(ext, fid);
+
register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
@@ -46,6 +51,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
ret.error = a0;
ret.value = a1;
+ trace_sbi_return(ext, ret.error, ret.value);
+
return ret;
}
EXPORT_SYMBOL(sbi_ecall);
--
2.43.1
On Thu, Mar 21, 2024 at 04:01:25PM -0700, Samuel Holland wrote: > These are useful for measuring the latency of SBI calls. The SBI HSM > extension is excluded because those functions are called from contexts > such as cpuidle where instrumentation is not allowed. > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > Signed-off-by: Samuel Holland <samuel.holland@sifive.com> I seem to have bisected a boot failure on v6.11-rc# to this commit, with a crash before the kernel ever gets to output anything - even on the bootconsole. For one of these crashes I got the below (which is actually U-Boots exception handler), but most of the time - nothing at all. I'll try to decodecode that tomorrow and fish out a config, because this is either config or toolchain dependant (I saw it in work, with a slightly different config etc to what I have here). Starting kernel ... Unhandled exception: Load access fault EPC: 000000008020e09c RA: 000000008020e510 TVAL: ffffffff81cbf1f0 EPC: 00000000408a009c RA: 00000000408a0510 reloc adjusted Code: 3597 01a4 b583 1f45 5613 0035 9a61 95b2 (618c) resetting ... reset not supported yet ### ERROR ### Please RESET the board ### Cheers, Conor.
On 13/08/2024 22:26, Conor Dooley wrote: > On Thu, Mar 21, 2024 at 04:01:25PM -0700, Samuel Holland wrote: >> These are useful for measuring the latency of SBI calls. The SBI HSM >> extension is excluded because those functions are called from contexts >> such as cpuidle where instrumentation is not allowed. >> >> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> >> Signed-off-by: Samuel Holland <samuel.holland@sifive.com> > I seem to have bisected a boot failure on v6.11-rc# to this commit, > with a crash before the kernel ever gets to output anything - even on > the bootconsole. For one of these crashes I got the below (which is > actually U-Boots exception handler), but most of the time - nothing at > all. I'll try to decodecode that tomorrow and fish out a config, because > this is either config or toolchain dependant (I saw it in work, with a > slightly different config etc to what I have here). > > > Starting kernel ... > > Unhandled exception: Load access fault > EPC: 000000008020e09c RA: 000000008020e510 TVAL: ffffffff81cbf1f0 > EPC: 00000000408a009c RA: 00000000408a0510 reloc adjusted > > Code: 3597 01a4 b583 1f45 5613 0035 9a61 95b2 (618c) > > > resetting ... > reset not supported yet > ### ERROR ### Please RESET the board ### > > > Cheers, > Conor. So I have just hit a similar issue while debugging the syzkaller failure. The problem is visible with KASAN, TRACEPOINTS and RISCV_ALTERNATIVE_EARLY. The latter is important since this is where the very early sbi call is made in riscv_fill_cpu_mfr_info() (https://elixir.bootlin.com/linux/v6.11-rc3/source/arch/riscv/kernel/alternative.c#L32). And since kernel/sbi.c is compiled with KASAN instrumentation, it fails to access the KASAN region as it is not yet set up (the MMU is not up yet). So the following fixes the issue: diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 06d407f1b30b..3b926c9444d0 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -31,6 +31,7 @@ endif ifdef CONFIG_KASAN KASAN_SANITIZE_alternative.o := n KASAN_SANITIZE_cpufeature.o := n +KASAN_SANITIZE_sbi.o := n endif endif But I don't like this solution as it removes instrumentation everywhere in this file. I'd rather move the __sbi_ecall() into its own file and remove the instrumentation here. @Conor Can you give the diff above a try and see if it fixes your issue? Thanks, Alex > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Hi Samuel,
On Fri, Mar 22, 2024 at 12:01 AM Samuel Holland
<samuel.holland@sifive.com> wrote:
>
> These are useful for measuring the latency of SBI calls. The SBI HSM
> extension is excluded because those functions are called from contexts
> such as cpuidle where instrumentation is not allowed.
>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
> ---
>
> Changes in v2:
> - Remove some blank lines
>
> arch/riscv/include/asm/trace.h | 54 ++++++++++++++++++++++++++++++++++
> arch/riscv/kernel/sbi.c | 7 +++++
> 2 files changed, 61 insertions(+)
> create mode 100644 arch/riscv/include/asm/trace.h
>
> diff --git a/arch/riscv/include/asm/trace.h b/arch/riscv/include/asm/trace.h
> new file mode 100644
> index 000000000000..6151cee5450c
> --- /dev/null
> +++ b/arch/riscv/include/asm/trace.h
> @@ -0,0 +1,54 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM riscv
> +
> +#if !defined(_TRACE_RISCV_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_RISCV_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT_CONDITION(sbi_call,
> + TP_PROTO(int ext, int fid),
> + TP_ARGS(ext, fid),
> + TP_CONDITION(ext != SBI_EXT_HSM),
> +
> + TP_STRUCT__entry(
> + __field(int, ext)
> + __field(int, fid)
> + ),
> +
> + TP_fast_assign(
> + __entry->ext = ext;
> + __entry->fid = fid;
> + ),
> +
> + TP_printk("ext=0x%x fid=%d", __entry->ext, __entry->fid)
> +);
> +
> +TRACE_EVENT_CONDITION(sbi_return,
> + TP_PROTO(int ext, long error, long value),
> + TP_ARGS(ext, error, value),
> + TP_CONDITION(ext != SBI_EXT_HSM),
> +
> + TP_STRUCT__entry(
> + __field(long, error)
> + __field(long, value)
> + ),
> +
> + TP_fast_assign(
> + __entry->error = error;
> + __entry->value = value;
> + ),
> +
> + TP_printk("error=%ld value=0x%lx", __entry->error, __entry->value)
> +);
> +
> +#endif /* _TRACE_RISCV_H */
> +
> +#undef TRACE_INCLUDE_PATH
> +#undef TRACE_INCLUDE_FILE
> +
> +#define TRACE_INCLUDE_PATH asm
> +#define TRACE_INCLUDE_FILE trace
> +
> +#include <trace/define_trace.h>
> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
> index e66e0999a800..a1d21d8f5293 100644
> --- a/arch/riscv/kernel/sbi.c
> +++ b/arch/riscv/kernel/sbi.c
> @@ -14,6 +14,9 @@
> #include <asm/smp.h>
> #include <asm/tlbflush.h>
>
> +#define CREATE_TRACE_POINTS
> +#include <asm/trace.h>
> +
> /* default SBI version is 0.1 */
> unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
> EXPORT_SYMBOL(sbi_spec_version);
> @@ -31,6 +34,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
> {
> struct sbiret ret;
>
> + trace_sbi_call(ext, fid);
> +
> register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
> register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
> register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
> @@ -46,6 +51,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
> ret.error = a0;
> ret.value = a1;
>
> + trace_sbi_return(ext, ret.error, ret.value);
> +
> return ret;
> }
> EXPORT_SYMBOL(sbi_ecall);
> --
> 2.43.1
>
sbi_ecall() is used by flush_icache_all() so I was a bit scared we
would need to patch 2 instructions using a tracepoint and then to use
flush_icache_all() which could bug if only one of the patched
instruction was seen. But I took a look at the disassembled code, and
tracepoints happen to only need to patch 1 nop into an unconditional
jump, which is not a problem then.
But then when disassembling, I noticed that the addition of the
tracepoint comes with "quite" some overhead:
Before:
Dump of assembler code for function sbi_ecall:
0xffffffff800085e0 <+0>: add sp,sp,-32
0xffffffff800085e2 <+2>: sd s0,24(sp)
0xffffffff800085e4 <+4>: mv t1,a0
0xffffffff800085e6 <+6>: add s0,sp,32
0xffffffff800085e8 <+8>: mv t3,a1
0xffffffff800085ea <+10>: mv a0,a2
0xffffffff800085ec <+12>: mv a1,a3
0xffffffff800085ee <+14>: mv a2,a4
0xffffffff800085f0 <+16>: mv a3,a5
0xffffffff800085f2 <+18>: mv a4,a6
0xffffffff800085f4 <+20>: mv a5,a7
0xffffffff800085f6 <+22>: mv a6,t3
0xffffffff800085f8 <+24>: mv a7,t1
0xffffffff800085fa <+26>: ecall
0xffffffff800085fe <+30>: ld s0,24(sp)
0xffffffff80008600 <+32>: add sp,sp,32
0xffffffff80008602 <+34>: ret
After:
Dump of assembler code for function sbi_ecall:
0xffffffff8000bbf2 <+0>: add sp,sp,-112
0xffffffff8000bbf4 <+2>: sd s0,96(sp)
0xffffffff8000bbf6 <+4>: sd s1,88(sp)
0xffffffff8000bbf8 <+6>: sd s3,72(sp)
0xffffffff8000bbfa <+8>: sd s4,64(sp)
0xffffffff8000bbfc <+10>: sd s5,56(sp)
0xffffffff8000bbfe <+12>: sd s6,48(sp)
0xffffffff8000bc00 <+14>: sd s7,40(sp)
0xffffffff8000bc02 <+16>: sd s8,32(sp)
0xffffffff8000bc04 <+18>: sd s9,24(sp)
0xffffffff8000bc06 <+20>: sd ra,104(sp)
0xffffffff8000bc08 <+22>: sd s2,80(sp)
0xffffffff8000bc0a <+24>: add s0,sp,112
0xffffffff8000bc0c <+26>: mv s1,a0
0xffffffff8000bc0e <+28>: mv s3,a1
0xffffffff8000bc10 <+30>: mv s9,a2
0xffffffff8000bc12 <+32>: mv s8,a3
0xffffffff8000bc14 <+34>: mv s7,a4
0xffffffff8000bc16 <+36>: mv s6,a5
0xffffffff8000bc18 <+38>: mv s5,a6
0xffffffff8000bc1a <+40>: mv s4,a7
0xffffffff8000bc1c <+42>: nop
0xffffffff8000bc20 <+46>: mv a0,s9
0xffffffff8000bc22 <+48>: mv a1,s8
0xffffffff8000bc24 <+50>: mv a2,s7
0xffffffff8000bc26 <+52>: mv a3,s6
0xffffffff8000bc28 <+54>: mv a4,s5
0xffffffff8000bc2a <+56>: mv a5,s4
0xffffffff8000bc2c <+58>: mv a6,s3
0xffffffff8000bc2e <+60>: mv a7,s1
0xffffffff8000bc30 <+62>: ecall
0xffffffff8000bc34 <+66>: mv s4,a0
0xffffffff8000bc36 <+68>: mv s3,a1
0xffffffff8000bc38 <+70>: nop
0xffffffff8000bc3c <+74>: ld ra,104(sp)
0xffffffff8000bc3e <+76>: ld s0,96(sp)
0xffffffff8000bc40 <+78>: ld s1,88(sp)
0xffffffff8000bc42 <+80>: ld s2,80(sp)
0xffffffff8000bc44 <+82>: ld s5,56(sp)
0xffffffff8000bc46 <+84>: ld s6,48(sp)
0xffffffff8000bc48 <+86>: ld s7,40(sp)
0xffffffff8000bc4a <+88>: ld s8,32(sp)
0xffffffff8000bc4c <+90>: ld s9,24(sp)
0xffffffff8000bc4e <+92>: mv a0,s4
0xffffffff8000bc50 <+94>: mv a1,s3
0xffffffff8000bc52 <+96>: ld s4,64(sp)
0xffffffff8000bc54 <+98>: ld s3,72(sp)
0xffffffff8000bc56 <+100>: add sp,sp,112
0xffffffff8000bc58 <+102>: ret
Not sure this is really problematic though, let me know what you think.
FYI, I'm about to send a patch to reorder the arguments of sbi_ecall()
to avoid the mv-dance before the ecall.
Thanks,
Alex
Hi Alex,
On 2024-03-22 4:27 AM, Alexandre Ghiti wrote:
> On Fri, Mar 22, 2024 at 12:01 AM Samuel Holland
> <samuel.holland@sifive.com> wrote:
>>
>> These are useful for measuring the latency of SBI calls. The SBI HSM
>> extension is excluded because those functions are called from contexts
>> such as cpuidle where instrumentation is not allowed.
>>
>> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
>> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
>> ---
>>
>> Changes in v2:
>> - Remove some blank lines
>>
>> arch/riscv/include/asm/trace.h | 54 ++++++++++++++++++++++++++++++++++
>> arch/riscv/kernel/sbi.c | 7 +++++
>> 2 files changed, 61 insertions(+)
>> create mode 100644 arch/riscv/include/asm/trace.h
>>
>> diff --git a/arch/riscv/include/asm/trace.h b/arch/riscv/include/asm/trace.h
>> new file mode 100644
>> index 000000000000..6151cee5450c
>> --- /dev/null
>> +++ b/arch/riscv/include/asm/trace.h
>> @@ -0,0 +1,54 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM riscv
>> +
>> +#if !defined(_TRACE_RISCV_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_RISCV_H
>> +
>> +#include <linux/tracepoint.h>
>> +
>> +TRACE_EVENT_CONDITION(sbi_call,
>> + TP_PROTO(int ext, int fid),
>> + TP_ARGS(ext, fid),
>> + TP_CONDITION(ext != SBI_EXT_HSM),
>> +
>> + TP_STRUCT__entry(
>> + __field(int, ext)
>> + __field(int, fid)
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->ext = ext;
>> + __entry->fid = fid;
>> + ),
>> +
>> + TP_printk("ext=0x%x fid=%d", __entry->ext, __entry->fid)
>> +);
>> +
>> +TRACE_EVENT_CONDITION(sbi_return,
>> + TP_PROTO(int ext, long error, long value),
>> + TP_ARGS(ext, error, value),
>> + TP_CONDITION(ext != SBI_EXT_HSM),
>> +
>> + TP_STRUCT__entry(
>> + __field(long, error)
>> + __field(long, value)
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->error = error;
>> + __entry->value = value;
>> + ),
>> +
>> + TP_printk("error=%ld value=0x%lx", __entry->error, __entry->value)
>> +);
>> +
>> +#endif /* _TRACE_RISCV_H */
>> +
>> +#undef TRACE_INCLUDE_PATH
>> +#undef TRACE_INCLUDE_FILE
>> +
>> +#define TRACE_INCLUDE_PATH asm
>> +#define TRACE_INCLUDE_FILE trace
>> +
>> +#include <trace/define_trace.h>
>> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
>> index e66e0999a800..a1d21d8f5293 100644
>> --- a/arch/riscv/kernel/sbi.c
>> +++ b/arch/riscv/kernel/sbi.c
>> @@ -14,6 +14,9 @@
>> #include <asm/smp.h>
>> #include <asm/tlbflush.h>
>>
>> +#define CREATE_TRACE_POINTS
>> +#include <asm/trace.h>
>> +
>> /* default SBI version is 0.1 */
>> unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
>> EXPORT_SYMBOL(sbi_spec_version);
>> @@ -31,6 +34,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>> {
>> struct sbiret ret;
>>
>> + trace_sbi_call(ext, fid);
>> +
>> register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
>> register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
>> register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
>> @@ -46,6 +51,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>> ret.error = a0;
>> ret.value = a1;
>>
>> + trace_sbi_return(ext, ret.error, ret.value);
>> +
>> return ret;
>> }
>> EXPORT_SYMBOL(sbi_ecall);
>> --
>> 2.43.1
>>
>
> sbi_ecall() is used by flush_icache_all() so I was a bit scared we
> would need to patch 2 instructions using a tracepoint and then to use
> flush_icache_all() which could bug if only one of the patched
> instruction was seen. But I took a look at the disassembled code, and
> tracepoints happen to only need to patch 1 nop into an unconditional
> jump, which is not a problem then.
Right, tracepoints use the static branch infrastructure, which uses a single
instruction.
> But then when disassembling, I noticed that the addition of the
> tracepoint comes with "quite" some overhead:
>
> Before:
>
> Dump of assembler code for function sbi_ecall:
> 0xffffffff800085e0 <+0>: add sp,sp,-32
> 0xffffffff800085e2 <+2>: sd s0,24(sp)
> 0xffffffff800085e4 <+4>: mv t1,a0
> 0xffffffff800085e6 <+6>: add s0,sp,32
> 0xffffffff800085e8 <+8>: mv t3,a1
> 0xffffffff800085ea <+10>: mv a0,a2
> 0xffffffff800085ec <+12>: mv a1,a3
> 0xffffffff800085ee <+14>: mv a2,a4
> 0xffffffff800085f0 <+16>: mv a3,a5
> 0xffffffff800085f2 <+18>: mv a4,a6
> 0xffffffff800085f4 <+20>: mv a5,a7
> 0xffffffff800085f6 <+22>: mv a6,t3
> 0xffffffff800085f8 <+24>: mv a7,t1
> 0xffffffff800085fa <+26>: ecall
> 0xffffffff800085fe <+30>: ld s0,24(sp)
> 0xffffffff80008600 <+32>: add sp,sp,32
> 0xffffffff80008602 <+34>: ret
>
> After:
>
> Dump of assembler code for function sbi_ecall:
> 0xffffffff8000bbf2 <+0>: add sp,sp,-112
> 0xffffffff8000bbf4 <+2>: sd s0,96(sp)
> 0xffffffff8000bbf6 <+4>: sd s1,88(sp)
> 0xffffffff8000bbf8 <+6>: sd s3,72(sp)
> 0xffffffff8000bbfa <+8>: sd s4,64(sp)
> 0xffffffff8000bbfc <+10>: sd s5,56(sp)
> 0xffffffff8000bbfe <+12>: sd s6,48(sp)
> 0xffffffff8000bc00 <+14>: sd s7,40(sp)
> 0xffffffff8000bc02 <+16>: sd s8,32(sp)
> 0xffffffff8000bc04 <+18>: sd s9,24(sp)
> 0xffffffff8000bc06 <+20>: sd ra,104(sp)
> 0xffffffff8000bc08 <+22>: sd s2,80(sp)
> 0xffffffff8000bc0a <+24>: add s0,sp,112
> 0xffffffff8000bc0c <+26>: mv s1,a0
> 0xffffffff8000bc0e <+28>: mv s3,a1
> 0xffffffff8000bc10 <+30>: mv s9,a2
> 0xffffffff8000bc12 <+32>: mv s8,a3
> 0xffffffff8000bc14 <+34>: mv s7,a4
> 0xffffffff8000bc16 <+36>: mv s6,a5
> 0xffffffff8000bc18 <+38>: mv s5,a6
> 0xffffffff8000bc1a <+40>: mv s4,a7
> 0xffffffff8000bc1c <+42>: nop
> 0xffffffff8000bc20 <+46>: mv a0,s9
> 0xffffffff8000bc22 <+48>: mv a1,s8
> 0xffffffff8000bc24 <+50>: mv a2,s7
> 0xffffffff8000bc26 <+52>: mv a3,s6
> 0xffffffff8000bc28 <+54>: mv a4,s5
> 0xffffffff8000bc2a <+56>: mv a5,s4
> 0xffffffff8000bc2c <+58>: mv a6,s3
> 0xffffffff8000bc2e <+60>: mv a7,s1
> 0xffffffff8000bc30 <+62>: ecall
> 0xffffffff8000bc34 <+66>: mv s4,a0
> 0xffffffff8000bc36 <+68>: mv s3,a1
> 0xffffffff8000bc38 <+70>: nop
> 0xffffffff8000bc3c <+74>: ld ra,104(sp)
> 0xffffffff8000bc3e <+76>: ld s0,96(sp)
> 0xffffffff8000bc40 <+78>: ld s1,88(sp)
> 0xffffffff8000bc42 <+80>: ld s2,80(sp)
> 0xffffffff8000bc44 <+82>: ld s5,56(sp)
> 0xffffffff8000bc46 <+84>: ld s6,48(sp)
> 0xffffffff8000bc48 <+86>: ld s7,40(sp)
> 0xffffffff8000bc4a <+88>: ld s8,32(sp)
> 0xffffffff8000bc4c <+90>: ld s9,24(sp)
> 0xffffffff8000bc4e <+92>: mv a0,s4
> 0xffffffff8000bc50 <+94>: mv a1,s3
> 0xffffffff8000bc52 <+96>: ld s4,64(sp)
> 0xffffffff8000bc54 <+98>: ld s3,72(sp)
> 0xffffffff8000bc56 <+100>: add sp,sp,112
> 0xffffffff8000bc58 <+102>: ret
>
> Not sure this is really problematic though, let me know what you think.
The overhead is only incurred when tracepoints are enabled. But when they are
enabled, I do not think it is avoidable, unless we were to hide this tracepoint
behind some Kconfig option. I don't think the overhead enough to be a problem,
but I suppose it's up to maintainer judgement.
Regards,
Samuel
On Mon, 25 Mar 2024 20:39:27 PDT (-0700), samuel.holland@sifive.com wrote:
> Hi Alex,
>
> On 2024-03-22 4:27 AM, Alexandre Ghiti wrote:
>> On Fri, Mar 22, 2024 at 12:01 AM Samuel Holland
>> <samuel.holland@sifive.com> wrote:
>>>
>>> These are useful for measuring the latency of SBI calls. The SBI HSM
>>> extension is excluded because those functions are called from contexts
>>> such as cpuidle where instrumentation is not allowed.
>>>
>>> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
>>> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
>>> ---
>>>
>>> Changes in v2:
>>> - Remove some blank lines
>>>
>>> arch/riscv/include/asm/trace.h | 54 ++++++++++++++++++++++++++++++++++
>>> arch/riscv/kernel/sbi.c | 7 +++++
>>> 2 files changed, 61 insertions(+)
>>> create mode 100644 arch/riscv/include/asm/trace.h
>>>
>>> diff --git a/arch/riscv/include/asm/trace.h b/arch/riscv/include/asm/trace.h
>>> new file mode 100644
>>> index 000000000000..6151cee5450c
>>> --- /dev/null
>>> +++ b/arch/riscv/include/asm/trace.h
>>> @@ -0,0 +1,54 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +#undef TRACE_SYSTEM
>>> +#define TRACE_SYSTEM riscv
>>> +
>>> +#if !defined(_TRACE_RISCV_H) || defined(TRACE_HEADER_MULTI_READ)
>>> +#define _TRACE_RISCV_H
>>> +
>>> +#include <linux/tracepoint.h>
>>> +
>>> +TRACE_EVENT_CONDITION(sbi_call,
>>> + TP_PROTO(int ext, int fid),
>>> + TP_ARGS(ext, fid),
>>> + TP_CONDITION(ext != SBI_EXT_HSM),
>>> +
>>> + TP_STRUCT__entry(
>>> + __field(int, ext)
>>> + __field(int, fid)
>>> + ),
>>> +
>>> + TP_fast_assign(
>>> + __entry->ext = ext;
>>> + __entry->fid = fid;
>>> + ),
>>> +
>>> + TP_printk("ext=0x%x fid=%d", __entry->ext, __entry->fid)
>>> +);
>>> +
>>> +TRACE_EVENT_CONDITION(sbi_return,
>>> + TP_PROTO(int ext, long error, long value),
>>> + TP_ARGS(ext, error, value),
>>> + TP_CONDITION(ext != SBI_EXT_HSM),
>>> +
>>> + TP_STRUCT__entry(
>>> + __field(long, error)
>>> + __field(long, value)
>>> + ),
>>> +
>>> + TP_fast_assign(
>>> + __entry->error = error;
>>> + __entry->value = value;
>>> + ),
>>> +
>>> + TP_printk("error=%ld value=0x%lx", __entry->error, __entry->value)
>>> +);
>>> +
>>> +#endif /* _TRACE_RISCV_H */
>>> +
>>> +#undef TRACE_INCLUDE_PATH
>>> +#undef TRACE_INCLUDE_FILE
>>> +
>>> +#define TRACE_INCLUDE_PATH asm
>>> +#define TRACE_INCLUDE_FILE trace
>>> +
>>> +#include <trace/define_trace.h>
>>> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
>>> index e66e0999a800..a1d21d8f5293 100644
>>> --- a/arch/riscv/kernel/sbi.c
>>> +++ b/arch/riscv/kernel/sbi.c
>>> @@ -14,6 +14,9 @@
>>> #include <asm/smp.h>
>>> #include <asm/tlbflush.h>
>>>
>>> +#define CREATE_TRACE_POINTS
>>> +#include <asm/trace.h>
>>> +
>>> /* default SBI version is 0.1 */
>>> unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
>>> EXPORT_SYMBOL(sbi_spec_version);
>>> @@ -31,6 +34,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>>> {
>>> struct sbiret ret;
>>>
>>> + trace_sbi_call(ext, fid);
>>> +
>>> register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
>>> register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
>>> register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
>>> @@ -46,6 +51,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>>> ret.error = a0;
>>> ret.value = a1;
>>>
>>> + trace_sbi_return(ext, ret.error, ret.value);
>>> +
>>> return ret;
>>> }
>>> EXPORT_SYMBOL(sbi_ecall);
>>> --
>>> 2.43.1
>>>
>>
>> sbi_ecall() is used by flush_icache_all() so I was a bit scared we
>> would need to patch 2 instructions using a tracepoint and then to use
>> flush_icache_all() which could bug if only one of the patched
>> instruction was seen. But I took a look at the disassembled code, and
>> tracepoints happen to only need to patch 1 nop into an unconditional
>> jump, which is not a problem then.
>
> Right, tracepoints use the static branch infrastructure, which uses a single
> instruction.
>
>> But then when disassembling, I noticed that the addition of the
>> tracepoint comes with "quite" some overhead:
>>
>> Before:
>>
>> Dump of assembler code for function sbi_ecall:
>> 0xffffffff800085e0 <+0>: add sp,sp,-32
>> 0xffffffff800085e2 <+2>: sd s0,24(sp)
>> 0xffffffff800085e4 <+4>: mv t1,a0
>> 0xffffffff800085e6 <+6>: add s0,sp,32
>> 0xffffffff800085e8 <+8>: mv t3,a1
>> 0xffffffff800085ea <+10>: mv a0,a2
>> 0xffffffff800085ec <+12>: mv a1,a3
>> 0xffffffff800085ee <+14>: mv a2,a4
>> 0xffffffff800085f0 <+16>: mv a3,a5
>> 0xffffffff800085f2 <+18>: mv a4,a6
>> 0xffffffff800085f4 <+20>: mv a5,a7
>> 0xffffffff800085f6 <+22>: mv a6,t3
>> 0xffffffff800085f8 <+24>: mv a7,t1
>> 0xffffffff800085fa <+26>: ecall
>> 0xffffffff800085fe <+30>: ld s0,24(sp)
>> 0xffffffff80008600 <+32>: add sp,sp,32
>> 0xffffffff80008602 <+34>: ret
>>
>> After:
>>
>> Dump of assembler code for function sbi_ecall:
>> 0xffffffff8000bbf2 <+0>: add sp,sp,-112
>> 0xffffffff8000bbf4 <+2>: sd s0,96(sp)
>> 0xffffffff8000bbf6 <+4>: sd s1,88(sp)
>> 0xffffffff8000bbf8 <+6>: sd s3,72(sp)
>> 0xffffffff8000bbfa <+8>: sd s4,64(sp)
>> 0xffffffff8000bbfc <+10>: sd s5,56(sp)
>> 0xffffffff8000bbfe <+12>: sd s6,48(sp)
>> 0xffffffff8000bc00 <+14>: sd s7,40(sp)
>> 0xffffffff8000bc02 <+16>: sd s8,32(sp)
>> 0xffffffff8000bc04 <+18>: sd s9,24(sp)
>> 0xffffffff8000bc06 <+20>: sd ra,104(sp)
>> 0xffffffff8000bc08 <+22>: sd s2,80(sp)
>> 0xffffffff8000bc0a <+24>: add s0,sp,112
>> 0xffffffff8000bc0c <+26>: mv s1,a0
>> 0xffffffff8000bc0e <+28>: mv s3,a1
>> 0xffffffff8000bc10 <+30>: mv s9,a2
>> 0xffffffff8000bc12 <+32>: mv s8,a3
>> 0xffffffff8000bc14 <+34>: mv s7,a4
>> 0xffffffff8000bc16 <+36>: mv s6,a5
>> 0xffffffff8000bc18 <+38>: mv s5,a6
>> 0xffffffff8000bc1a <+40>: mv s4,a7
>> 0xffffffff8000bc1c <+42>: nop
>> 0xffffffff8000bc20 <+46>: mv a0,s9
>> 0xffffffff8000bc22 <+48>: mv a1,s8
>> 0xffffffff8000bc24 <+50>: mv a2,s7
>> 0xffffffff8000bc26 <+52>: mv a3,s6
>> 0xffffffff8000bc28 <+54>: mv a4,s5
>> 0xffffffff8000bc2a <+56>: mv a5,s4
>> 0xffffffff8000bc2c <+58>: mv a6,s3
>> 0xffffffff8000bc2e <+60>: mv a7,s1
>> 0xffffffff8000bc30 <+62>: ecall
>> 0xffffffff8000bc34 <+66>: mv s4,a0
>> 0xffffffff8000bc36 <+68>: mv s3,a1
>> 0xffffffff8000bc38 <+70>: nop
>> 0xffffffff8000bc3c <+74>: ld ra,104(sp)
>> 0xffffffff8000bc3e <+76>: ld s0,96(sp)
>> 0xffffffff8000bc40 <+78>: ld s1,88(sp)
>> 0xffffffff8000bc42 <+80>: ld s2,80(sp)
>> 0xffffffff8000bc44 <+82>: ld s5,56(sp)
>> 0xffffffff8000bc46 <+84>: ld s6,48(sp)
>> 0xffffffff8000bc48 <+86>: ld s7,40(sp)
>> 0xffffffff8000bc4a <+88>: ld s8,32(sp)
>> 0xffffffff8000bc4c <+90>: ld s9,24(sp)
>> 0xffffffff8000bc4e <+92>: mv a0,s4
>> 0xffffffff8000bc50 <+94>: mv a1,s3
>> 0xffffffff8000bc52 <+96>: ld s4,64(sp)
>> 0xffffffff8000bc54 <+98>: ld s3,72(sp)
>> 0xffffffff8000bc56 <+100>: add sp,sp,112
>> 0xffffffff8000bc58 <+102>: ret
>>
>> Not sure this is really problematic though, let me know what you think.
>
> The overhead is only incurred when tracepoints are enabled. But when they are
> enabled, I do not think it is avoidable, unless we were to hide this tracepoint
> behind some Kconfig option. I don't think the overhead enough to be a problem,
> but I suppose it's up to maintainer judgement.
IIUC it's actually just the printk() inside the tracepoint that's adding
the overhead, not the tracepoints themselves? We're shimming every SBI
call through sbi_ecall(), which fills out every argument, so we end up
saving/restoring them all around the printk(). We could convert these
over to sbi_ecall_N like we do for syscalls in glibc, then we'd only
need to save/restore the arguments that have something useful.
I'm going to pick this one up, though, as it's not directly a problem
with the patch in question. It's staged and should show up on for-next
eventually, stuff might be a bit chaotic until we figure out all the
poop...
>
> Regards,
> Samuel
© 2016 - 2026 Red Hat, Inc.