[PATCH 1/9] LoongArch/ftrace: Add basic support

Qing Zhang posted 9 patches 3 years, 7 months ago
Only 5 patches received!
There is a newer version of this series
[PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Qing Zhang 3 years, 7 months ago
This patch contains basic ftrace support for LoongArch.
Specifically, function tracer (HAVE_FUNCTION_TRACER), function graph
tracer (HAVE_FUNCTION_GRAPH_TRACER) are implemented following the
instructions in Documentation/trace/ftrace-design.txt.

Compiling with `-pg` makes the compiler insert a stub call to a child
function, `void _mcount(void *ra)`. Thus, the instructions that store RA
and open the stack frame appear before the `call _mcount`: locate the
`open stack` instruction first, and then locate the `store RA` one.

Note that the functions in both inst.c and time.c should not be
hooked with the compiler's -pg option: to prevent infinite self-
referencing for the former, and to ignore early setup stuff for the
latter.

Co-developed-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
---
 arch/loongarch/Kconfig              |  2 +
 arch/loongarch/Makefile             |  5 ++
 arch/loongarch/include/asm/ftrace.h | 18 ++++++
 arch/loongarch/kernel/Makefile      |  8 +++
 arch/loongarch/kernel/ftrace.c      | 74 +++++++++++++++++++++++
 arch/loongarch/kernel/mcount.S      | 94 +++++++++++++++++++++++++++++
 6 files changed, 201 insertions(+)
 create mode 100644 arch/loongarch/include/asm/ftrace.h
 create mode 100644 arch/loongarch/kernel/ftrace.c
 create mode 100644 arch/loongarch/kernel/mcount.S

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 4abc9a28aba4..703a2c3a8e0d 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -84,6 +84,8 @@ config LOONGARCH
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER
 	select HAVE_GENERIC_VDSO
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index ec3de6191276..44f11a2937e9 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -29,6 +29,11 @@ ifneq ($(SUBARCH),$(ARCH))
   endif
 endif
 
+ifdef CONFIG_DYNAMIC_FTRACE
+  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+endif
+
 ifdef CONFIG_64BIT
 ld-emul			= $(64bit-emul)
 cflags-y		+= -mabi=lp64s
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
new file mode 100644
index 000000000000..6a3e76234618
--- /dev/null
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_FTRACE_H
+#define _ASM_LOONGARCH_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE 4		/* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(void);
+#define mcount _mcount
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_FUNCTION_TRACER */
+#endif /* _ASM_LOONGARCH_FTRACE_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index e5be17009fe8..0a745d24d3e5 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -14,6 +14,14 @@ obj-$(CONFIG_EFI) 		+= efi.o
 
 obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
 
+ifdef CONFIG_FUNCTION_TRACER
+  obj-y += mcount.o ftrace.o
+  CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
+endif
+
 obj-$(CONFIG_MODULES)		+= module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
diff --git a/arch/loongarch/kernel/ftrace.c b/arch/loongarch/kernel/ftrace.c
new file mode 100644
index 000000000000..c8ddc5f11f32
--- /dev/null
+++ b/arch/loongarch/kernel/ftrace.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/uaccess.h>
+#include <linux/init.h>
+#include <linux/ftrace.h>
+#include <linux/syscalls.h>
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
+#include <asm/inst.h>
+#include <asm/loongarch.h>
+#include <asm/syscall.h>
+#include <asm/unistd.h>
+
+#include <asm-generic/sections.h>
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+/*
+ * As `call _mcount` follows LoongArch psABI, ra-saved operation and
+ * stack operation can be found before this insn.
+ */
+
+static int ftrace_get_parent_ra_addr(unsigned long insn_addr, int *ra_off)
+{
+	union loongarch_instruction *insn;
+	int limit = 32;
+
+	insn = (union loongarch_instruction *)insn_addr;
+
+	do {
+		insn--;
+		limit--;
+
+		if (is_ra_save_ins(insn))
+			*ra_off = -((1 << 12) - insn->reg2i12_format.immediate);
+
+	} while (!is_stack_alloc_ins(insn) && limit);
+
+	if (!limit)
+		return -EINVAL;
+
+	return 0;
+}
+
+void prepare_ftrace_return(unsigned long self_addr,
+		unsigned long callsite_sp, unsigned long old)
+{
+	int ra_off;
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+
+	if (unlikely(ftrace_graph_is_dead()))
+		return;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	if (ftrace_get_parent_ra_addr(self_addr, &ra_off))
+		goto out;
+
+	if (!function_graph_enter(old, self_addr, 0, NULL))
+		*(unsigned long *)(callsite_sp + ra_off) = return_hooker;
+
+	return;
+
+out:
+	ftrace_graph_stop();
+	WARN_ON(1);
+}
+#endif	/* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/loongarch/kernel/mcount.S b/arch/loongarch/kernel/mcount.S
new file mode 100644
index 000000000000..f1c1cc1a629e
--- /dev/null
+++ b/arch/loongarch/kernel/mcount.S
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * LoongArch specific _mcount support
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/export.h>
+#include <asm/regdef.h>
+#include <asm/stackframe.h>
+#include <asm/ftrace.h>
+
+	.text
+
+#define MCOUNT_STACK_SIZE	(2 * SZREG)
+#define MCOUNT_S0_OFFSET	(0)
+#define MCOUNT_RA_OFFSET	(SZREG)
+
+	.macro MCOUNT_SAVE_REGS
+	PTR_ADDI sp, sp, -MCOUNT_STACK_SIZE
+	PTR_S	s0, sp, MCOUNT_S0_OFFSET
+	PTR_S	ra, sp, MCOUNT_RA_OFFSET
+	move	s0, a0
+	.endm
+
+	.macro MCOUNT_RESTORE_REGS
+	move	a0, s0
+	PTR_L	ra, sp, MCOUNT_RA_OFFSET
+	PTR_L	s0, sp, MCOUNT_S0_OFFSET
+	PTR_ADDI sp, sp, MCOUNT_STACK_SIZE
+	.endm
+
+
+SYM_FUNC_START(_mcount)
+	la	t1, ftrace_stub
+	la	t2, ftrace_trace_function	/* Prepare t2 for (1) */
+	PTR_L	t2, t2, 0
+	beq	t1, t2, fgraph_trace
+
+	MCOUNT_SAVE_REGS
+
+	move	a0, ra				/* arg0: self return address */
+	move	a1, s0				/* arg1: parent's return address */
+	jirl	ra, t2, 0			/* (1) call *ftrace_trace_function */
+
+	MCOUNT_RESTORE_REGS
+
+fgraph_trace:
+#ifdef	CONFIG_FUNCTION_GRAPH_TRACER
+	la	t1, ftrace_stub
+	la	t3, ftrace_graph_return
+	PTR_L	t3, t3, 0
+	bne	t1, t3, ftrace_graph_caller
+	la	t1, ftrace_graph_entry_stub
+	la	t3, ftrace_graph_entry
+	PTR_L	t3, t3, 0
+	bne	t1, t3, ftrace_graph_caller
+#endif
+
+	.globl ftrace_stub
+ftrace_stub:
+	jirl	zero, ra, 0
+SYM_FUNC_END(_mcount)
+EXPORT_SYMBOL(_mcount)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+SYM_FUNC_START(ftrace_graph_caller)
+	MCOUNT_SAVE_REGS
+
+	PTR_ADDI	a0, ra, -4			/* arg0: Callsite self return addr */
+	PTR_ADDI	a1, sp, MCOUNT_STACK_SIZE	/* arg1: Callsite sp */
+	move	a2, s0					/* arg2: Callsite parent ra */
+	bl	prepare_ftrace_return
+
+	MCOUNT_RESTORE_REGS
+	jirl	zero, ra, 0
+SYM_FUNC_END(ftrace_graph_caller)
+
+SYM_FUNC_START(return_to_handler)
+	PTR_ADDI sp, sp, -2 * SZREG
+	PTR_S	a0, sp, 0
+	PTR_S	a1, sp, SZREG
+
+	bl	ftrace_return_to_handler
+
+	/* restore the real parent address: a0 -> ra */
+	move	ra, a0
+
+	PTR_L	a0, sp, 0
+	PTR_L	a1, sp, SZREG
+	PTR_ADDI	sp, sp, 2 * SZREG
+	jirl	zero, ra, 0
+SYM_FUNC_END(return_to_handler)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-- 
2.36.1
Re: [PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Steven Rostedt 3 years, 7 months ago
On Fri, 19 Aug 2022 16:13:55 +0800
Qing Zhang <zhangqing@loongson.cn> wrote:

> +#define MCOUNT_STACK_SIZE	(2 * SZREG)
> +#define MCOUNT_S0_OFFSET	(0)
> +#define MCOUNT_RA_OFFSET	(SZREG)
> +
> +	.macro MCOUNT_SAVE_REGS
> +	PTR_ADDI sp, sp, -MCOUNT_STACK_SIZE
> +	PTR_S	s0, sp, MCOUNT_S0_OFFSET
> +	PTR_S	ra, sp, MCOUNT_RA_OFFSET
> +	move	s0, a0
> +	.endm
> +
> +	.macro MCOUNT_RESTORE_REGS
> +	move	a0, s0
> +	PTR_L	ra, sp, MCOUNT_RA_OFFSET
> +	PTR_L	s0, sp, MCOUNT_S0_OFFSET
> +	PTR_ADDI sp, sp, MCOUNT_STACK_SIZE
> +	.endm
> +
> +
> +SYM_FUNC_START(_mcount)
> +	la	t1, ftrace_stub
> +	la	t2, ftrace_trace_function	/* Prepare t2 for (1) */
> +	PTR_L	t2, t2, 0
> +	beq	t1, t2, fgraph_trace
> +
> +	MCOUNT_SAVE_REGS
> +
> +	move	a0, ra				/* arg0: self return address */
> +	move	a1, s0				/* arg1: parent's return address */
> +	jirl	ra, t2, 0			/* (1) call *ftrace_trace_function */
> +
> +	MCOUNT_RESTORE_REGS

You know, if you can implement CONFIG_FTRACE_WITH_ARGS, where the default
function callback gets a ftrace_regs pointer (that only holds what is
needed for the arguments of the function as well as the stack pointer),
then you could also implement function graph on top of that, and remove the
need for the below "fgraph_trace" trampoline.

I'd really would like all architectures to go that way. Also, the
CONFIG_FTRACE_WITH_ARGS is all you need for live kernel patching.

-- Steve


> +
> +fgraph_trace:
> +#ifdef	CONFIG_FUNCTION_GRAPH_TRACER
> +	la	t1, ftrace_stub
> +	la	t3, ftrace_graph_return
> +	PTR_L	t3, t3, 0
> +	bne	t1, t3, ftrace_graph_caller
> +	la	t1, ftrace_graph_entry_stub
> +	la	t3, ftrace_graph_entry
> +	PTR_L	t3, t3, 0
> +	bne	t1, t3, ftrace_graph_caller
> +#endif
> +
> +	.globl ftrace_stub
> +ftrace_stub:
> +	jirl	zero, ra, 0
> +SYM_FUNC_END(_mcount)
> +EXPORT_SYMBOL(_mcount)
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +SYM_FUNC_START(ftrace_graph_caller)
> +	MCOUNT_SAVE_REGS
> +
> +	PTR_ADDI	a0, ra, -4			/* arg0: Callsite self return addr */
> +	PTR_ADDI	a1, sp, MCOUNT_STACK_SIZE	/* arg1: Callsite sp */
> +	move	a2, s0					/* arg2: Callsite parent ra */
> +	bl	prepare_ftrace_return
> +
> +	MCOUNT_RESTORE_REGS
> +	jirl	zero, ra, 0
> +SYM_FUNC_END(ftrace_graph_caller)
Re: [PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Jinyang He 3 years, 7 months ago
On 08/20/2022 01:25 AM, Steven Rostedt wrote:

> On Fri, 19 Aug 2022 16:13:55 +0800
> Qing Zhang <zhangqing@loongson.cn> wrote:
>
>> +#define MCOUNT_STACK_SIZE	(2 * SZREG)
>> +#define MCOUNT_S0_OFFSET	(0)
>> +#define MCOUNT_RA_OFFSET	(SZREG)
>> +
>> +	.macro MCOUNT_SAVE_REGS
>> +	PTR_ADDI sp, sp, -MCOUNT_STACK_SIZE
>> +	PTR_S	s0, sp, MCOUNT_S0_OFFSET
>> +	PTR_S	ra, sp, MCOUNT_RA_OFFSET
>> +	move	s0, a0
>> +	.endm
>> +
>> +	.macro MCOUNT_RESTORE_REGS
>> +	move	a0, s0
>> +	PTR_L	ra, sp, MCOUNT_RA_OFFSET
>> +	PTR_L	s0, sp, MCOUNT_S0_OFFSET
>> +	PTR_ADDI sp, sp, MCOUNT_STACK_SIZE
>> +	.endm
>> +
>> +
>> +SYM_FUNC_START(_mcount)
>> +	la	t1, ftrace_stub
>> +	la	t2, ftrace_trace_function	/* Prepare t2 for (1) */
>> +	PTR_L	t2, t2, 0
>> +	beq	t1, t2, fgraph_trace
>> +
>> +	MCOUNT_SAVE_REGS
>> +
>> +	move	a0, ra				/* arg0: self return address */
>> +	move	a1, s0				/* arg1: parent's return address */
>> +	jirl	ra, t2, 0			/* (1) call *ftrace_trace_function */
>> +
>> +	MCOUNT_RESTORE_REGS
> You know, if you can implement CONFIG_FTRACE_WITH_ARGS, where the default
> function callback gets a ftrace_regs pointer (that only holds what is
> needed for the arguments of the function as well as the stack pointer),
> then you could also implement function graph on top of that, and remove the
> need for the below "fgraph_trace" trampoline.
>
> I'd really would like all architectures to go that way. Also, the
> CONFIG_FTRACE_WITH_ARGS is all you need for live kernel patching.
Hi, Steve,

I think we have implemented CONFIG_FTRACE_WITH_ARGS in dynamic ftrace
in the [Patch3/9]. But, for non-dynamic ftrace, it is hard to
implement. Because the LoongArch compiler gcc treats mcount as a
real call, like 'call _mcount(__builtin_return_address(0))'. That
means, they decrease stack, save args to callee saved regs and may
do some optimization before calling mcount. It is difficult to find the
original args and apply changes from tracers.

Thanks,
Jinyang

>
>
>> +
>> +fgraph_trace:
>> +#ifdef	CONFIG_FUNCTION_GRAPH_TRACER
>> +	la	t1, ftrace_stub
>> +	la	t3, ftrace_graph_return
>> +	PTR_L	t3, t3, 0
>> +	bne	t1, t3, ftrace_graph_caller
>> +	la	t1, ftrace_graph_entry_stub
>> +	la	t3, ftrace_graph_entry
>> +	PTR_L	t3, t3, 0
>> +	bne	t1, t3, ftrace_graph_caller
>> +#endif
>> +
>> +	.globl ftrace_stub
>> +ftrace_stub:
>> +	jirl	zero, ra, 0
>> +SYM_FUNC_END(_mcount)
>> +EXPORT_SYMBOL(_mcount)
>> +
>> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
>> +SYM_FUNC_START(ftrace_graph_caller)
>> +	MCOUNT_SAVE_REGS
>> +
>> +	PTR_ADDI	a0, ra, -4			/* arg0: Callsite self return addr */
>> +	PTR_ADDI	a1, sp, MCOUNT_STACK_SIZE	/* arg1: Callsite sp */
>> +	move	a2, s0					/* arg2: Callsite parent ra */
>> +	bl	prepare_ftrace_return
>> +
>> +	MCOUNT_RESTORE_REGS
>> +	jirl	zero, ra, 0
>> +SYM_FUNC_END(ftrace_graph_caller)
Re: [PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Steven Rostedt 3 years, 7 months ago
On Sat, 20 Aug 2022 09:38:21 +0800
Jinyang He <hejinyang@loongson.cn> wrote:

> I think we have implemented CONFIG_FTRACE_WITH_ARGS in dynamic ftrace
> in the [Patch3/9]. 

Sorry, I must have missed it.

> But, for non dynamic ftrace, it is hardly to
> implement it. Because the LoongArch compiler gcc treats mount as a

Don't bother implementing it for non-dynamic. I would just add a:

config HAVE_FTRACE_WITH_ARGS if DYNAMIC_FTRACE

and be done with it.

> really call, like 'call _mcount(__builtin_return_address(0))'. That
> means, they decrease stack, save args to callee saved regs and may
> do some optimization before calling mcount. It is difficult to find the
> original args and apply changes from tracers.

Right, there's no point in implementing it for non dynamic. Like I said,
non-dynamic is just a stepping stone for getting dynamic working. Once you
have dynamic working, it's up to you to throw out the non-dynamic. It's not
useful for anything other than porting to a new architecture or for
academic purposes.

-- Steve
Re: [PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Jinyang He 3 years, 7 months ago
On 08/20/2022 09:52 AM, Steven Rostedt wrote:

> On Sat, 20 Aug 2022 09:38:21 +0800
> Jinyang He <hejinyang@loongson.cn> wrote:
>
>> I think we have implemented CONFIG_FTRACE_WITH_ARGS in dynamic ftrace
>> in the [Patch3/9].
> Sorry, I must have missed it.
And there is still something left to do, Qing will do that.

>
>> But, for non dynamic ftrace, it is hardly to
>> implement it. Because the LoongArch compiler gcc treats mount as a
> Don't bother implementing it for non-dynamic. I would just add a:
>
> config HAVE_FTRACE_WITH_ARGS if DYNAMIC_FTRACE
>
> and be done with it.
Yes, it is clear.

>
>> really call, like 'call _mcount(__builtin_return_address(0))'. That
>> means, they decrease stack, save args to callee saved regs and may
>> do some optimization before calling mcount. It is difficult to find the
>> original args and apply changes from tracers.
> Right, there's no point in implementing it for non dynamic. Like I said,
> non-dynamic is just a stepping stone for getting dynamic working. Once you
> have dynamic working, it's up to you to throw out the non-dynamic. It's not
> useful for anything other than porting to a new architecture or for
> academic purposes.
>
Thanks for your detail answers.
Jinyang
Re: [PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Qing Zhang 3 years, 7 months ago

On 2022/8/20 上午1:25, Steven Rostedt wrote:
> On Fri, 19 Aug 2022 16:13:55 +0800
> Qing Zhang <zhangqing@loongson.cn> wrote:
> 
>> +#define MCOUNT_STACK_SIZE	(2 * SZREG)
>> +#define MCOUNT_S0_OFFSET	(0)
>> +#define MCOUNT_RA_OFFSET	(SZREG)
>> +
>> +	.macro MCOUNT_SAVE_REGS
>> +	PTR_ADDI sp, sp, -MCOUNT_STACK_SIZE
>> +	PTR_S	s0, sp, MCOUNT_S0_OFFSET
>> +	PTR_S	ra, sp, MCOUNT_RA_OFFSET
>> +	move	s0, a0
>> +	.endm
>> +
>> +	.macro MCOUNT_RESTORE_REGS
>> +	move	a0, s0
>> +	PTR_L	ra, sp, MCOUNT_RA_OFFSET
>> +	PTR_L	s0, sp, MCOUNT_S0_OFFSET
>> +	PTR_ADDI sp, sp, MCOUNT_STACK_SIZE
>> +	.endm
>> +
>> +
>> +SYM_FUNC_START(_mcount)
>> +	la	t1, ftrace_stub
>> +	la	t2, ftrace_trace_function	/* Prepare t2 for (1) */
>> +	PTR_L	t2, t2, 0
>> +	beq	t1, t2, fgraph_trace
>> +
>> +	MCOUNT_SAVE_REGS
>> +
>> +	move	a0, ra				/* arg0: self return address */
>> +	move	a1, s0				/* arg1: parent's return address */
>> +	jirl	ra, t2, 0			/* (1) call *ftrace_trace_function */
>> +
>> +	MCOUNT_RESTORE_REGS
> 
> You know, if you can implement CONFIG_FTRACE_WITH_ARGS, where the default
> function callback gets a ftrace_regs pointer (that only holds what is
> needed for the arguments of the function as well as the stack pointer),
> then you could also implement function graph on top of that, and remove the
> need for the below "fgraph_trace" trampoline.
> 
> I'd really would like all architectures to go that way. Also, the
> CONFIG_FTRACE_WITH_ARGS is all you need for live kernel patching.
> 
Hi, Steve
I will add the implementation of CONFIG_FTRACE_WITH_ARGS in v2.

Thanks,
- Qing
> -- Steve
> 
> 
>> +
>> +fgraph_trace:
>> +#ifdef	CONFIG_FUNCTION_GRAPH_TRACER
>> +	la	t1, ftrace_stub
>> +	la	t3, ftrace_graph_return
>> +	PTR_L	t3, t3, 0
>> +	bne	t1, t3, ftrace_graph_caller
>> +	la	t1, ftrace_graph_entry_stub
>> +	la	t3, ftrace_graph_entry
>> +	PTR_L	t3, t3, 0
>> +	bne	t1, t3, ftrace_graph_caller
>> +#endif
>> +
>> +	.globl ftrace_stub
>> +ftrace_stub:
>> +	jirl	zero, ra, 0
>> +SYM_FUNC_END(_mcount)
>> +EXPORT_SYMBOL(_mcount)
>> +
>> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
>> +SYM_FUNC_START(ftrace_graph_caller)
>> +	MCOUNT_SAVE_REGS
>> +
>> +	PTR_ADDI	a0, ra, -4			/* arg0: Callsite self return addr */
>> +	PTR_ADDI	a1, sp, MCOUNT_STACK_SIZE	/* arg1: Callsite sp */
>> +	move	a2, s0					/* arg2: Callsite parent ra */
>> +	bl	prepare_ftrace_return
>> +
>> +	MCOUNT_RESTORE_REGS
>> +	jirl	zero, ra, 0
>> +SYM_FUNC_END(ftrace_graph_caller)

Re: [PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Jinyang He 3 years, 7 months ago
Hi, Qing

On 2022/8/19 16:13, Qing Zhang wrote:

> This patch contains basic ftrace support for LoongArch.
> Specifically, function tracer (HAVE_FUNCTION_TRACER), function graph
> tracer (HAVE_FUNCTION_GRAPH_TRACER) are implemented following the
> instructions in Documentation/trace/ftrace-design.txt.
>
> Use `-pg` makes stub like a child function `void _mcount(void *ra)`.
> Thus, it can be seen store RA and open stack before `call _mcount`.
> Find `open stack` at first, and then find `store RA`
>
> Note that the functions in both inst.c and time.c should not be
> hooked with the compiler's -pg option: to prevent infinite self-
> referencing for the former, and to ignore early setup stuff for the
> latter.
>
> Co-developed-by: Jinyang He <hejinyang@loongson.cn>
> Signed-off-by: Jinyang He <hejinyang@loongson.cn>
> Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
> ---
>   arch/loongarch/Kconfig              |  2 +
>   arch/loongarch/Makefile             |  5 ++
>   arch/loongarch/include/asm/ftrace.h | 18 ++++++
>   arch/loongarch/kernel/Makefile      |  8 +++
>   arch/loongarch/kernel/ftrace.c      | 74 +++++++++++++++++++++++
>   arch/loongarch/kernel/mcount.S      | 94 +++++++++++++++++++++++++++++
>   6 files changed, 201 insertions(+)
>   create mode 100644 arch/loongarch/include/asm/ftrace.h
>   create mode 100644 arch/loongarch/kernel/ftrace.c
>   create mode 100644 arch/loongarch/kernel/mcount.S
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 4abc9a28aba4..703a2c3a8e0d 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -84,6 +84,8 @@ config LOONGARCH
>   	select HAVE_DMA_CONTIGUOUS
>   	select HAVE_EXIT_THREAD
>   	select HAVE_FAST_GUP
> +        select HAVE_FUNCTION_GRAPH_TRACER
> +        select HAVE_FUNCTION_TRACER
>   	select HAVE_GENERIC_VDSO
>   	select HAVE_IOREMAP_PROT
>   	select HAVE_IRQ_EXIT_ON_IRQ_STACK
> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> index ec3de6191276..44f11a2937e9 100644
> --- a/arch/loongarch/Makefile
> +++ b/arch/loongarch/Makefile
> @@ -29,6 +29,11 @@ ifneq ($(SUBARCH),$(ARCH))
>     endif
>   endif
>   
> +ifdef CONFIG_DYNAMIC_FTRACE
> +  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
> +  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
> +endif
> +

It seems this patch adds non-dynamic ftrace, this code should not
appear here.
BTW is it really necessary for non-dynamic ftrace? I do not use it
directly or frequently; IMHO, non-dynamic can be completely
replaced by dynamic?


>   ifdef CONFIG_64BIT
>   ld-emul			= $(64bit-emul)
>   cflags-y		+= -mabi=lp64s
> diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
> new file mode 100644
> index 000000000000..6a3e76234618
> --- /dev/null
> +++ b/arch/loongarch/include/asm/ftrace.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#ifndef _ASM_LOONGARCH_FTRACE_H
> +#define _ASM_LOONGARCH_FTRACE_H
> +
> +#ifdef CONFIG_FUNCTION_TRACER
> +#define MCOUNT_INSN_SIZE 4		/* sizeof mcount call */
> +
> +#ifndef __ASSEMBLY__
> +extern void _mcount(void);
> +#define mcount _mcount
> +
> +#endif /* __ASSEMBLY__ */
> +#endif /* CONFIG_FUNCTION_TRACER */
> +#endif /* _ASM_LOONGARCH_FTRACE_H */
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index e5be17009fe8..0a745d24d3e5 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -14,6 +14,14 @@ obj-$(CONFIG_EFI) 		+= efi.o
>   
>   obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
>   
> +ifdef CONFIG_FUNCTION_TRACER
> +    obj-y += mcount.o ftrace.o
> +    CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
> +  CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
> +  CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
> +  CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
> +endif
> +
>   obj-$(CONFIG_MODULES)		+= module.o module-sections.o
>   obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
>   
> diff --git a/arch/loongarch/kernel/ftrace.c b/arch/loongarch/kernel/ftrace.c
> new file mode 100644
> index 000000000000..c8ddc5f11f32
> --- /dev/null
> +++ b/arch/loongarch/kernel/ftrace.c
> @@ -0,0 +1,74 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#include <linux/uaccess.h>
> +#include <linux/init.h>
> +#include <linux/ftrace.h>
> +#include <linux/syscalls.h>
> +
> +#include <asm/asm.h>
> +#include <asm/asm-offsets.h>
> +#include <asm/cacheflush.h>
> +#include <asm/inst.h>
> +#include <asm/loongarch.h>
> +#include <asm/syscall.h>
> +#include <asm/unistd.h>
> +
> +#include <asm-generic/sections.h>
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +/*
> + * As `call _mcount` follows LoongArch psABI, ra-saved operation and
> + * stack operation can be found before this insn.
> + */
> +
> +static int ftrace_get_parent_ra_addr(unsigned long insn_addr, int *ra_off)
> +{
> +	union loongarch_instruction *insn;
> +	int limit = 32;
> +
> +	insn = (union loongarch_instruction *)insn_addr;
> +
> +	do {
> +		insn--;
> +		limit--;
> +
> +		if (is_ra_save_ins(insn))
> +			*ra_off = -((1 << 12) - insn->reg2i12_format.immediate);
> +
> +	} while (!is_stack_alloc_ins(insn) && limit);
> +
> +	if (!limit)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +void prepare_ftrace_return(unsigned long self_addr,
> +		unsigned long callsite_sp, unsigned long old)
> +{
> +	int ra_off;
> +	unsigned long return_hooker = (unsigned long)&return_to_handler;
> +
> +	if (unlikely(ftrace_graph_is_dead()))
> +		return;
> +
> +	if (unlikely(atomic_read(&current->tracing_graph_pause)))
> +		return;
> +
> +	if (ftrace_get_parent_ra_addr(self_addr, &ra_off))
> +		goto out;
> +
> +	if (!function_graph_enter(old, self_addr, 0, NULL))
> +		*(unsigned long *)(callsite_sp + ra_off) = return_hooker;
> +
> +	return;
> +
> +out:
> +	ftrace_graph_stop();
> +	WARN_ON(1);
> +}
> +#endif	/* CONFIG_FUNCTION_GRAPH_TRACER */
> diff --git a/arch/loongarch/kernel/mcount.S b/arch/loongarch/kernel/mcount.S
> new file mode 100644
> index 000000000000..f1c1cc1a629e
> --- /dev/null
> +++ b/arch/loongarch/kernel/mcount.S
> @@ -0,0 +1,94 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * LoongArch specific _mcount support
> + *
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#include <asm/export.h>
> +#include <asm/regdef.h>
> +#include <asm/stackframe.h>
> +#include <asm/ftrace.h>
> +
> +	.text
> +
> +#define MCOUNT_STACK_SIZE	(2 * SZREG)
> +#define MCOUNT_S0_OFFSET	(0)
> +#define MCOUNT_RA_OFFSET	(SZREG)
> +
> +	.macro MCOUNT_SAVE_REGS
> +	PTR_ADDI sp, sp, -MCOUNT_STACK_SIZE
> +	PTR_S	s0, sp, MCOUNT_S0_OFFSET
> +	PTR_S	ra, sp, MCOUNT_RA_OFFSET
> +	move	s0, a0
> +	.endm
> +
> +	.macro MCOUNT_RESTORE_REGS
> +	move	a0, s0
> +	PTR_L	ra, sp, MCOUNT_RA_OFFSET
> +	PTR_L	s0, sp, MCOUNT_S0_OFFSET
> +	PTR_ADDI sp, sp, MCOUNT_STACK_SIZE
> +	.endm
> +
> +
> +SYM_FUNC_START(_mcount)
> +	la	t1, ftrace_stub
> +	la	t2, ftrace_trace_function	/* Prepare t2 for (1) */

'la.pcrel' is preferred here while 'la' should be deprecated.


> +	PTR_L	t2, t2, 0
> +	beq	t1, t2, fgraph_trace
> +
> +	MCOUNT_SAVE_REGS
> +
> +	move	a0, ra				/* arg0: self return address */
> +	move	a1, s0				/* arg1: parent's return address */
> +	jirl	ra, t2, 0			/* (1) call *ftrace_trace_function */
> +
> +	MCOUNT_RESTORE_REGS
> +
> +fgraph_trace:
> +#ifdef	CONFIG_FUNCTION_GRAPH_TRACER
> +	la	t1, ftrace_stub
> +	la	t3, ftrace_graph_return
> +	PTR_L	t3, t3, 0
> +	bne	t1, t3, ftrace_graph_caller
> +	la	t1, ftrace_graph_entry_stub
> +	la	t3, ftrace_graph_entry
> +	PTR_L	t3, t3, 0
> +	bne	t1, t3, ftrace_graph_caller
> +#endif
> +
> +	.globl ftrace_stub
> +ftrace_stub:
> +	jirl	zero, ra, 0
> +SYM_FUNC_END(_mcount)
> +EXPORT_SYMBOL(_mcount)
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +SYM_FUNC_START(ftrace_graph_caller)
> +	MCOUNT_SAVE_REGS
> +
> +	PTR_ADDI	a0, ra, -4			/* arg0: Callsite self return addr */
> +	PTR_ADDI	a1, sp, MCOUNT_STACK_SIZE	/* arg1: Callsite sp */
> +	move	a2, s0					/* arg2: Callsite parent ra */
> +	bl	prepare_ftrace_return
> +
> +	MCOUNT_RESTORE_REGS
> +	jirl	zero, ra, 0
We know Xuerui has done a lot of work to keep the asm code normalized.
So I think we should replace 'jirl zero, ra, 0' with 'jr ra'. The basic
rule is to use assembly macros where possible, I think.


Thanks,

Jinyang


> +SYM_FUNC_END(ftrace_graph_caller)
> +
> +SYM_FUNC_START(return_to_handler)
> +	PTR_ADDI sp, sp, -2 * SZREG
> +	PTR_S	a0, sp, 0
> +	PTR_S	a1, sp, SZREG
> +
> +	bl	ftrace_return_to_handler
> +
> +	/* restore the real parent address: a0 -> ra */
> +	move	ra, a0
> +
> +	PTR_L	a0, sp, 0
> +	PTR_L	a1, sp, SZREG
> +	PTR_ADDI	sp, sp, 2 * SZREG
> +	jirl	zero, ra, 0
> +SYM_FUNC_END(return_to_handler)
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
Re: [PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Steven Rostedt 3 years, 7 months ago
On Fri, 19 Aug 2022 17:29:29 +0800
Jinyang He <hejinyang@loongson.cn> wrote:

> It seems this patch adds non-dynamic ftrace, this code should not
> appear here.
> BTW is it really necessary for non-dynamic ftrace? I do not use it
> directly and frequently, IMHO, non-dynamic can be completely
> 
> replaced dynamic?

Note, I keep the non dynamic ftrace around for debugging purposes.

But sure, it's pretty useless. It's also good for bringing ftrace to a new
architecture (like this patch is doing), as it is easier to implement than
dynamic ftrace, and getting the non-dynamic working is usually the first
step in getting dynamic ftrace working.

But it's really up to the arch maintainers to keep it or not.

-- Steve
Re: [PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Qing Zhang 3 years, 7 months ago

On 2022/8/20 上午12:53, Steven Rostedt wrote:
> On Fri, 19 Aug 2022 17:29:29 +0800
> Jinyang He <hejinyang@loongson.cn> wrote:
> 
>> It seems this patch adds non-dynamic ftrace, this code should not
>> appear here.
>> BTW is it really necessary for non-dynamic ftrace? I do not use it
>> directly and frequently, IMHO, non-dynamic can be completely
>>
>> replaced dynamic?
> 
> Note, I keep the non dynamic ftrace around for debugging purposes.
> 
> But sure, it's pretty useless. It's also good for bringing ftrace to a new
> architecture (like this patch is doing), as it is easier to implement than
> dynamic ftrace, and getting the non-dynamic working is usually the first
> step in getting dynamic ftrace working.
> 
> But it's really up to the arch maintainers to keep it or not.
>
Before submitting, I saw that most other architectures kept the
non-dynamic method without cleaning it up, so I tend to keep it.
How do you think, Huacai. :)

Thanks,
- Qing
> -- Steve
> 

Re: [PATCH 1/9] LoongArch/ftrace: Add basic support
Posted by Huacai Chen 3 years, 7 months ago
Hi, all,

On Sat, Aug 20, 2022 at 9:34 AM Qing Zhang <zhangqing@loongson.cn> wrote:
>
>
>
> On 2022/8/20 上午12:53, Steven Rostedt wrote:
> > On Fri, 19 Aug 2022 17:29:29 +0800
> > Jinyang He <hejinyang@loongson.cn> wrote:
> >
> >> It seems this patch adds non-dynamic ftrace, this code should not
> >> appear here.
> >> BTW is it really necessary for non-dynamic ftrace? I do not use it
> >> directly and frequently, IMHO, non-dynamic can be completely
> >>
> >> replaced dynamic?
> >
> > Note, I keep the non dynamic ftrace around for debugging purposes.
> >
> > But sure, it's pretty useless. It's also good for bringing ftrace to a new
> > architecture (like this patch is doing), as it is easier to implement than
> > dynamic ftrace, and getting the non-dynamic working is usually the first
> > step in getting dynamic ftrace working.
> >
> > But it's really up to the arch maintainers to keep it or not.
> >
> Before submitting, I saw that other architectures almost kept
> non-dynamic methods without cleaning up, I tend to keep them.
> How do you think, Huacai. :)
I prefer to keep the non-dynamic method.

Huacai
>
> Thanks,
> - Qing
> > -- Steve
> >
>
>