arch/arm64/include/asm/ftrace.h | 7 +++++++ arch/arm64/kernel/entry-ftrace.S | 6 ------ arch/arm64/kernel/ftrace.c | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 6 deletions(-)
As we do in commit 0c0593b45c9b ("x86/ftrace: Make function graph
use ftrace directly"), we don't need special hook for graph tracer,
but instead we use graph_ops:func function to install return_hooker.
Since commit 3b23e4991fb6 ("arm64: implement ftrace with regs") add
implementation for FTRACE_WITH_REGS on arm64, we can easily adopt
the same optimization on arm64.
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
arch/arm64/include/asm/ftrace.h | 7 +++++++
arch/arm64/kernel/entry-ftrace.S | 6 ------
arch/arm64/kernel/ftrace.c | 21 +++++++++++++++++++++
3 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 1494cfa8639b..dbc45a4157fa 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -80,8 +80,15 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
struct dyn_ftrace;
+struct ftrace_ops;
+struct ftrace_regs;
+
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop
+
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
#endif
#define ftrace_return_address(n) return_address(n)
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index e535480a4069..eb4a69b1f84d 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -97,12 +97,6 @@ SYM_CODE_START(ftrace_common)
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
bl ftrace_stub
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller();
- nop // If enabled, this will be replaced
- // "b ftrace_graph_caller"
-#endif
-
/*
* At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
* x19-x29 per the AAPCS, and we created frame records upon entry, so we need
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 4506c4a90ac1..1b5da231b1de 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -268,6 +268,26 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
}
#ifdef CONFIG_DYNAMIC_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return 0;
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return 0;
+}
+
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+ struct pt_regs *regs = arch_ftrace_get_regs(fregs);
+ unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs);
+
+ prepare_ftrace_return(ip, parent, frame_pointer(regs));
+}
+#else
/*
* Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
* depending on @enable.
@@ -297,5 +317,6 @@ int ftrace_disable_ftrace_graph_caller(void)
{
return ftrace_modify_graph_caller(false);
}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
--
2.20.1
On Tue, 22 Feb 2022 21:00:49 +0800
Chengming Zhou <zhouchengming@bytedance.com> wrote:
> As we do in commit 0c0593b45c9b ("x86/ftrace: Make function graph
> use ftrace directly"), we don't need special hook for graph tracer,
> but instead we use graph_ops:func function to install return_hooker.
>
> Since commit 3b23e4991fb6 ("arm64: implement ftrace with regs") add
> implementation for FTRACE_WITH_REGS on arm64, we can easily adopt
> the same optimization on arm64.
Note. Ideally we want it to hook with DYNAMIC_FTARCE_WITH_ARGS, and not
FTRACE_WITH_REGS. If arm64 is like x86_64, saving all regs at every
function call has a bit more overhead than saving the minimum. The
DYNAMIC_FTRACE_WITH_ARGS, means that the minimum is still saved, but now
exposes the arguments and the stack pointer, which function_graph_tracer
needs.
-- Steve
>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
On 2022/2/22 11:52 下午, Steven Rostedt wrote:
> On Tue, 22 Feb 2022 21:00:49 +0800
> Chengming Zhou <zhouchengming@bytedance.com> wrote:
>
>> As we do in commit 0c0593b45c9b ("x86/ftrace: Make function graph
>> use ftrace directly"), we don't need special hook for graph tracer,
>> but instead we use graph_ops:func function to install return_hooker.
>>
>> Since commit 3b23e4991fb6 ("arm64: implement ftrace with regs") add
>> implementation for FTRACE_WITH_REGS on arm64, we can easily adopt
>> the same optimization on arm64.
>
> Note. Ideally we want it to hook with DYNAMIC_FTARCE_WITH_ARGS, and not
> FTRACE_WITH_REGS. If arm64 is like x86_64, saving all regs at every
> function call has a bit more overhead than saving the minimum. The
> DYNAMIC_FTRACE_WITH_ARGS, means that the minimum is still saved, but now
> exposes the arguments and the stack pointer, which function_graph_tracer
> needs.
Yes, it would be better to implement DYNAMIC_FTRACE_WITH_ARGS on arm64 too,
and this patch just use DYNAMIC_FTRACE_WITH_REGS to install return_hooker
for graph tracer, so it's a code cleanup, no performance optimization.
Thanks.
>
> -- Steve
>
>
>>
>> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
>> ---
On Wed, 23 Feb 2022 16:00:27 +0800
Chengming Zhou <zhouchengming@bytedance.com> wrote:
> Yes, it would be better to implement DYNAMIC_FTRACE_WITH_ARGS on arm64 too,
> and this patch just use DYNAMIC_FTRACE_WITH_REGS to install return_hooker
> for graph tracer, so it's a code cleanup, no performance optimization.
I'm worried that a clean up is either breaking the design or hurting
performance.
You have:
> +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
> + struct ftrace_ops *op, struct ftrace_regs *fregs)
> +{
> + struct pt_regs *regs = arch_ftrace_get_regs(fregs);
Now, technically, arch_ftrace_get_regs() is to return NULL if the
ftrace_ops was not registered with ops->flags |= FTRACE_OPS_FL_SAVE_REGS.
Which function graph does not do.
But this is in arch specific code so you have more control of this
"undefined behavior". But you really should have a comment saying that
this needs to be fixed when DYNAMIC_FTRACE_WITH_ARGS is implemented.
-- Steve
> + unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs);
> +
> + prepare_ftrace_return(ip, parent, frame_pointer(regs));
> +}
-- Steve
On 2022/2/24 9:30 上午, Steven Rostedt wrote:
> On Wed, 23 Feb 2022 16:00:27 +0800
> Chengming Zhou <zhouchengming@bytedance.com> wrote:
>
>> Yes, it would be better to implement DYNAMIC_FTRACE_WITH_ARGS on arm64 too,
>> and this patch just use DYNAMIC_FTRACE_WITH_REGS to install return_hooker
>> for graph tracer, so it's a code cleanup, no performance optimization.
>
> I'm worried that a clean up is either breaking the design or hurting
> performance.
>
> You have:
>
>> +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
>> + struct ftrace_ops *op, struct ftrace_regs *fregs)
>> +{
>> + struct pt_regs *regs = arch_ftrace_get_regs(fregs);
>
> Now, technically, arch_ftrace_get_regs() is to return NULL if the
> ftrace_ops was not registered with ops->flags |= FTRACE_OPS_FL_SAVE_REGS.
>
> Which function graph does not do.
>
> But this is in arch specific code so you have more control of this
> "undefined behavior". But you really should have a comment saying that
> this needs to be fixed when DYNAMIC_FTRACE_WITH_ARGS is implemented.
>
Ok, I will add a comment noting that.
Thanks.
> -- Steve
>
>
>> + unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs);
>> +
>> + prepare_ftrace_return(ip, parent, frame_pointer(regs));
>> +}
>
> -- Steve
On Tue, Feb 22, 2022 at 09:00:49PM +0800, Chengming Zhou wrote:
> As we do in commit 0c0593b45c9b ("x86/ftrace: Make function graph
> use ftrace directly"), we don't need special hook for graph tracer,
> but instead we use graph_ops:func function to install return_hooker.
>
> Since commit 3b23e4991fb6 ("arm64: implement ftrace with regs") add
> implementation for FTRACE_WITH_REGS on arm64, we can easily adopt
> the same optimization on arm64.
This is a nice cleanup/refactoring, but I don't think this is an
optimization as such; we're still doing the same work, just in
marginally different place. So I'd suggest s/optimization/cleanup/ here.
It's probably worth noting that this *only* changes the FTRACE_WITH_REGS
implementation, and the mcount-based implementation is unaffected by
this patch.
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
> arch/arm64/include/asm/ftrace.h | 7 +++++++
> arch/arm64/kernel/entry-ftrace.S | 6 ------
> arch/arm64/kernel/ftrace.c | 21 +++++++++++++++++++++
> 3 files changed, 28 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> index 1494cfa8639b..dbc45a4157fa 100644
> --- a/arch/arm64/include/asm/ftrace.h
> +++ b/arch/arm64/include/asm/ftrace.h
> @@ -80,8 +80,15 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
>
> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> struct dyn_ftrace;
> +struct ftrace_ops;
> +struct ftrace_regs;
> +
> int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> #define ftrace_init_nop ftrace_init_nop
> +
> +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
> + struct ftrace_ops *op, struct ftrace_regs *fregs);
> +#define ftrace_graph_func ftrace_graph_func
> #endif
>
> #define ftrace_return_address(n) return_address(n)
> diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
> index e535480a4069..eb4a69b1f84d 100644
> --- a/arch/arm64/kernel/entry-ftrace.S
> +++ b/arch/arm64/kernel/entry-ftrace.S
> @@ -97,12 +97,6 @@ SYM_CODE_START(ftrace_common)
> SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
> bl ftrace_stub
>
> -#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> -SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller();
> - nop // If enabled, this will be replaced
> - // "b ftrace_graph_caller"
> -#endif
> -
You should also be able to delete the FTRACE_WITH_REGS implementation of
ftrace_graph_caller since that's now unused.
Having that in the diff would also make it easier to compare to the
logic in ftrace_graph_func().
> /*
> * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
> * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
> index 4506c4a90ac1..1b5da231b1de 100644
> --- a/arch/arm64/kernel/ftrace.c
> +++ b/arch/arm64/kernel/ftrace.c
> @@ -268,6 +268,26 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
> }
>
> #ifdef CONFIG_DYNAMIC_FTRACE
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +int ftrace_enable_ftrace_graph_caller(void)
> +{
> + return 0;
> +}
> +
> +int ftrace_disable_ftrace_graph_caller(void)
> +{
> + return 0;
> +}
It's a shame the core code doesn't provide this if we provide an
implementation of ftrace_graph_func.
> +
> +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
> + struct ftrace_ops *op, struct ftrace_regs *fregs)
> +{
> + struct pt_regs *regs = arch_ftrace_get_regs(fregs);
> + unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs);
> +
> + prepare_ftrace_return(ip, parent, frame_pointer(regs));
> +}
Other than my comments above, this looks about right, but I'd like to
give this some testing before I give any tags.
Could you respin this with the FTRACE_WITH_REGS ftrace_graph_caller asm
removed?
Thanks,
Mark.
> +#else
> /*
> * Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
> * depending on @enable.
> @@ -297,5 +317,6 @@ int ftrace_disable_ftrace_graph_caller(void)
> {
> return ftrace_modify_graph_caller(false);
> }
> +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
> #endif /* CONFIG_DYNAMIC_FTRACE */
> #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> --
> 2.20.1
>
On 2022/2/22 11:54 下午, Mark Rutland wrote:
> On Tue, Feb 22, 2022 at 09:00:49PM +0800, Chengming Zhou wrote:
>> As we do in commit 0c0593b45c9b ("x86/ftrace: Make function graph
>> use ftrace directly"), we don't need special hook for graph tracer,
>> but instead we use graph_ops:func function to install return_hooker.
>>
>> Since commit 3b23e4991fb6 ("arm64: implement ftrace with regs") add
>> implementation for FTRACE_WITH_REGS on arm64, we can easily adopt
>> the same optimization on arm64.
>
> This is a nice cleanup/refactoring, but I don't think this is an
> optimization as such; we're still doing the same work, just in
> marginally different place. So I'd suggest s/optimization/cleanup/ here.
>
> It's probably worth noting that this *only* changes the FTRACE_WITH_REGS
> implementation, and the mcount-based implementation is unaffected by
> this patch.
>
Agree, I will change the commit message in the next version.
>> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
>> ---
>> arch/arm64/include/asm/ftrace.h | 7 +++++++
>> arch/arm64/kernel/entry-ftrace.S | 6 ------
>> arch/arm64/kernel/ftrace.c | 21 +++++++++++++++++++++
>> 3 files changed, 28 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
>> index 1494cfa8639b..dbc45a4157fa 100644
>> --- a/arch/arm64/include/asm/ftrace.h
>> +++ b/arch/arm64/include/asm/ftrace.h
>> @@ -80,8 +80,15 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
>>
>> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
>> struct dyn_ftrace;
>> +struct ftrace_ops;
>> +struct ftrace_regs;
>> +
>> int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
>> #define ftrace_init_nop ftrace_init_nop
>> +
>> +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
>> + struct ftrace_ops *op, struct ftrace_regs *fregs);
>> +#define ftrace_graph_func ftrace_graph_func
>> #endif
>>
>> #define ftrace_return_address(n) return_address(n)
>> diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
>> index e535480a4069..eb4a69b1f84d 100644
>> --- a/arch/arm64/kernel/entry-ftrace.S
>> +++ b/arch/arm64/kernel/entry-ftrace.S
>> @@ -97,12 +97,6 @@ SYM_CODE_START(ftrace_common)
>> SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
>> bl ftrace_stub
>>
>> -#ifdef CONFIG_FUNCTION_GRAPH_TRACER
>> -SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller();
>> - nop // If enabled, this will be replaced
>> - // "b ftrace_graph_caller"
>> -#endif
>> -
>
> You should also be able to delete the FTRACE_WITH_REGS implementation of
> ftrace_graph_caller since that's now unused.
>
> Having that in the diff would also make it easier to compare to the
> logic in ftrace_graph_func().
>
Yes, will do.
>> /*
>> * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
>> * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
>> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
>> index 4506c4a90ac1..1b5da231b1de 100644
>> --- a/arch/arm64/kernel/ftrace.c
>> +++ b/arch/arm64/kernel/ftrace.c
>> @@ -268,6 +268,26 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
>> }
>>
>> #ifdef CONFIG_DYNAMIC_FTRACE
>> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
>> +int ftrace_enable_ftrace_graph_caller(void)
>> +{
>> + return 0;
>> +}
>> +
>> +int ftrace_disable_ftrace_graph_caller(void)
>> +{
>> + return 0;
>> +}
>
> It's a shame the core code doesn't provide this if we provide an
> implementation of ftrace_graph_func.
>
Maybe I can provide these weak version functions in the ftrace core code
with an additional patch.
>> +
>> +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
>> + struct ftrace_ops *op, struct ftrace_regs *fregs)
>> +{
>> + struct pt_regs *regs = arch_ftrace_get_regs(fregs);
>> + unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs);
>> +
>> + prepare_ftrace_return(ip, parent, frame_pointer(regs));
>> +}
>
> Other than my comments above, this looks about right, but I'd like to
> give this some testing before I give any tags.
>
> Could you respin this with the FTRACE_WITH_REGS ftrace_graph_caller asm
> removed?
>
> Thanks,
> Mark.
>
Of course, will do.
Thanks!
>> +#else
>> /*
>> * Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
>> * depending on @enable.
>> @@ -297,5 +317,6 @@ int ftrace_disable_ftrace_graph_caller(void)
>> {
>> return ftrace_modify_graph_caller(false);
>> }
>> +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
>> #endif /* CONFIG_DYNAMIC_FTRACE */
>> #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>> --
>> 2.20.1
>>
On Tue, 22 Feb 2022 15:54:44 +0000
Mark Rutland <mark.rutland@arm.com> wrote:
> > #ifdef CONFIG_DYNAMIC_FTRACE
> > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > +int ftrace_enable_ftrace_graph_caller(void)
> > +{
> > + return 0;
> > +}
> > +
> > +int ftrace_disable_ftrace_graph_caller(void)
> > +{
> > + return 0;
> > +}
>
> It's a shame the core code doesn't provide this if we provide an
> implementation of ftrace_graph_func.
As it was only x86_64 that took it out, I wasn't about to remove it. But
now other archs are implementing it, I'm fine with making these weak
functions in the core code.
-- Steve
© 2016 - 2026 Red Hat, Inc.