[RFC PATCH] function_graph: Support recording and printing the function return address

Donglin Peng posted 1 patch 2 months, 3 weeks ago
There is a newer version of this series
include/linux/ftrace.h               |   1 +
kernel/trace/fgraph.c                |   1 +
kernel/trace/trace.h                 |   1 +
kernel/trace/trace_entries.h         |  19 ++++-
kernel/trace/trace_functions_graph.c | 105 ++++++++++++++++++---------
5 files changed, 92 insertions(+), 35 deletions(-)
[RFC PATCH] function_graph: Support recording and printing the function return address
Posted by Donglin Peng 2 months, 3 weeks ago
When using function_graph tracer to analyze the flow of kernel function
execution, it is often necessary to quickly locate the exact line of code
where the call occurs. While this may be easy at times, it can be more
time-consuming when some functions are inlined or the flow is too long.

This feature aims to simplify the process by recording the return address
of traced funcions and printing it when outputing trace logs.

To distinguish the return value, 'V=' is used as the prefix for the kernel
return value, and 'A=' is used as the prefix for the return address in trace
logs. A new trace option named 'funcgraph-retaddr' has been added, and the
option 'sym-addr' can control the format of the return address.

See below logs with both funcgraph-retval and funcgraph-retaddr enabled.

4)               |  load_elf_binary() { /* A=bprm_execve+0x249/0x600 */
4)               |    load_elf_phdrs() { /* A=load_elf_binary+0x84/0x1730 */
4)               |      __kmalloc_noprof() { /* A=load_elf_phdrs+0x4a/0xb0 */
4) + 47.910 us   |        __cond_resched(); /* V=0x0 A=__kmalloc_noprof+0x28c/0x390 */
4) ! 204.142 us  |      } /* __kmalloc_noprof V=0xffff888201e32c00 */
4)               |      kernel_read() { /* A=load_elf_phdrs+0x6c/0xb0 */
4)               |        rw_verify_area() { /* A=kernel_read+0x2b/0x50 */
4)               |          security_file_permission() {
4)               |            selinux_file_permission() { /* A=security_file_permission+0x26/0x40 */
4)               |              __inode_security_revalidate() { /* A=selinux_file_permission+0x6d/0x140 */
4)   1.182 us    |                __cond_resched(); /* V=0x0 A=__inode_security_revalidate+0x5f/0x80 */
4)   4.138 us    |              } /* __inode_security_revalidate V=0x0 */
4)   1.513 us    |              avc_policy_seqno(); /* V=0x0 A=selinux_file_permission+0x107/0x140 */
4) + 12.133 us   |            } /* selinux_file_permission V=0x0 */
4) + 39.834 us   |          } /* security_file_permission V=0x0 */
4) + 42.710 us   |        } /* rw_verify_area V=0x0 */

Then, we can use the faddr2line to locate the source code, for example:

$ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0
load_elf_phdrs+0x6c/0xb0:
elf_read at fs/binfmt_elf.c:471
(inlined by) load_elf_phdrs at fs/binfmt_elf.c:531

Signed-off-by: Donglin Peng <dolinux.peng@gmail.com>
---
 include/linux/ftrace.h               |   1 +
 kernel/trace/fgraph.c                |   1 +
 kernel/trace/trace.h                 |   1 +
 kernel/trace/trace_entries.h         |  19 ++++-
 kernel/trace/trace_functions_graph.c | 105 ++++++++++++++++++---------
 5 files changed, 92 insertions(+), 35 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fd5e84d0ec47..bdf51163b3b8 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1011,6 +1011,7 @@ static inline void ftrace_init(void) { }
  */
 struct ftrace_graph_ent {
 	unsigned long func; /* Current function */
+	unsigned long retaddr;  /* Return address */
 	int depth;
 } __packed;
 
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index d7d4fb403f6f..fcc4162c10f6 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -622,6 +622,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
 
 	trace.func = func;
 	trace.depth = ++current->curr_ret_depth;
+	trace.retaddr = ret;
 
 	offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
 	if (offset < 0)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index bd3e3069300e..87e02815b030 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -870,6 +870,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
 #define TRACE_GRAPH_GRAPH_TIME          0x400
 #define TRACE_GRAPH_PRINT_RETVAL        0x800
 #define TRACE_GRAPH_PRINT_RETVAL_HEX    0x1000
+#define TRACE_GRAPH_PRINT_RETADDR       0x2000
 #define TRACE_GRAPH_PRINT_FILL_SHIFT	28
 #define TRACE_GRAPH_PRINT_FILL_MASK	(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
 
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index c47422b20908..8b8753319dd3 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -71,6 +71,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
 	perf_ftrace_event_register
 );
 
+#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
 /* Function call entry */
 FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
 
@@ -79,6 +80,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
 	F_STRUCT(
 		__field_struct(	struct ftrace_graph_ent,	graph_ent	)
 		__field_packed(	unsigned long,	graph_ent,	func		)
+		__field_packed(	unsigned long,	graph_ent,	retaddr		)
 		__field_packed(	int,		graph_ent,	depth		)
 	),
 
@@ -86,8 +88,6 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
 );
 
 /* Function return entry */
-#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
-
 FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
 
 	TRACE_GRAPH_RET,
@@ -110,6 +110,21 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
 
 #else
 
+/* Function call entry */
+FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
+
+	TRACE_GRAPH_ENT,
+
+	F_STRUCT(
+		__field_struct(	struct ftrace_graph_ent,	graph_ent	)
+		__field_packed(	unsigned long,	graph_ent,	func		)
+		__field_packed(	int,		graph_ent,	depth		)
+	),
+
+	F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
+);
+
+/* Function return entry */
 FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
 
 	TRACE_GRAPH_RET,
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 13d0387ac6a6..655535d57763 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -63,6 +63,8 @@ static struct tracer_opt trace_opts[] = {
 	{ TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) },
 	/* Display function return value in hexadecimal format ? */
 	{ TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) },
+	/* Display function return address ? */
+	{ TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) },
 #endif
 	/* Include sleep time (scheduled out) between entry and return */
 	{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
@@ -651,50 +653,86 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
 #ifdef CONFIG_FUNCTION_GRAPH_RETVAL
 
 #define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL
+#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR
 
-static void print_graph_retval(struct trace_seq *s, unsigned long retval,
-				bool leaf, void *func, bool hex_format)
+static bool print_graph_retaddr(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
+				u32 trace_flags, bool comment)
 {
-	unsigned long err_code = 0;
+	if (unlikely(graph_ent->retaddr ==
+		 (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
+		return false;
 
-	if (retval == 0 || hex_format)
-		goto done;
+	if (comment)
+		trace_seq_puts(s, " /*");
 
-	/* Check if the return value matches the negative format */
-	if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
-		(((u64)retval) >> 32) == 0) {
-		/* sign extension */
-		err_code = (unsigned long)(s32)retval;
-	} else {
-		err_code = retval;
+	trace_seq_puts(s, " A=");
+	seq_print_ip_sym(s, graph_ent->retaddr, trace_flags | TRACE_ITER_SYM_OFFSET);
+
+	if (comment)
+		trace_seq_puts(s, " */");
+
+	return true;
+}
+
+static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
+				struct ftrace_graph_ret *graph_ret,
+				u32 opt_flags, u32 trace_flags)
+{
+	unsigned long err_code = 0;
+	unsigned long retval = graph_ret->retval;
+	bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX);
+	bool print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR);
+	bool print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL);
+	void *func = (void *)graph_ret->func;
+
+	if (print_retval && retval && !hex_format) {
+		/* Check if the return value matches the negative format */
+		if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
+			(((u64)retval) >> 32) == 0) {
+			err_code = sign_extend64(retval, 31);
+		} else {
+			err_code = retval;
+		}
+
+		if (!IS_ERR_VALUE(err_code))
+			err_code = 0;
 	}
 
-	if (!IS_ERR_VALUE(err_code))
-		err_code = 0;
+	if (print_retaddr && graph_ent && unlikely(graph_ent->retaddr ==
+		 (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
+		print_retaddr = false;
 
-done:
-	if (leaf) {
-		if (hex_format || (err_code == 0))
-			trace_seq_printf(s, "%ps(); /* = 0x%lx */\n",
-					func, retval);
+	if (graph_ent) {
+		trace_seq_printf(s, "%ps();", func);
+		if (print_retval || print_retaddr)
+			trace_seq_puts(s, " /*");
 		else
-			trace_seq_printf(s, "%ps(); /* = %ld */\n",
-					func, err_code);
+			trace_seq_putc(s, '\n');
 	} else {
+		print_retaddr = false;
+		trace_seq_printf(s, "} /* %ps", func);
+	}
+
+	if (print_retval) {
 		if (hex_format || (err_code == 0))
-			trace_seq_printf(s, "} /* %ps = 0x%lx */\n",
-					func, retval);
+			trace_seq_printf(s, " V=0x%lx", retval);
 		else
-			trace_seq_printf(s, "} /* %ps = %ld */\n",
-					func, err_code);
+			trace_seq_printf(s, " V=%ld", err_code);
 	}
+
+	if (print_retaddr)
+		print_graph_retaddr(s, graph_ent, trace_flags, false);
+
+	if (!graph_ent || print_retval || print_retaddr)
+		trace_seq_puts(s, " */\n");
 }
 
 #else
 
 #define __TRACE_GRAPH_PRINT_RETVAL 0
+#define __TRACE_GRAPH_PRINT_RETADDR 0
 
-#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0)
+#define print_graph_retval(_seq, _ent, _ret, _opt_flags, _trace_flags) do {} while (0)
 
 #endif
 
@@ -746,9 +784,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 	 * Write out the function return value if the option function-retval is
 	 * enabled.
 	 */
-	if (flags & __TRACE_GRAPH_PRINT_RETVAL)
-		print_graph_retval(s, graph_ret->retval, true, (void *)call->func,
-				!!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
+	if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR))
+		print_graph_retval(s, call, graph_ret, flags, tr->trace_flags);
 	else
 		trace_seq_printf(s, "%ps();\n", (void *)call->func);
 
@@ -788,7 +825,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
 		trace_seq_putc(s, ' ');
 
-	trace_seq_printf(s, "%ps() {\n", (void *)call->func);
+	trace_seq_printf(s, "%ps() {", (void *)call->func);
+	if (flags & __TRACE_GRAPH_PRINT_RETADDR)
+		print_graph_retaddr(s, call, tr->trace_flags, true);
+	trace_seq_putc(s, '\n');
 
 	if (trace_seq_has_overflowed(s))
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -1032,9 +1072,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 	 * Always write out the function name and its return value if the
 	 * function-retval option is enabled.
 	 */
-	if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
-		print_graph_retval(s, trace->retval, false, (void *)trace->func,
-			!!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
+	if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) {
+		print_graph_retval(s, NULL, trace, flags, tr->trace_flags);
 	} else {
 		/*
 		 * If the return function does not have a matching entry,
-- 
2.25.1
Re: [RFC PATCH] function_graph: Support recording and printing the function return address
Posted by Jeff Xie 2 months, 3 weeks ago
On Sun, Sep 8, 2024 at 10:26 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> When using function_graph tracer to analyze the flow of kernel function
> execution, it is often necessary to quickly locate the exact line of code
> where the call occurs. While this may be easy at times, it can be more
> time-consuming when some functions are inlined or the flow is too long.
>
> This feature aims to simplify the process by recording the return address
> of traced funcions and printing it when outputing trace logs.
>
> To distinguish the return value, 'V=' is used as the prefix for the kernel
> return value, and 'A=' is used as the prefix for the return address in trace
> logs. A new trace option named 'funcgraph-retaddr' has been added, and the
> option 'sym-addr' can control the format of the return address.
>
> See below logs with both funcgraph-retval and funcgraph-retaddr enabled.
>
> 4)               |  load_elf_binary() { /* A=bprm_execve+0x249/0x600 */
> 4)               |    load_elf_phdrs() { /* A=load_elf_binary+0x84/0x1730 */
> 4)               |      __kmalloc_noprof() { /* A=load_elf_phdrs+0x4a/0xb0 */
> 4) + 47.910 us   |        __cond_resched(); /* V=0x0 A=__kmalloc_noprof+0x28c/0x390 */
> 4) ! 204.142 us  |      } /* __kmalloc_noprof V=0xffff888201e32c00 */
> 4)               |      kernel_read() { /* A=load_elf_phdrs+0x6c/0xb0 */
> 4)               |        rw_verify_area() { /* A=kernel_read+0x2b/0x50 */
> 4)               |          security_file_permission() {
> 4)               |            selinux_file_permission() { /* A=security_file_permission+0x26/0x40 */
> 4)               |              __inode_security_revalidate() { /* A=selinux_file_permission+0x6d/0x140 */
> 4)   1.182 us    |                __cond_resched(); /* V=0x0 A=__inode_security_revalidate+0x5f/0x80 */
> 4)   4.138 us    |              } /* __inode_security_revalidate V=0x0 */
> 4)   1.513 us    |              avc_policy_seqno(); /* V=0x0 A=selinux_file_permission+0x107/0x140 */
> 4) + 12.133 us   |            } /* selinux_file_permission V=0x0 */
> 4) + 39.834 us   |          } /* security_file_permission V=0x0 */
> 4) + 42.710 us   |        } /* rw_verify_area V=0x0 */
>
> Then, we can use the faddr2line to locate the source code, for example:
>
> $ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0
> load_elf_phdrs+0x6c/0xb0:
> elf_read at fs/binfmt_elf.c:471
> (inlined by) load_elf_phdrs at fs/binfmt_elf.c:531

That's a great idea; I often use it that way. However, I used both the
function tracer and the function graph simultaneously
because the function tracer can also show context messages, such as
displaying 'd.h3.'

Example:
<idle>-0 [001] d.h3. 41.119330: __napi_schedule+0x4/0x90 <-e1000_intr+0x8d/0x110

I suggest that we consider displaying the results of both the function
tracer and the function graph together in the same instance ;-)

> Signed-off-by: Donglin Peng <dolinux.peng@gmail.com>
> ---
>  include/linux/ftrace.h               |   1 +
>  kernel/trace/fgraph.c                |   1 +
>  kernel/trace/trace.h                 |   1 +
>  kernel/trace/trace_entries.h         |  19 ++++-
>  kernel/trace/trace_functions_graph.c | 105 ++++++++++++++++++---------
>  5 files changed, 92 insertions(+), 35 deletions(-)
>
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index fd5e84d0ec47..bdf51163b3b8 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -1011,6 +1011,7 @@ static inline void ftrace_init(void) { }
>   */
>  struct ftrace_graph_ent {
>         unsigned long func; /* Current function */
> +       unsigned long retaddr;  /* Return address */
>         int depth;
>  } __packed;
>
> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> index d7d4fb403f6f..fcc4162c10f6 100644
> --- a/kernel/trace/fgraph.c
> +++ b/kernel/trace/fgraph.c
> @@ -622,6 +622,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
>
>         trace.func = func;
>         trace.depth = ++current->curr_ret_depth;
> +       trace.retaddr = ret;
>
>         offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
>         if (offset < 0)
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index bd3e3069300e..87e02815b030 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -870,6 +870,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
>  #define TRACE_GRAPH_GRAPH_TIME          0x400
>  #define TRACE_GRAPH_PRINT_RETVAL        0x800
>  #define TRACE_GRAPH_PRINT_RETVAL_HEX    0x1000
> +#define TRACE_GRAPH_PRINT_RETADDR       0x2000
>  #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
>  #define TRACE_GRAPH_PRINT_FILL_MASK    (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
>
> diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> index c47422b20908..8b8753319dd3 100644
> --- a/kernel/trace/trace_entries.h
> +++ b/kernel/trace/trace_entries.h
> @@ -71,6 +71,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
>         perf_ftrace_event_register
>  );
>
> +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
>  /* Function call entry */
>  FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
>
> @@ -79,6 +80,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
>         F_STRUCT(
>                 __field_struct( struct ftrace_graph_ent,        graph_ent       )
>                 __field_packed( unsigned long,  graph_ent,      func            )
> +               __field_packed( unsigned long,  graph_ent,      retaddr         )
>                 __field_packed( int,            graph_ent,      depth           )
>         ),
>
> @@ -86,8 +88,6 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
>  );
>
>  /* Function return entry */
> -#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> -
>  FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>
>         TRACE_GRAPH_RET,
> @@ -110,6 +110,21 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>
>  #else
>
> +/* Function call entry */
> +FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> +
> +       TRACE_GRAPH_ENT,
> +
> +       F_STRUCT(
> +               __field_struct( struct ftrace_graph_ent,        graph_ent       )
> +               __field_packed( unsigned long,  graph_ent,      func            )
> +               __field_packed( int,            graph_ent,      depth           )
> +       ),
> +
> +       F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
> +);
> +
> +/* Function return entry */
>  FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>
>         TRACE_GRAPH_RET,
> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> index 13d0387ac6a6..655535d57763 100644
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -63,6 +63,8 @@ static struct tracer_opt trace_opts[] = {
>         { TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) },
>         /* Display function return value in hexadecimal format ? */
>         { TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) },
> +       /* Display function return address ? */
> +       { TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) },
>  #endif
>         /* Include sleep time (scheduled out) between entry and return */
>         { TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
> @@ -651,50 +653,86 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
>  #ifdef CONFIG_FUNCTION_GRAPH_RETVAL
>
>  #define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL
> +#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR
>
> -static void print_graph_retval(struct trace_seq *s, unsigned long retval,
> -                               bool leaf, void *func, bool hex_format)
> +static bool print_graph_retaddr(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
> +                               u32 trace_flags, bool comment)
>  {
> -       unsigned long err_code = 0;
> +       if (unlikely(graph_ent->retaddr ==
> +                (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
> +               return false;
>
> -       if (retval == 0 || hex_format)
> -               goto done;
> +       if (comment)
> +               trace_seq_puts(s, " /*");
>
> -       /* Check if the return value matches the negative format */
> -       if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
> -               (((u64)retval) >> 32) == 0) {
> -               /* sign extension */
> -               err_code = (unsigned long)(s32)retval;
> -       } else {
> -               err_code = retval;
> +       trace_seq_puts(s, " A=");
> +       seq_print_ip_sym(s, graph_ent->retaddr, trace_flags | TRACE_ITER_SYM_OFFSET);
> +
> +       if (comment)
> +               trace_seq_puts(s, " */");
> +
> +       return true;
> +}
> +
> +static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
> +                               struct ftrace_graph_ret *graph_ret,
> +                               u32 opt_flags, u32 trace_flags)
> +{
> +       unsigned long err_code = 0;
> +       unsigned long retval = graph_ret->retval;
> +       bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX);
> +       bool print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR);
> +       bool print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL);
> +       void *func = (void *)graph_ret->func;
> +
> +       if (print_retval && retval && !hex_format) {
> +               /* Check if the return value matches the negative format */
> +               if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
> +                       (((u64)retval) >> 32) == 0) {
> +                       err_code = sign_extend64(retval, 31);
> +               } else {
> +                       err_code = retval;
> +               }
> +
> +               if (!IS_ERR_VALUE(err_code))
> +                       err_code = 0;
>         }
>
> -       if (!IS_ERR_VALUE(err_code))
> -               err_code = 0;
> +       if (print_retaddr && graph_ent && unlikely(graph_ent->retaddr ==
> +                (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
> +               print_retaddr = false;
>
> -done:
> -       if (leaf) {
> -               if (hex_format || (err_code == 0))
> -                       trace_seq_printf(s, "%ps(); /* = 0x%lx */\n",
> -                                       func, retval);
> +       if (graph_ent) {
> +               trace_seq_printf(s, "%ps();", func);
> +               if (print_retval || print_retaddr)
> +                       trace_seq_puts(s, " /*");
>                 else
> -                       trace_seq_printf(s, "%ps(); /* = %ld */\n",
> -                                       func, err_code);
> +                       trace_seq_putc(s, '\n');
>         } else {
> +               print_retaddr = false;
> +               trace_seq_printf(s, "} /* %ps", func);
> +       }
> +
> +       if (print_retval) {
>                 if (hex_format || (err_code == 0))
> -                       trace_seq_printf(s, "} /* %ps = 0x%lx */\n",
> -                                       func, retval);
> +                       trace_seq_printf(s, " V=0x%lx", retval);
>                 else
> -                       trace_seq_printf(s, "} /* %ps = %ld */\n",
> -                                       func, err_code);
> +                       trace_seq_printf(s, " V=%ld", err_code);
>         }
> +
> +       if (print_retaddr)
> +               print_graph_retaddr(s, graph_ent, trace_flags, false);
> +
> +       if (!graph_ent || print_retval || print_retaddr)
> +               trace_seq_puts(s, " */\n");
>  }
>
>  #else
>
>  #define __TRACE_GRAPH_PRINT_RETVAL 0
> +#define __TRACE_GRAPH_PRINT_RETADDR 0
>
> -#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0)
> +#define print_graph_retval(_seq, _ent, _ret, _opt_flags, _trace_flags) do {} while (0)
>
>  #endif
>
> @@ -746,9 +784,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
>          * Write out the function return value if the option function-retval is
>          * enabled.
>          */
> -       if (flags & __TRACE_GRAPH_PRINT_RETVAL)
> -               print_graph_retval(s, graph_ret->retval, true, (void *)call->func,
> -                               !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
> +       if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR))
> +               print_graph_retval(s, call, graph_ret, flags, tr->trace_flags);
>         else
>                 trace_seq_printf(s, "%ps();\n", (void *)call->func);
>
> @@ -788,7 +825,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
>         for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
>                 trace_seq_putc(s, ' ');
>
> -       trace_seq_printf(s, "%ps() {\n", (void *)call->func);
> +       trace_seq_printf(s, "%ps() {", (void *)call->func);
> +       if (flags & __TRACE_GRAPH_PRINT_RETADDR)
> +               print_graph_retaddr(s, call, tr->trace_flags, true);
> +       trace_seq_putc(s, '\n');
>
>         if (trace_seq_has_overflowed(s))
>                 return TRACE_TYPE_PARTIAL_LINE;
> @@ -1032,9 +1072,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
>          * Always write out the function name and its return value if the
>          * function-retval option is enabled.
>          */
> -       if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
> -               print_graph_retval(s, trace->retval, false, (void *)trace->func,
> -                       !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
> +       if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) {
> +               print_graph_retval(s, NULL, trace, flags, tr->trace_flags);
>         } else {
>                 /*
>                  * If the return function does not have a matching entry,
> --
> 2.25.1
>
>


-- 
Thanks,
JeffXie
Re: [RFC PATCH] function_graph: Support recording and printing the function return address
Posted by Donglin Peng 2 months, 3 weeks ago
On Tue, Sep 10, 2024 at 9:07 AM Jeff Xie <xiehuan09@gmail.com> wrote:
>
> On Sun, Sep 8, 2024 at 10:26 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
> >
> > When using function_graph tracer to analyze the flow of kernel function
> > execution, it is often necessary to quickly locate the exact line of code
> > where the call occurs. While this may be easy at times, it can be more
> > time-consuming when some functions are inlined or the flow is too long.
> >
> > This feature aims to simplify the process by recording the return address
> > of traced funcions and printing it when outputing trace logs.
> >
> > To distinguish the return value, 'V=' is used as the prefix for the kernel
> > return value, and 'A=' is used as the prefix for the return address in trace
> > logs. A new trace option named 'funcgraph-retaddr' has been added, and the
> > option 'sym-addr' can control the format of the return address.
> >
> > See below logs with both funcgraph-retval and funcgraph-retaddr enabled.
> >
> > 4)               |  load_elf_binary() { /* A=bprm_execve+0x249/0x600 */
> > 4)               |    load_elf_phdrs() { /* A=load_elf_binary+0x84/0x1730 */
> > 4)               |      __kmalloc_noprof() { /* A=load_elf_phdrs+0x4a/0xb0 */
> > 4) + 47.910 us   |        __cond_resched(); /* V=0x0 A=__kmalloc_noprof+0x28c/0x390 */
> > 4) ! 204.142 us  |      } /* __kmalloc_noprof V=0xffff888201e32c00 */
> > 4)               |      kernel_read() { /* A=load_elf_phdrs+0x6c/0xb0 */
> > 4)               |        rw_verify_area() { /* A=kernel_read+0x2b/0x50 */
> > 4)               |          security_file_permission() {
> > 4)               |            selinux_file_permission() { /* A=security_file_permission+0x26/0x40 */
> > 4)               |              __inode_security_revalidate() { /* A=selinux_file_permission+0x6d/0x140 */
> > 4)   1.182 us    |                __cond_resched(); /* V=0x0 A=__inode_security_revalidate+0x5f/0x80 */
> > 4)   4.138 us    |              } /* __inode_security_revalidate V=0x0 */
> > 4)   1.513 us    |              avc_policy_seqno(); /* V=0x0 A=selinux_file_permission+0x107/0x140 */
> > 4) + 12.133 us   |            } /* selinux_file_permission V=0x0 */
> > 4) + 39.834 us   |          } /* security_file_permission V=0x0 */
> > 4) + 42.710 us   |        } /* rw_verify_area V=0x0 */
> >
> > Then, we can use the faddr2line to locate the source code, for example:
> >
> > $ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0
> > load_elf_phdrs+0x6c/0xb0:
> > elf_read at fs/binfmt_elf.c:471
> > (inlined by) load_elf_phdrs at fs/binfmt_elf.c:531
>
> That's a great idea; I often use it that way. However, I used both the
> function tracer and the function graph simultaneously
> because the function tracer can also show context messages, such as
> displaying 'd.h3.'
>
> Example:
> <idle>-0 [001] d.h3. 41.119330: __napi_schedule+0x4/0x90 <-e1000_intr+0x8d/0x110
>
> I suggest that we consider displaying the results of both the function
> tracer and the function graph together in the same instance ;-)
>

Good idea. I will have a try.

> > Signed-off-by: Donglin Peng <dolinux.peng@gmail.com>
> > ---
> >  include/linux/ftrace.h               |   1 +
> >  kernel/trace/fgraph.c                |   1 +
> >  kernel/trace/trace.h                 |   1 +
> >  kernel/trace/trace_entries.h         |  19 ++++-
> >  kernel/trace/trace_functions_graph.c | 105 ++++++++++++++++++---------
> >  5 files changed, 92 insertions(+), 35 deletions(-)
> >
> > diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> > index fd5e84d0ec47..bdf51163b3b8 100644
> > --- a/include/linux/ftrace.h
> > +++ b/include/linux/ftrace.h
> > @@ -1011,6 +1011,7 @@ static inline void ftrace_init(void) { }
> >   */
> >  struct ftrace_graph_ent {
> >         unsigned long func; /* Current function */
> > +       unsigned long retaddr;  /* Return address */
> >         int depth;
> >  } __packed;
> >
> > diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> > index d7d4fb403f6f..fcc4162c10f6 100644
> > --- a/kernel/trace/fgraph.c
> > +++ b/kernel/trace/fgraph.c
> > @@ -622,6 +622,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
> >
> >         trace.func = func;
> >         trace.depth = ++current->curr_ret_depth;
> > +       trace.retaddr = ret;
> >
> >         offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
> >         if (offset < 0)
> > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> > index bd3e3069300e..87e02815b030 100644
> > --- a/kernel/trace/trace.h
> > +++ b/kernel/trace/trace.h
> > @@ -870,6 +870,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
> >  #define TRACE_GRAPH_GRAPH_TIME          0x400
> >  #define TRACE_GRAPH_PRINT_RETVAL        0x800
> >  #define TRACE_GRAPH_PRINT_RETVAL_HEX    0x1000
> > +#define TRACE_GRAPH_PRINT_RETADDR       0x2000
> >  #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
> >  #define TRACE_GRAPH_PRINT_FILL_MASK    (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
> >
> > diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> > index c47422b20908..8b8753319dd3 100644
> > --- a/kernel/trace/trace_entries.h
> > +++ b/kernel/trace/trace_entries.h
> > @@ -71,6 +71,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
> >         perf_ftrace_event_register
> >  );
> >
> > +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> >  /* Function call entry */
> >  FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> >
> > @@ -79,6 +80,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> >         F_STRUCT(
> >                 __field_struct( struct ftrace_graph_ent,        graph_ent       )
> >                 __field_packed( unsigned long,  graph_ent,      func            )
> > +               __field_packed( unsigned long,  graph_ent,      retaddr         )
> >                 __field_packed( int,            graph_ent,      depth           )
> >         ),
> >
> > @@ -86,8 +88,6 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> >  );
> >
> >  /* Function return entry */
> > -#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> > -
> >  FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> >
> >         TRACE_GRAPH_RET,
> > @@ -110,6 +110,21 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> >
> >  #else
> >
> > +/* Function call entry */
> > +FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> > +
> > +       TRACE_GRAPH_ENT,
> > +
> > +       F_STRUCT(
> > +               __field_struct( struct ftrace_graph_ent,        graph_ent       )
> > +               __field_packed( unsigned long,  graph_ent,      func            )
> > +               __field_packed( int,            graph_ent,      depth           )
> > +       ),
> > +
> > +       F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
> > +);
> > +
> > +/* Function return entry */
> >  FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> >
> >         TRACE_GRAPH_RET,
> > diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> > index 13d0387ac6a6..655535d57763 100644
> > --- a/kernel/trace/trace_functions_graph.c
> > +++ b/kernel/trace/trace_functions_graph.c
> > @@ -63,6 +63,8 @@ static struct tracer_opt trace_opts[] = {
> >         { TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) },
> >         /* Display function return value in hexadecimal format ? */
> >         { TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) },
> > +       /* Display function return address ? */
> > +       { TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) },
> >  #endif
> >         /* Include sleep time (scheduled out) between entry and return */
> >         { TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
> > @@ -651,50 +653,86 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
> >  #ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> >
> >  #define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL
> > +#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR
> >
> > -static void print_graph_retval(struct trace_seq *s, unsigned long retval,
> > -                               bool leaf, void *func, bool hex_format)
> > +static bool print_graph_retaddr(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
> > +                               u32 trace_flags, bool comment)
> >  {
> > -       unsigned long err_code = 0;
> > +       if (unlikely(graph_ent->retaddr ==
> > +                (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
> > +               return false;
> >
> > -       if (retval == 0 || hex_format)
> > -               goto done;
> > +       if (comment)
> > +               trace_seq_puts(s, " /*");
> >
> > -       /* Check if the return value matches the negative format */
> > -       if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
> > -               (((u64)retval) >> 32) == 0) {
> > -               /* sign extension */
> > -               err_code = (unsigned long)(s32)retval;
> > -       } else {
> > -               err_code = retval;
> > +       trace_seq_puts(s, " A=");
> > +       seq_print_ip_sym(s, graph_ent->retaddr, trace_flags | TRACE_ITER_SYM_OFFSET);
> > +
> > +       if (comment)
> > +               trace_seq_puts(s, " */");
> > +
> > +       return true;
> > +}
> > +
> > +static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
> > +                               struct ftrace_graph_ret *graph_ret,
> > +                               u32 opt_flags, u32 trace_flags)
> > +{
> > +       unsigned long err_code = 0;
> > +       unsigned long retval = graph_ret->retval;
> > +       bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX);
> > +       bool print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR);
> > +       bool print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL);
> > +       void *func = (void *)graph_ret->func;
> > +
> > +       if (print_retval && retval && !hex_format) {
> > +               /* Check if the return value matches the negative format */
> > +               if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
> > +                       (((u64)retval) >> 32) == 0) {
> > +                       err_code = sign_extend64(retval, 31);
> > +               } else {
> > +                       err_code = retval;
> > +               }
> > +
> > +               if (!IS_ERR_VALUE(err_code))
> > +                       err_code = 0;
> >         }
> >
> > -       if (!IS_ERR_VALUE(err_code))
> > -               err_code = 0;
> > +       if (print_retaddr && graph_ent && unlikely(graph_ent->retaddr ==
> > +                (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
> > +               print_retaddr = false;
> >
> > -done:
> > -       if (leaf) {
> > -               if (hex_format || (err_code == 0))
> > -                       trace_seq_printf(s, "%ps(); /* = 0x%lx */\n",
> > -                                       func, retval);
> > +       if (graph_ent) {
> > +               trace_seq_printf(s, "%ps();", func);
> > +               if (print_retval || print_retaddr)
> > +                       trace_seq_puts(s, " /*");
> >                 else
> > -                       trace_seq_printf(s, "%ps(); /* = %ld */\n",
> > -                                       func, err_code);
> > +                       trace_seq_putc(s, '\n');
> >         } else {
> > +               print_retaddr = false;
> > +               trace_seq_printf(s, "} /* %ps", func);
> > +       }
> > +
> > +       if (print_retval) {
> >                 if (hex_format || (err_code == 0))
> > -                       trace_seq_printf(s, "} /* %ps = 0x%lx */\n",
> > -                                       func, retval);
> > +                       trace_seq_printf(s, " V=0x%lx", retval);
> >                 else
> > -                       trace_seq_printf(s, "} /* %ps = %ld */\n",
> > -                                       func, err_code);
> > +                       trace_seq_printf(s, " V=%ld", err_code);
> >         }
> > +
> > +       if (print_retaddr)
> > +               print_graph_retaddr(s, graph_ent, trace_flags, false);
> > +
> > +       if (!graph_ent || print_retval || print_retaddr)
> > +               trace_seq_puts(s, " */\n");
> >  }
> >
> >  #else
> >
> >  #define __TRACE_GRAPH_PRINT_RETVAL 0
> > +#define __TRACE_GRAPH_PRINT_RETADDR 0
> >
> > -#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0)
> > +#define print_graph_retval(_seq, _ent, _ret, _opt_flags, _trace_flags) do {} while (0)
> >
> >  #endif
> >
> > @@ -746,9 +784,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
> >          * Write out the function return value if the option function-retval is
> >          * enabled.
> >          */
> > -       if (flags & __TRACE_GRAPH_PRINT_RETVAL)
> > -               print_graph_retval(s, graph_ret->retval, true, (void *)call->func,
> > -                               !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
> > +       if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR))
> > +               print_graph_retval(s, call, graph_ret, flags, tr->trace_flags);
> >         else
> >                 trace_seq_printf(s, "%ps();\n", (void *)call->func);
> >
> > @@ -788,7 +825,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
> >         for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
> >                 trace_seq_putc(s, ' ');
> >
> > -       trace_seq_printf(s, "%ps() {\n", (void *)call->func);
> > +       trace_seq_printf(s, "%ps() {", (void *)call->func);
> > +       if (flags & __TRACE_GRAPH_PRINT_RETADDR)
> > +               print_graph_retaddr(s, call, tr->trace_flags, true);
> > +       trace_seq_putc(s, '\n');
> >
> >         if (trace_seq_has_overflowed(s))
> >                 return TRACE_TYPE_PARTIAL_LINE;
> > @@ -1032,9 +1072,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
> >          * Always write out the function name and its return value if the
> >          * function-retval option is enabled.
> >          */
> > -       if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
> > -               print_graph_retval(s, trace->retval, false, (void *)trace->func,
> > -                       !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
> > +       if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) {
> > +               print_graph_retval(s, NULL, trace, flags, tr->trace_flags);
> >         } else {
> >                 /*
> >                  * If the return function does not have a matching entry,
> > --
> > 2.25.1
> >
> >
>
>
> --
> Thanks,
> JeffXie
Re: [RFC PATCH] function_graph: Support recording and printing the function return address
Posted by Steven Rostedt 2 months, 3 weeks ago
On Sun,  8 Sep 2024 07:25:44 -0700
Donglin Peng <dolinux.peng@gmail.com> wrote:

Hi Donglin!

> When using function_graph tracer to analyze the flow of kernel function
> execution, it is often necessary to quickly locate the exact line of code
> where the call occurs. While this may be easy at times, it can be more
> time-consuming when some functions are inlined or the flow is too long.
> 
> This feature aims to simplify the process by recording the return address
> of traced funcions and printing it when outputing trace logs.
> 
> To distinguish the return value, 'V=' is used as the prefix for the kernel
> return value, and 'A=' is used as the prefix for the return address in trace
> logs. A new trace option named 'funcgraph-retaddr' has been added, and the
> option 'sym-addr' can control the format of the return address.
> 
> See below logs with both funcgraph-retval and funcgraph-retaddr enabled.
> 
> 4)               |  load_elf_binary() { /* A=bprm_execve+0x249/0x600 */

I wonder if we should make this look more like the function tracer when it
shows the parent. That is:

  4)               |  load_elf_binary() { /* <-bprm_execve+0x249/0x600 */

> 4)               |    load_elf_phdrs() { /* A=load_elf_binary+0x84/0x1730 */
> 4)               |      __kmalloc_noprof() { /* A=load_elf_phdrs+0x4a/0xb0 */
> 4) + 47.910 us   |        __cond_resched(); /* V=0x0 A=__kmalloc_noprof+0x28c/0x390 */
> 4) ! 204.142 us  |      } /* __kmalloc_noprof V=0xffff888201e32c00 */
> 4)               |      kernel_read() { /* A=load_elf_phdrs+0x6c/0xb0 */
> 4)               |        rw_verify_area() { /* A=kernel_read+0x2b/0x50 */
> 4)               |          security_file_permission() {
> 4)               |            selinux_file_permission() { /* A=security_file_permission+0x26/0x40 */
> 4)               |              __inode_security_revalidate() { /* A=selinux_file_permission+0x6d/0x140 */
> 4)   1.182 us    |                __cond_resched(); /* V=0x0 A=__inode_security_revalidate+0x5f/0x80 */
> 4)   4.138 us    |              } /* __inode_security_revalidate V=0x0 */
> 4)   1.513 us    |              avc_policy_seqno(); /* V=0x0 A=selinux_file_permission+0x107/0x140 */
> 4) + 12.133 us   |            } /* selinux_file_permission V=0x0 */
> 4) + 39.834 us   |          } /* security_file_permission V=0x0 */
> 4) + 42.710 us   |        } /* rw_verify_area V=0x0 */
> 
> Then, we can use the faddr2line to locate the source code, for example:
> 
> $ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0
> load_elf_phdrs+0x6c/0xb0:
> elf_read at fs/binfmt_elf.c:471
> (inlined by) load_elf_phdrs at fs/binfmt_elf.c:531
> 
> Signed-off-by: Donglin Peng <dolinux.peng@gmail.com>
> ---
>  include/linux/ftrace.h               |   1 +
>  kernel/trace/fgraph.c                |   1 +
>  kernel/trace/trace.h                 |   1 +
>  kernel/trace/trace_entries.h         |  19 ++++-
>  kernel/trace/trace_functions_graph.c | 105 ++++++++++++++++++---------
>  5 files changed, 92 insertions(+), 35 deletions(-)
> 
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index fd5e84d0ec47..bdf51163b3b8 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -1011,6 +1011,7 @@ static inline void ftrace_init(void) { }
>   */
>  struct ftrace_graph_ent {
>  	unsigned long func; /* Current function */
> +	unsigned long retaddr;  /* Return address */
>  	int depth;
>  } __packed;
>  
> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> index d7d4fb403f6f..fcc4162c10f6 100644
> --- a/kernel/trace/fgraph.c
> +++ b/kernel/trace/fgraph.c
> @@ -622,6 +622,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
>  
>  	trace.func = func;
>  	trace.depth = ++current->curr_ret_depth;
> +	trace.retaddr = ret;
>  
>  	offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
>  	if (offset < 0)
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index bd3e3069300e..87e02815b030 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -870,6 +870,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
>  #define TRACE_GRAPH_GRAPH_TIME          0x400
>  #define TRACE_GRAPH_PRINT_RETVAL        0x800
>  #define TRACE_GRAPH_PRINT_RETVAL_HEX    0x1000
> +#define TRACE_GRAPH_PRINT_RETADDR       0x2000
>  #define TRACE_GRAPH_PRINT_FILL_SHIFT	28
>  #define TRACE_GRAPH_PRINT_FILL_MASK	(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
>  
> diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> index c47422b20908..8b8753319dd3 100644
> --- a/kernel/trace/trace_entries.h
> +++ b/kernel/trace/trace_entries.h
> @@ -71,6 +71,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
>  	perf_ftrace_event_register
>  );
>  
> +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
>  /* Function call entry */
>  FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
>  
> @@ -79,6 +80,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
>  	F_STRUCT(
>  		__field_struct(	struct ftrace_graph_ent,	graph_ent	)
>  		__field_packed(	unsigned long,	graph_ent,	func		)
> +		__field_packed(	unsigned long,	graph_ent,	retaddr		)
>  		__field_packed(	int,		graph_ent,	depth		)
>  	),
>  

Let's make this a new event, so that when this option is not enabled, we
don't waste the ring buffer. For function tracing, every element added to
the event will add megabytes extra to the ring buffer.

It should be possible to switch what event gets created at the time of the
trace. Even calling different functions to do it.

Thanks,

-- Steve


> @@ -86,8 +88,6 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
>  );
>  
>  /* Function return entry */
> -#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> -
>  FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>  
>  	TRACE_GRAPH_RET,
> @@ -110,6 +110,21 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>  
>  #else
>  
> +/* Function call entry */
> +FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> +
> +	TRACE_GRAPH_ENT,
> +
> +	F_STRUCT(
> +		__field_struct(	struct ftrace_graph_ent,	graph_ent	)
> +		__field_packed(	unsigned long,	graph_ent,	func		)
> +		__field_packed(	int,		graph_ent,	depth		)
> +	),
> +
> +	F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
> +);
> +
> +/* Function return entry */
>  FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>  
>  	TRACE_GRAPH_RET,
Re: [RFC PATCH] function_graph: Support recording and printing the function return address
Posted by Donglin Peng 2 months, 3 weeks ago
On Mon, Sep 9, 2024 at 11:04 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Sun,  8 Sep 2024 07:25:44 -0700
> Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> Hi Donglin!
>
> > When using function_graph tracer to analyze the flow of kernel function
> > execution, it is often necessary to quickly locate the exact line of code
> > where the call occurs. While this may be easy at times, it can be more
> > time-consuming when some functions are inlined or the flow is too long.
> >
> > This feature aims to simplify the process by recording the return address
> > of traced funcions and printing it when outputing trace logs.
> >
> > To distinguish the return value, 'V=' is used as the prefix for the kernel
> > return value, and 'A=' is used as the prefix for the return address in trace
> > logs. A new trace option named 'funcgraph-retaddr' has been added, and the
> > option 'sym-addr' can control the format of the return address.
> >
> > See below logs with both funcgraph-retval and funcgraph-retaddr enabled.
> >
> > 4)               |  load_elf_binary() { /* A=bprm_execve+0x249/0x600 */
>
> I wonder if we should make this look more like the function tracer when it
> shows the parent. That is:
>
>   4)               |  load_elf_binary() { /* <-bprm_execve+0x249/0x600 */

Nice, Is it necessary to add a prefix letter for the return value? For
example, when
 both funcgraph-retval and funcgraph-retaddr are enabled, like this:

         avc_policy_seqno(); /* <-selinux_file_permission+0x107/0x140 V=0x0 */

>
> > 4)               |    load_elf_phdrs() { /* A=load_elf_binary+0x84/0x1730 */
> > 4)               |      __kmalloc_noprof() { /* A=load_elf_phdrs+0x4a/0xb0 */
> > 4) + 47.910 us   |        __cond_resched(); /* V=0x0 A=__kmalloc_noprof+0x28c/0x390 */
> > 4) ! 204.142 us  |      } /* __kmalloc_noprof V=0xffff888201e32c00 */
> > 4)               |      kernel_read() { /* A=load_elf_phdrs+0x6c/0xb0 */
> > 4)               |        rw_verify_area() { /* A=kernel_read+0x2b/0x50 */
> > 4)               |          security_file_permission() {
> > 4)               |            selinux_file_permission() { /* A=security_file_permission+0x26/0x40 */
> > 4)               |              __inode_security_revalidate() { /* A=selinux_file_permission+0x6d/0x140 */
> > 4)   1.182 us    |                __cond_resched(); /* V=0x0 A=__inode_security_revalidate+0x5f/0x80 */
> > 4)   4.138 us    |              } /* __inode_security_revalidate V=0x0 */
> > 4)   1.513 us    |              avc_policy_seqno(); /* V=0x0 A=selinux_file_permission+0x107/0x140 */
> > 4) + 12.133 us   |            } /* selinux_file_permission V=0x0 */
> > 4) + 39.834 us   |          } /* security_file_permission V=0x0 */
> > 4) + 42.710 us   |        } /* rw_verify_area V=0x0 */
> >
> > Then, we can use the faddr2line to locate the source code, for example:
> >
> > $ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0
> > load_elf_phdrs+0x6c/0xb0:
> > elf_read at fs/binfmt_elf.c:471
> > (inlined by) load_elf_phdrs at fs/binfmt_elf.c:531
> >
> > Signed-off-by: Donglin Peng <dolinux.peng@gmail.com>
> > ---
> >  include/linux/ftrace.h               |   1 +
> >  kernel/trace/fgraph.c                |   1 +
> >  kernel/trace/trace.h                 |   1 +
> >  kernel/trace/trace_entries.h         |  19 ++++-
> >  kernel/trace/trace_functions_graph.c | 105 ++++++++++++++++++---------
> >  5 files changed, 92 insertions(+), 35 deletions(-)
> >
> > diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> > index fd5e84d0ec47..bdf51163b3b8 100644
> > --- a/include/linux/ftrace.h
> > +++ b/include/linux/ftrace.h
> > @@ -1011,6 +1011,7 @@ static inline void ftrace_init(void) { }
> >   */
> >  struct ftrace_graph_ent {
> >       unsigned long func; /* Current function */
> > +     unsigned long retaddr;  /* Return address */
> >       int depth;
> >  } __packed;
> >
> > diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> > index d7d4fb403f6f..fcc4162c10f6 100644
> > --- a/kernel/trace/fgraph.c
> > +++ b/kernel/trace/fgraph.c
> > @@ -622,6 +622,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
> >
> >       trace.func = func;
> >       trace.depth = ++current->curr_ret_depth;
> > +     trace.retaddr = ret;
> >
> >       offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
> >       if (offset < 0)
> > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> > index bd3e3069300e..87e02815b030 100644
> > --- a/kernel/trace/trace.h
> > +++ b/kernel/trace/trace.h
> > @@ -870,6 +870,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
> >  #define TRACE_GRAPH_GRAPH_TIME          0x400
> >  #define TRACE_GRAPH_PRINT_RETVAL        0x800
> >  #define TRACE_GRAPH_PRINT_RETVAL_HEX    0x1000
> > +#define TRACE_GRAPH_PRINT_RETADDR       0x2000
> >  #define TRACE_GRAPH_PRINT_FILL_SHIFT 28
> >  #define TRACE_GRAPH_PRINT_FILL_MASK  (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
> >
> > diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> > index c47422b20908..8b8753319dd3 100644
> > --- a/kernel/trace/trace_entries.h
> > +++ b/kernel/trace/trace_entries.h
> > @@ -71,6 +71,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
> >       perf_ftrace_event_register
> >  );
> >
> > +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> >  /* Function call entry */
> >  FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> >
> > @@ -79,6 +80,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> >       F_STRUCT(
> >               __field_struct( struct ftrace_graph_ent,        graph_ent       )
> >               __field_packed( unsigned long,  graph_ent,      func            )
> > +             __field_packed( unsigned long,  graph_ent,      retaddr         )
> >               __field_packed( int,            graph_ent,      depth           )
> >       ),
> >
>
> Let's make this a new event, so that when this option is not enabled, we
> don't waste the ring buffer. For function tracing, every element added to
> the event will add megabytes extra to the ring buffer.
>
> It should be possible to switch what event gets created at the time of the
> trace. Even calling different functions to do it.

Sounds good, we may create a new event as follows:

struct ftrace_graph_ent {
unsigned long func; /* Current function */
unsigned long retaddr;
int depth;
} __packed;

FTRACE_ENTRY_PACKED(funcgraph_retaddr_entry, ftrace_graph_ent_readdr_entry,

TRACE_GRAPH_RETADDR_ENT,

F_STRUCT(
__field_struct( struct ftrace_graph_retaddr_ent, graph_retaddr_ent )
__field_packed( unsigned long, graph_retaddr_ent, func )
__field_packed( unsigned long, graph_retaddr_ent, retaddr )
__field_packed( int, graph_retaddr_ent, depth )
),

F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
);

If we were to introduce another new feature such as funcgraph-args which
adds a `ftrace_regs` to the `ftrace_graph_ent`, would it be necessary to
create yet another new event?

 In the scenario where both funcgrah-retaddr and funcgraph-args are enabled,
it is possible that an additional new event may also be required.

>
> Thanks,
>
> -- Steve
>
>
> > @@ -86,8 +88,6 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> >  );
> >
> >  /* Function return entry */
> > -#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> > -
> >  FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> >
> >       TRACE_GRAPH_RET,
> > @@ -110,6 +110,21 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> >
> >  #else
> >
> > +/* Function call entry */
> > +FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> > +
> > +     TRACE_GRAPH_ENT,
> > +
> > +     F_STRUCT(
> > +             __field_struct( struct ftrace_graph_ent,        graph_ent       )
> > +             __field_packed( unsigned long,  graph_ent,      func            )
> > +             __field_packed( int,            graph_ent,      depth           )
> > +     ),
> > +
> > +     F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
> > +);
> > +
> > +/* Function return entry */
> >  FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> >
> >       TRACE_GRAPH_RET,
Re: [RFC PATCH] function_graph: Support recording and printing the function return address
Posted by Steven Rostedt 2 months, 2 weeks ago
On Tue, 10 Sep 2024 14:15:39 +0800
Donglin Peng <dolinux.peng@gmail.com> wrote:
> > >
> > > See below logs with both funcgraph-retval and funcgraph-retaddr enabled.
> > >
> > > 4)               |  load_elf_binary() { /* A=bprm_execve+0x249/0x600 */  
> >
> > I wonder if we should make this look more like the function tracer when it
> > shows the parent. That is:
> >
> >   4)               |  load_elf_binary() { /* <-bprm_execve+0x249/0x600 */  
> 
> Nice, Is it necessary to add a prefix letter for the return value? For
> example, when
>  both funcgraph-retval and funcgraph-retaddr are enabled, like this:
> 
>          avc_policy_seqno(); /* <-selinux_file_permission+0x107/0x140 V=0x0 */

How about:

>          avc_policy_seqno(); /* <-selinux_file_permission+0x107/0x140 ret=0x0 */

As "ret" is more obvious than "V".

> 
> >  
> > > 4)               |    load_elf_phdrs() { /* A=load_elf_binary+0x84/0x1730 */
> > > 4)               |      __kmalloc_noprof() { /* A=load_elf_phdrs+0x4a/0xb0 */
> > > 4) + 47.910 us   |        __cond_resched(); /* V=0x0 A=__kmalloc_noprof+0x28c/0x390 */
> > > 4) ! 204.142 us  |      } /* __kmalloc_noprof V=0xffff888201e32c00 */
> > > 4)               |      kernel_read() { /* A=load_elf_phdrs+0x6c/0xb0 */
> > > 4)               |        rw_verify_area() { /* A=kernel_read+0x2b/0x50 */
> > > 4)               |          security_file_permission() {
> > > 4)               |            selinux_file_permission() { /* A=security_file_permission+0x26/0x40 */
> > > 4)               |              __inode_security_revalidate() { /* A=selinux_file_permission+0x6d/0x140 */
> > > 4)   1.182 us    |                __cond_resched(); /* V=0x0 A=__inode_security_revalidate+0x5f/0x80 */
> > > 4)   4.138 us    |              } /* __inode_security_revalidate V=0x0 */
> > > 4)   1.513 us    |              avc_policy_seqno(); /* V=0x0 A=selinux_file_permission+0x107/0x140 */
> > > 4) + 12.133 us   |            } /* selinux_file_permission V=0x0 */
> > > 4) + 39.834 us   |          } /* security_file_permission V=0x0 */
> > > 4) + 42.710 us   |        } /* rw_verify_area V=0x0 */
> > >

> > > +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> > >  /* Function call entry */
> > >  FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> > >
> > > @@ -79,6 +80,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> > >       F_STRUCT(
> > >               __field_struct( struct ftrace_graph_ent,        graph_ent       )
> > >               __field_packed( unsigned long,  graph_ent,      func            )
> > > +             __field_packed( unsigned long,  graph_ent,      retaddr         )
> > >               __field_packed( int,            graph_ent,      depth           )
> > >       ),
> > >  
> >
> > Let's make this a new event, so that when this option is not enabled, we
> > don't waste the ring buffer. For function tracing, every element added to
> > the event will add megabytes extra to the ring buffer.
> >
> > It should be possible to switch what event gets created at the time of the
> > trace. Even calling different functions to do it.  
> 
> Sounds good, we may create a new event as follows:
> 
> struct ftrace_graph_ent {
> unsigned long func; /* Current function */
> unsigned long retaddr;
> int depth;
> } __packed;
> 
> FTRACE_ENTRY_PACKED(funcgraph_retaddr_entry, ftrace_graph_ent_readdr_entry,
> 
> TRACE_GRAPH_RETADDR_ENT,
> 
> F_STRUCT(
> __field_struct( struct ftrace_graph_retaddr_ent, graph_retaddr_ent )
> __field_packed( unsigned long, graph_retaddr_ent, func )
> __field_packed( unsigned long, graph_retaddr_ent, retaddr )
> __field_packed( int, graph_retaddr_ent, depth )
> ),
> 
> F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)

Why not add the ret in the printk?

  F_printk("--> %ps (%d) <-%ps", (void *)__entry->func, __entry->depth, (void *)__entry->retaddr)

Then user space parsing tools could use this by default, when it doesn't
know how to parse it.

> );
> 
> If we were to introduce another new feature such as funcgraph-args which
> adds a `ftrace_regs` to the `ftrace_graph_ent`, would it be necessary to
> create yet another new event?

Probably yes.

> 
>  In the scenario where both funcgrah-retaddr and funcgraph-args are enabled,
> it is possible that an additional new event may also be required.

Yes, that would likely be another event.

-- Steve
Re: [RFC PATCH] function_graph: Support recording and printing the function return address
Posted by Donglin Peng 2 months, 2 weeks ago
On Wed, Sep 11, 2024 at 12:50 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Tue, 10 Sep 2024 14:15:39 +0800
> Donglin Peng <dolinux.peng@gmail.com> wrote:
> > > >
> > > > See below logs with both funcgraph-retval and funcgraph-retaddr enabled.
> > > >
> > > > 4)               |  load_elf_binary() { /* A=bprm_execve+0x249/0x600 */
> > >
> > > I wonder if we should make this look more like the function tracer when it
> > > shows the parent. That is:
> > >
> > >   4)               |  load_elf_binary() { /* <-bprm_execve+0x249/0x600 */
> >
> > Nice, Is it necessary to add a prefix letter for the return value? For
> > example, when
> >  both funcgraph-retval and funcgraph-retaddr are enabled, like this:
> >
> >          avc_policy_seqno(); /* <-selinux_file_permission+0x107/0x140 V=0x0 */
>
> How about:
>
> >          avc_policy_seqno(); /* <-selinux_file_permission+0x107/0x140 ret=0x0 */
>
> As "ret" is more obvious than "V".

I agree, and will modify it, thanks.

>
> >
> > >
> > > > 4)               |    load_elf_phdrs() { /* A=load_elf_binary+0x84/0x1730 */
> > > > 4)               |      __kmalloc_noprof() { /* A=load_elf_phdrs+0x4a/0xb0 */
> > > > 4) + 47.910 us   |        __cond_resched(); /* V=0x0 A=__kmalloc_noprof+0x28c/0x390 */
> > > > 4) ! 204.142 us  |      } /* __kmalloc_noprof V=0xffff888201e32c00 */
> > > > 4)               |      kernel_read() { /* A=load_elf_phdrs+0x6c/0xb0 */
> > > > 4)               |        rw_verify_area() { /* A=kernel_read+0x2b/0x50 */
> > > > 4)               |          security_file_permission() {
> > > > 4)               |            selinux_file_permission() { /* A=security_file_permission+0x26/0x40 */
> > > > 4)               |              __inode_security_revalidate() { /* A=selinux_file_permission+0x6d/0x140 */
> > > > 4)   1.182 us    |                __cond_resched(); /* V=0x0 A=__inode_security_revalidate+0x5f/0x80 */
> > > > 4)   4.138 us    |              } /* __inode_security_revalidate V=0x0 */
> > > > 4)   1.513 us    |              avc_policy_seqno(); /* V=0x0 A=selinux_file_permission+0x107/0x140 */
> > > > 4) + 12.133 us   |            } /* selinux_file_permission V=0x0 */
> > > > 4) + 39.834 us   |          } /* security_file_permission V=0x0 */
> > > > 4) + 42.710 us   |        } /* rw_verify_area V=0x0 */
> > > >
>
> > > > +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> > > >  /* Function call entry */
> > > >  FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> > > >
> > > > @@ -79,6 +80,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> > > >       F_STRUCT(
> > > >               __field_struct( struct ftrace_graph_ent,        graph_ent       )
> > > >               __field_packed( unsigned long,  graph_ent,      func            )
> > > > +             __field_packed( unsigned long,  graph_ent,      retaddr         )
> > > >               __field_packed( int,            graph_ent,      depth           )
> > > >       ),
> > > >
> > >
> > > Let's make this a new event, so that when this option is not enabled, we
> > > don't waste the ring buffer. For function tracing, every element added to
> > > the event will add megabytes extra to the ring buffer.
> > >
> > > It should be possible to switch what event gets created at the time of the
> > > trace. Even calling different functions to do it.
> >
> > Sounds good, we may create a new event as follows:
> >
> > struct ftrace_graph_ent {
> > unsigned long func; /* Current function */
> > unsigned long retaddr;
> > int depth;
> > } __packed;
> >
> > FTRACE_ENTRY_PACKED(funcgraph_retaddr_entry, ftrace_graph_ent_readdr_entry,
> >
> > TRACE_GRAPH_RETADDR_ENT,
> >
> > F_STRUCT(
> > __field_struct( struct ftrace_graph_retaddr_ent, graph_retaddr_ent )
> > __field_packed( unsigned long, graph_retaddr_ent, func )
> > __field_packed( unsigned long, graph_retaddr_ent, retaddr )
> > __field_packed( int, graph_retaddr_ent, depth )
> > ),
> >
> > F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
>
> Why not add the ret in the printk?
>
>   F_printk("--> %ps (%d) <-%ps", (void *)__entry->func, __entry->depth, (void *)__entry->retaddr)
>
> Then user space parsing tools could use this by default, when it doesn't
> know how to parse it.

Thanks, I will modify it.

>
> > );
> >
> > If we were to introduce another new feature such as funcgraph-args which
> > adds a `ftrace_regs` to the `ftrace_graph_ent`, would it be necessary to
> > create yet another new event?
>
> Probably yes.
>
> >
> >  In the scenario where both funcgrah-retaddr and funcgraph-args are enabled,
> > it is possible that an additional new event may also be required.
>
> Yes, that would likely be another event.
>
> -- Steve
Re: [RFC PATCH] function_graph: Support recording and printing the function return address
Posted by Song Chen 2 months, 3 weeks ago
it's a practical improvement for me, save time to read code. btw, how is 
the overhead?

/Song

在 2024/9/8 22:25, Donglin Peng 写道:
> When using function_graph tracer to analyze the flow of kernel function
> execution, it is often necessary to quickly locate the exact line of code
> where the call occurs. While this may be easy at times, it can be more
> time-consuming when some functions are inlined or the flow is too long.
> 
> This feature aims to simplify the process by recording the return address
> of traced funcions and printing it when outputing trace logs.
> 
> To distinguish the return value, 'V=' is used as the prefix for the kernel
> return value, and 'A=' is used as the prefix for the return address in trace
> logs. A new trace option named 'funcgraph-retaddr' has been added, and the
> option 'sym-addr' can control the format of the return address.
> 
> See below logs with both funcgraph-retval and funcgraph-retaddr enabled.
> 
> 4)               |  load_elf_binary() { /* A=bprm_execve+0x249/0x600 */
> 4)               |    load_elf_phdrs() { /* A=load_elf_binary+0x84/0x1730 */
> 4)               |      __kmalloc_noprof() { /* A=load_elf_phdrs+0x4a/0xb0 */
> 4) + 47.910 us   |        __cond_resched(); /* V=0x0 A=__kmalloc_noprof+0x28c/0x390 */
> 4) ! 204.142 us  |      } /* __kmalloc_noprof V=0xffff888201e32c00 */
> 4)               |      kernel_read() { /* A=load_elf_phdrs+0x6c/0xb0 */
> 4)               |        rw_verify_area() { /* A=kernel_read+0x2b/0x50 */
> 4)               |          security_file_permission() {
> 4)               |            selinux_file_permission() { /* A=security_file_permission+0x26/0x40 */
> 4)               |              __inode_security_revalidate() { /* A=selinux_file_permission+0x6d/0x140 */
> 4)   1.182 us    |                __cond_resched(); /* V=0x0 A=__inode_security_revalidate+0x5f/0x80 */
> 4)   4.138 us    |              } /* __inode_security_revalidate V=0x0 */
> 4)   1.513 us    |              avc_policy_seqno(); /* V=0x0 A=selinux_file_permission+0x107/0x140 */
> 4) + 12.133 us   |            } /* selinux_file_permission V=0x0 */
> 4) + 39.834 us   |          } /* security_file_permission V=0x0 */
> 4) + 42.710 us   |        } /* rw_verify_area V=0x0 */
> 
> Then, we can use the faddr2line to locate the source code, for example:
> 
> $ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0
> load_elf_phdrs+0x6c/0xb0:
> elf_read at fs/binfmt_elf.c:471
> (inlined by) load_elf_phdrs at fs/binfmt_elf.c:531
> 
> Signed-off-by: Donglin Peng <dolinux.peng@gmail.com>
> ---
>   include/linux/ftrace.h               |   1 +
>   kernel/trace/fgraph.c                |   1 +
>   kernel/trace/trace.h                 |   1 +
>   kernel/trace/trace_entries.h         |  19 ++++-
>   kernel/trace/trace_functions_graph.c | 105 ++++++++++++++++++---------
>   5 files changed, 92 insertions(+), 35 deletions(-)
> 
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index fd5e84d0ec47..bdf51163b3b8 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -1011,6 +1011,7 @@ static inline void ftrace_init(void) { }
>    */
>   struct ftrace_graph_ent {
>   	unsigned long func; /* Current function */
> +	unsigned long retaddr;  /* Return address */
>   	int depth;
>   } __packed;
>   
> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> index d7d4fb403f6f..fcc4162c10f6 100644
> --- a/kernel/trace/fgraph.c
> +++ b/kernel/trace/fgraph.c
> @@ -622,6 +622,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
>   
>   	trace.func = func;
>   	trace.depth = ++current->curr_ret_depth;
> +	trace.retaddr = ret;
>   
>   	offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
>   	if (offset < 0)
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index bd3e3069300e..87e02815b030 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -870,6 +870,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
>   #define TRACE_GRAPH_GRAPH_TIME          0x400
>   #define TRACE_GRAPH_PRINT_RETVAL        0x800
>   #define TRACE_GRAPH_PRINT_RETVAL_HEX    0x1000
> +#define TRACE_GRAPH_PRINT_RETADDR       0x2000
>   #define TRACE_GRAPH_PRINT_FILL_SHIFT	28
>   #define TRACE_GRAPH_PRINT_FILL_MASK	(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
>   
> diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> index c47422b20908..8b8753319dd3 100644
> --- a/kernel/trace/trace_entries.h
> +++ b/kernel/trace/trace_entries.h
> @@ -71,6 +71,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
>   	perf_ftrace_event_register
>   );
>   
> +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
>   /* Function call entry */
>   FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
>   
> @@ -79,6 +80,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
>   	F_STRUCT(
>   		__field_struct(	struct ftrace_graph_ent,	graph_ent	)
>   		__field_packed(	unsigned long,	graph_ent,	func		)
> +		__field_packed(	unsigned long,	graph_ent,	retaddr		)
>   		__field_packed(	int,		graph_ent,	depth		)
>   	),
>   
> @@ -86,8 +88,6 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
>   );
>   
>   /* Function return entry */
> -#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> -
>   FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>   
>   	TRACE_GRAPH_RET,
> @@ -110,6 +110,21 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>   
>   #else
>   
> +/* Function call entry */
> +FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> +
> +	TRACE_GRAPH_ENT,
> +
> +	F_STRUCT(
> +		__field_struct(	struct ftrace_graph_ent,	graph_ent	)
> +		__field_packed(	unsigned long,	graph_ent,	func		)
> +		__field_packed(	int,		graph_ent,	depth		)
> +	),
> +
> +	F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
> +);
> +
> +/* Function return entry */
>   FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>   
>   	TRACE_GRAPH_RET,
> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> index 13d0387ac6a6..655535d57763 100644
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -63,6 +63,8 @@ static struct tracer_opt trace_opts[] = {
>   	{ TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) },
>   	/* Display function return value in hexadecimal format ? */
>   	{ TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) },
> +	/* Display function return address ? */
> +	{ TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) },
>   #endif
>   	/* Include sleep time (scheduled out) between entry and return */
>   	{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
> @@ -651,50 +653,86 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
>   #ifdef CONFIG_FUNCTION_GRAPH_RETVAL
>   
>   #define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL
> +#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR
>   
> -static void print_graph_retval(struct trace_seq *s, unsigned long retval,
> -				bool leaf, void *func, bool hex_format)
> +static bool print_graph_retaddr(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
> +				u32 trace_flags, bool comment)
>   {
> -	unsigned long err_code = 0;
> +	if (unlikely(graph_ent->retaddr ==
> +		 (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
> +		return false;
>   
> -	if (retval == 0 || hex_format)
> -		goto done;
> +	if (comment)
> +		trace_seq_puts(s, " /*");
>   
> -	/* Check if the return value matches the negative format */
> -	if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
> -		(((u64)retval) >> 32) == 0) {
> -		/* sign extension */
> -		err_code = (unsigned long)(s32)retval;
> -	} else {
> -		err_code = retval;
> +	trace_seq_puts(s, " A=");
> +	seq_print_ip_sym(s, graph_ent->retaddr, trace_flags | TRACE_ITER_SYM_OFFSET);
> +
> +	if (comment)
> +		trace_seq_puts(s, " */");
> +
> +	return true;
> +}
> +
> +static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
> +				struct ftrace_graph_ret *graph_ret,
> +				u32 opt_flags, u32 trace_flags)
> +{
> +	unsigned long err_code = 0;
> +	unsigned long retval = graph_ret->retval;
> +	bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX);
> +	bool print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR);
> +	bool print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL);
> +	void *func = (void *)graph_ret->func;
> +
> +	if (print_retval && retval && !hex_format) {
> +		/* Check if the return value matches the negative format */
> +		if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
> +			(((u64)retval) >> 32) == 0) {
> +			err_code = sign_extend64(retval, 31);
> +		} else {
> +			err_code = retval;
> +		}
> +
> +		if (!IS_ERR_VALUE(err_code))
> +			err_code = 0;
>   	}
>   
> -	if (!IS_ERR_VALUE(err_code))
> -		err_code = 0;
> +	if (print_retaddr && graph_ent && unlikely(graph_ent->retaddr ==
> +		 (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
> +		print_retaddr = false;
>   
> -done:
> -	if (leaf) {
> -		if (hex_format || (err_code == 0))
> -			trace_seq_printf(s, "%ps(); /* = 0x%lx */\n",
> -					func, retval);
> +	if (graph_ent) {
> +		trace_seq_printf(s, "%ps();", func);
> +		if (print_retval || print_retaddr)
> +			trace_seq_puts(s, " /*");
>   		else
> -			trace_seq_printf(s, "%ps(); /* = %ld */\n",
> -					func, err_code);
> +			trace_seq_putc(s, '\n');
>   	} else {
> +		print_retaddr = false;
> +		trace_seq_printf(s, "} /* %ps", func);
> +	}
> +
> +	if (print_retval) {
>   		if (hex_format || (err_code == 0))
> -			trace_seq_printf(s, "} /* %ps = 0x%lx */\n",
> -					func, retval);
> +			trace_seq_printf(s, " V=0x%lx", retval);
>   		else
> -			trace_seq_printf(s, "} /* %ps = %ld */\n",
> -					func, err_code);
> +			trace_seq_printf(s, " V=%ld", err_code);
>   	}
> +
> +	if (print_retaddr)
> +		print_graph_retaddr(s, graph_ent, trace_flags, false);
> +
> +	if (!graph_ent || print_retval || print_retaddr)
> +		trace_seq_puts(s, " */\n");
>   }
>   
>   #else
>   
>   #define __TRACE_GRAPH_PRINT_RETVAL 0
> +#define __TRACE_GRAPH_PRINT_RETADDR 0
>   
> -#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0)
> +#define print_graph_retval(_seq, _ent, _ret, _opt_flags, _trace_flags) do {} while (0)
>   
>   #endif
>   
> @@ -746,9 +784,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
>   	 * Write out the function return value if the option function-retval is
>   	 * enabled.
>   	 */
> -	if (flags & __TRACE_GRAPH_PRINT_RETVAL)
> -		print_graph_retval(s, graph_ret->retval, true, (void *)call->func,
> -				!!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
> +	if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR))
> +		print_graph_retval(s, call, graph_ret, flags, tr->trace_flags);
>   	else
>   		trace_seq_printf(s, "%ps();\n", (void *)call->func);
>   
> @@ -788,7 +825,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
>   	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
>   		trace_seq_putc(s, ' ');
>   
> -	trace_seq_printf(s, "%ps() {\n", (void *)call->func);
> +	trace_seq_printf(s, "%ps() {", (void *)call->func);
> +	if (flags & __TRACE_GRAPH_PRINT_RETADDR)
> +		print_graph_retaddr(s, call, tr->trace_flags, true);
> +	trace_seq_putc(s, '\n');
>   
>   	if (trace_seq_has_overflowed(s))
>   		return TRACE_TYPE_PARTIAL_LINE;
> @@ -1032,9 +1072,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
>   	 * Always write out the function name and its return value if the
>   	 * function-retval option is enabled.
>   	 */
> -	if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
> -		print_graph_retval(s, trace->retval, false, (void *)trace->func,
> -			!!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
> +	if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) {
> +		print_graph_retval(s, NULL, trace, flags, tr->trace_flags);
>   	} else {
>   		/*
>   		 * If the return function does not have a matching entry,
Re: [RFC PATCH] function_graph: Support recording and printing the function return address
Posted by Donglin Peng 2 months, 3 weeks ago
On Mon, Sep 9, 2024 at 9:58 AM Song Chen <chensong_2000@189.cn> wrote:
>
> it's a practical improvement for me, save time to read code. btw, how is
> the overhead?

Thank you. I believe that the overhead can be almost disregarded, with the
 exception of adding an unsigned long member to the ftrace_graph_ent.

>
> /Song
>
> 在 2024/9/8 22:25, Donglin Peng 写道:
> > When using function_graph tracer to analyze the flow of kernel function
> > execution, it is often necessary to quickly locate the exact line of code
> > where the call occurs. While this may be easy at times, it can be more
> > time-consuming when some functions are inlined or the flow is too long.
> >
> > This feature aims to simplify the process by recording the return address
> > of traced funcions and printing it when outputing trace logs.
> >
> > To distinguish the return value, 'V=' is used as the prefix for the kernel
> > return value, and 'A=' is used as the prefix for the return address in trace
> > logs. A new trace option named 'funcgraph-retaddr' has been added, and the
> > option 'sym-addr' can control the format of the return address.
> >
> > See below logs with both funcgraph-retval and funcgraph-retaddr enabled.
> >
> > 4)               |  load_elf_binary() { /* A=bprm_execve+0x249/0x600 */
> > 4)               |    load_elf_phdrs() { /* A=load_elf_binary+0x84/0x1730 */
> > 4)               |      __kmalloc_noprof() { /* A=load_elf_phdrs+0x4a/0xb0 */
> > 4) + 47.910 us   |        __cond_resched(); /* V=0x0 A=__kmalloc_noprof+0x28c/0x390 */
> > 4) ! 204.142 us  |      } /* __kmalloc_noprof V=0xffff888201e32c00 */
> > 4)               |      kernel_read() { /* A=load_elf_phdrs+0x6c/0xb0 */
> > 4)               |        rw_verify_area() { /* A=kernel_read+0x2b/0x50 */
> > 4)               |          security_file_permission() {
> > 4)               |            selinux_file_permission() { /* A=security_file_permission+0x26/0x40 */
> > 4)               |              __inode_security_revalidate() { /* A=selinux_file_permission+0x6d/0x140 */
> > 4)   1.182 us    |                __cond_resched(); /* V=0x0 A=__inode_security_revalidate+0x5f/0x80 */
> > 4)   4.138 us    |              } /* __inode_security_revalidate V=0x0 */
> > 4)   1.513 us    |              avc_policy_seqno(); /* V=0x0 A=selinux_file_permission+0x107/0x140 */
> > 4) + 12.133 us   |            } /* selinux_file_permission V=0x0 */
> > 4) + 39.834 us   |          } /* security_file_permission V=0x0 */
> > 4) + 42.710 us   |        } /* rw_verify_area V=0x0 */
> >
> > Then, we can use the faddr2line to locate the source code, for example:
> >
> > $ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0
> > load_elf_phdrs+0x6c/0xb0:
> > elf_read at fs/binfmt_elf.c:471
> > (inlined by) load_elf_phdrs at fs/binfmt_elf.c:531
> >
> > Signed-off-by: Donglin Peng <dolinux.peng@gmail.com>
> > ---
> >   include/linux/ftrace.h               |   1 +
> >   kernel/trace/fgraph.c                |   1 +
> >   kernel/trace/trace.h                 |   1 +
> >   kernel/trace/trace_entries.h         |  19 ++++-
> >   kernel/trace/trace_functions_graph.c | 105 ++++++++++++++++++---------
> >   5 files changed, 92 insertions(+), 35 deletions(-)
> >
> > diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> > index fd5e84d0ec47..bdf51163b3b8 100644
> > --- a/include/linux/ftrace.h
> > +++ b/include/linux/ftrace.h
> > @@ -1011,6 +1011,7 @@ static inline void ftrace_init(void) { }
> >    */
> >   struct ftrace_graph_ent {
> >       unsigned long func; /* Current function */
> > +     unsigned long retaddr;  /* Return address */
> >       int depth;
> >   } __packed;
> >
> > diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> > index d7d4fb403f6f..fcc4162c10f6 100644
> > --- a/kernel/trace/fgraph.c
> > +++ b/kernel/trace/fgraph.c
> > @@ -622,6 +622,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
> >
> >       trace.func = func;
> >       trace.depth = ++current->curr_ret_depth;
> > +     trace.retaddr = ret;
> >
> >       offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
> >       if (offset < 0)
> > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> > index bd3e3069300e..87e02815b030 100644
> > --- a/kernel/trace/trace.h
> > +++ b/kernel/trace/trace.h
> > @@ -870,6 +870,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
> >   #define TRACE_GRAPH_GRAPH_TIME          0x400
> >   #define TRACE_GRAPH_PRINT_RETVAL        0x800
> >   #define TRACE_GRAPH_PRINT_RETVAL_HEX    0x1000
> > +#define TRACE_GRAPH_PRINT_RETADDR       0x2000
> >   #define TRACE_GRAPH_PRINT_FILL_SHIFT        28
> >   #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
> >
> > diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> > index c47422b20908..8b8753319dd3 100644
> > --- a/kernel/trace/trace_entries.h
> > +++ b/kernel/trace/trace_entries.h
> > @@ -71,6 +71,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
> >       perf_ftrace_event_register
> >   );
> >
> > +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> >   /* Function call entry */
> >   FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> >
> > @@ -79,6 +80,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> >       F_STRUCT(
> >               __field_struct( struct ftrace_graph_ent,        graph_ent       )
> >               __field_packed( unsigned long,  graph_ent,      func            )
> > +             __field_packed( unsigned long,  graph_ent,      retaddr         )
> >               __field_packed( int,            graph_ent,      depth           )
> >       ),
> >
> > @@ -86,8 +88,6 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> >   );
> >
> >   /* Function return entry */
> > -#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> > -
> >   FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> >
> >       TRACE_GRAPH_RET,
> > @@ -110,6 +110,21 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> >
> >   #else
> >
> > +/* Function call entry */
> > +FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
> > +
> > +     TRACE_GRAPH_ENT,
> > +
> > +     F_STRUCT(
> > +             __field_struct( struct ftrace_graph_ent,        graph_ent       )
> > +             __field_packed( unsigned long,  graph_ent,      func            )
> > +             __field_packed( int,            graph_ent,      depth           )
> > +     ),
> > +
> > +     F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
> > +);
> > +
> > +/* Function return entry */
> >   FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> >
> >       TRACE_GRAPH_RET,
> > diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> > index 13d0387ac6a6..655535d57763 100644
> > --- a/kernel/trace/trace_functions_graph.c
> > +++ b/kernel/trace/trace_functions_graph.c
> > @@ -63,6 +63,8 @@ static struct tracer_opt trace_opts[] = {
> >       { TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) },
> >       /* Display function return value in hexadecimal format ? */
> >       { TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) },
> > +     /* Display function return address ? */
> > +     { TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) },
> >   #endif
> >       /* Include sleep time (scheduled out) between entry and return */
> >       { TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
> > @@ -651,50 +653,86 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
> >   #ifdef CONFIG_FUNCTION_GRAPH_RETVAL
> >
> >   #define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL
> > +#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR
> >
> > -static void print_graph_retval(struct trace_seq *s, unsigned long retval,
> > -                             bool leaf, void *func, bool hex_format)
> > +static bool print_graph_retaddr(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
> > +                             u32 trace_flags, bool comment)
> >   {
> > -     unsigned long err_code = 0;
> > +     if (unlikely(graph_ent->retaddr ==
> > +              (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
> > +             return false;
> >
> > -     if (retval == 0 || hex_format)
> > -             goto done;
> > +     if (comment)
> > +             trace_seq_puts(s, " /*");
> >
> > -     /* Check if the return value matches the negative format */
> > -     if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
> > -             (((u64)retval) >> 32) == 0) {
> > -             /* sign extension */
> > -             err_code = (unsigned long)(s32)retval;
> > -     } else {
> > -             err_code = retval;
> > +     trace_seq_puts(s, " A=");
> > +     seq_print_ip_sym(s, graph_ent->retaddr, trace_flags | TRACE_ITER_SYM_OFFSET);
> > +
> > +     if (comment)
> > +             trace_seq_puts(s, " */");
> > +
> > +     return true;
> > +}
> > +
> > +static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent *graph_ent,
> > +                             struct ftrace_graph_ret *graph_ret,
> > +                             u32 opt_flags, u32 trace_flags)
> > +{
> > +     unsigned long err_code = 0;
> > +     unsigned long retval = graph_ret->retval;
> > +     bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX);
> > +     bool print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR);
> > +     bool print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL);
> > +     void *func = (void *)graph_ret->func;
> > +
> > +     if (print_retval && retval && !hex_format) {
> > +             /* Check if the return value matches the negative format */
> > +             if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
> > +                     (((u64)retval) >> 32) == 0) {
> > +                     err_code = sign_extend64(retval, 31);
> > +             } else {
> > +                     err_code = retval;
> > +             }
> > +
> > +             if (!IS_ERR_VALUE(err_code))
> > +                     err_code = 0;
> >       }
> >
> > -     if (!IS_ERR_VALUE(err_code))
> > -             err_code = 0;
> > +     if (print_retaddr && graph_ent && unlikely(graph_ent->retaddr ==
> > +              (unsigned long)dereference_kernel_function_descriptor(return_to_handler)))
> > +             print_retaddr = false;
> >
> > -done:
> > -     if (leaf) {
> > -             if (hex_format || (err_code == 0))
> > -                     trace_seq_printf(s, "%ps(); /* = 0x%lx */\n",
> > -                                     func, retval);
> > +     if (graph_ent) {
> > +             trace_seq_printf(s, "%ps();", func);
> > +             if (print_retval || print_retaddr)
> > +                     trace_seq_puts(s, " /*");
> >               else
> > -                     trace_seq_printf(s, "%ps(); /* = %ld */\n",
> > -                                     func, err_code);
> > +                     trace_seq_putc(s, '\n');
> >       } else {
> > +             print_retaddr = false;
> > +             trace_seq_printf(s, "} /* %ps", func);
> > +     }
> > +
> > +     if (print_retval) {
> >               if (hex_format || (err_code == 0))
> > -                     trace_seq_printf(s, "} /* %ps = 0x%lx */\n",
> > -                                     func, retval);
> > +                     trace_seq_printf(s, " V=0x%lx", retval);
> >               else
> > -                     trace_seq_printf(s, "} /* %ps = %ld */\n",
> > -                                     func, err_code);
> > +                     trace_seq_printf(s, " V=%ld", err_code);
> >       }
> > +
> > +     if (print_retaddr)
> > +             print_graph_retaddr(s, graph_ent, trace_flags, false);
> > +
> > +     if (!graph_ent || print_retval || print_retaddr)
> > +             trace_seq_puts(s, " */\n");
> >   }
> >
> >   #else
> >
> >   #define __TRACE_GRAPH_PRINT_RETVAL 0
> > +#define __TRACE_GRAPH_PRINT_RETADDR 0
> >
> > -#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0)
> > +#define print_graph_retval(_seq, _ent, _ret, _opt_flags, _trace_flags) do {} while (0)
> >
> >   #endif
> >
> > @@ -746,9 +784,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
> >        * Write out the function return value if the option function-retval is
> >        * enabled.
> >        */
> > -     if (flags & __TRACE_GRAPH_PRINT_RETVAL)
> > -             print_graph_retval(s, graph_ret->retval, true, (void *)call->func,
> > -                             !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
> > +     if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR))
> > +             print_graph_retval(s, call, graph_ret, flags, tr->trace_flags);
> >       else
> >               trace_seq_printf(s, "%ps();\n", (void *)call->func);
> >
> > @@ -788,7 +825,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
> >       for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
> >               trace_seq_putc(s, ' ');
> >
> > -     trace_seq_printf(s, "%ps() {\n", (void *)call->func);
> > +     trace_seq_printf(s, "%ps() {", (void *)call->func);
> > +     if (flags & __TRACE_GRAPH_PRINT_RETADDR)
> > +             print_graph_retaddr(s, call, tr->trace_flags, true);
> > +     trace_seq_putc(s, '\n');
> >
> >       if (trace_seq_has_overflowed(s))
> >               return TRACE_TYPE_PARTIAL_LINE;
> > @@ -1032,9 +1072,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
> >        * Always write out the function name and its return value if the
> >        * function-retval option is enabled.
> >        */
> > -     if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
> > -             print_graph_retval(s, trace->retval, false, (void *)trace->func,
> > -                     !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
> > +     if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) {
> > +             print_graph_retval(s, NULL, trace, flags, tr->trace_flags);
> >       } else {
> >               /*
> >                * If the return function does not have a matching entry,