include/linux/ftrace.h | 2 ++ kernel/trace/trace_entries.h | 8 ++++---- kernel/trace/trace_functions_graph.c | 19 +++++++++---------- 3 files changed, 15 insertions(+), 14 deletions(-)
This issue is reproducible on the QEMU vexpress-a9 platform with the
following steps:
Step 1: Launch the QEMU vexpress-a9 VM
~# qemu-system-arm -M vexpress-a9 -smp 1 -m 256 -kernel zImage -dtb \
vexpress-v2p-ca9.dtb -drive file=rootfs.ext2,if=sd,format=raw \
-append "console=ttyAMA0,115200 rootwait" -net nic,model=lan9118 \
-net user -serial stdio
Step 2: Capture and view the trace with trace-cmd
~# trace-cmd record -p function_graph -g do_open_execat \
--max-graph-depth 1 dd if=/dev/zero of=/tmp/1.bin \
bs=1M count=1
~# trace-cmd report
Step 3: Observed behavior
Before:
dd-89 ... 5.764026: funcgraph_entry: $ 2757369004 us | do_open_execat()
dd-89 ... 5.765222: funcgraph_entry: $ 2034790547 us | do_open_execat();
After:
dd-91 ... 891.945577: funcgraph_entry: ! 185.542 us | do_open_execat();
dd-91 ... 891.946840: funcgraph_entry: + 98.625 us | do_open_execat();
Signed-off-by: jempty.liang <imntjempty@163.com>
---
include/linux/ftrace.h | 2 ++
kernel/trace/trace_entries.h | 8 ++++----
kernel/trace/trace_functions_graph.c | 19 +++++++++----------
3 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a3a8989e3268..52727a342273 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1191,6 +1191,8 @@ struct ftrace_graph_ret {
int depth;
/* Number of functions that overran the depth limit for current task */
unsigned int overrun;
+ unsigned long long calltime;
+ unsigned long long rettime;
} __packed;
struct fgraph_ops;
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index f6a8d29c0d76..362a757e65a2 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -127,8 +127,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
__field_packed( unsigned long, ret, retval )
__field_packed( unsigned int, ret, depth )
__field_packed( unsigned int, ret, overrun )
- __field(unsigned long long, calltime )
- __field(unsigned long long, rettime )
+ __field_packed(unsigned long long, ret, calltime)
+ __field_packed(unsigned long long, ret, rettime)
),
F_printk("<-- %ps (%u) (start: %llx end: %llx) over: %u retval: %lx",
@@ -149,8 +149,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
__field_packed( unsigned long, ret, func )
__field_packed( unsigned int, ret, depth )
__field_packed( unsigned int, ret, overrun )
- __field(unsigned long long, calltime )
- __field(unsigned long long, rettime )
+ __field_packed(unsigned long long, ret, calltime)
+ __field_packed(unsigned long long, ret, rettime)
),
F_printk("<-- %ps (%u) (start: %llx end: %llx) over: %u",
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 1de6f1573621..5798aeb31f05 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -317,6 +317,8 @@ __trace_graph_function(struct trace_array *tr,
struct ftrace_graph_ret ret = {
.func = ip,
.depth = 0,
+ .calltime = time,
+ .rettime = time,
};
__trace_graph_entry(tr, &ent, trace_ctx);
@@ -346,8 +348,8 @@ void __trace_graph_return(struct trace_array *tr,
return;
entry = ring_buffer_event_data(event);
entry->ret = *trace;
- entry->calltime = calltime;
- entry->rettime = rettime;
+ entry->ret.calltime = calltime;
+ entry->ret.rettime = rettime;
trace_buffer_unlock_commit_nostack(buffer, event);
}
@@ -372,10 +374,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
struct trace_array *tr = gops->private;
struct fgraph_times *ftimes;
unsigned int trace_ctx;
- u64 calltime, rettime;
int size;
- rettime = trace_clock_local();
+ trace->rettime = trace_clock_local();
ftrace_graph_addr_finish(gops, trace);
@@ -390,10 +391,10 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
handle_nosleeptime(tr, trace, ftimes, size);
- calltime = ftimes->calltime;
+ trace->calltime = ftimes->calltime;
trace_ctx = tracing_gen_ctx();
- __trace_graph_return(tr, trace, trace_ctx, calltime, rettime);
+ __trace_graph_return(tr, trace, trace_ctx, trace->calltime, trace->rettime);
}
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
@@ -956,7 +957,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
graph_ret = &ret_entry->ret;
call = &entry->graph_ent;
- duration = ret_entry->rettime - ret_entry->calltime;
+ duration = graph_ret->rettime - graph_ret->calltime;
if (data) {
struct fgraph_cpu_data *cpu_data;
@@ -1280,9 +1281,7 @@ print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s,
u32 flags)
{
struct ftrace_graph_ret *trace = &retentry->ret;
- u64 calltime = retentry->calltime;
- u64 rettime = retentry->rettime;
- unsigned long long duration = rettime - calltime;
+ unsigned long long duration = trace->rettime - trace->calltime;
struct fgraph_data *data = iter->private;
struct trace_array *tr = iter->tr;
unsigned long func;
--
2.25.1
On Fri, 30 Jan 2026 01:57:40 +0000
"jempty.liang" <imntjempty@163.com> wrote:
> This issue is reproducible on the QEMU vexpress-a9 platform with the
> following steps:
>
> Step 1: Launch the QEMU vexpress-a9 VM
How is this unique to arm? I'm not able to duplicate this on x86, but the
code here is all generic code. What's different?
>
> ~# qemu-system-arm -M vexpress-a9 -smp 1 -m 256 -kernel zImage -dtb \
> vexpress-v2p-ca9.dtb -drive file=rootfs.ext2,if=sd,format=raw \
> -append "console=ttyAMA0,115200 rootwait" -net nic,model=lan9118 \
> -net user -serial stdio
>
> Step 2: Capture and view the trace with trace-cmd
>
> ~# trace-cmd record -p function_graph -g do_open_execat \
> --max-graph-depth 1 dd if=/dev/zero of=/tmp/1.bin \
> bs=1M count=1
> ~# trace-cmd report
Note, you are using trace-cmd that records the raw data directly thus some
of the updates to the prints below wouldn't affect this.
>
> Step 3: Observed behavior
>
> Before:
>
> dd-89 ... 5.764026: funcgraph_entry: $ 2757369004 us | do_open_execat()
> dd-89 ... 5.765222: funcgraph_entry: $ 2034790547 us | do_open_execat();
>
> After:
>
> dd-91 ... 891.945577: funcgraph_entry: ! 185.542 us | do_open_execat();
> dd-91 ... 891.946840: funcgraph_entry: + 98.625 us | do_open_execat();
You show what the wrong output is and the correct output, but you don't
mention what was the bug and how this fixes it. From the change log, it
looks like you just tried something and it worked, but do not know why it
worked.
I'd like to know what the bug was and how this fixes it. It's not obvious.
>
> Signed-off-by: jempty.liang <imntjempty@163.com>
> ---
> include/linux/ftrace.h | 2 ++
> kernel/trace/trace_entries.h | 8 ++++----
> kernel/trace/trace_functions_graph.c | 19 +++++++++----------
> 3 files changed, 15 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index a3a8989e3268..52727a342273 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -1191,6 +1191,8 @@ struct ftrace_graph_ret {
> int depth;
> /* Number of functions that overran the depth limit for current task */
> unsigned int overrun;
> + unsigned long long calltime;
> + unsigned long long rettime;
> } __packed;
>
> struct fgraph_ops;
> diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> index f6a8d29c0d76..362a757e65a2 100644
> --- a/kernel/trace/trace_entries.h
> +++ b/kernel/trace/trace_entries.h
> @@ -127,8 +127,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> __field_packed( unsigned long, ret, retval )
> __field_packed( unsigned int, ret, depth )
> __field_packed( unsigned int, ret, overrun )
> - __field(unsigned long long, calltime )
> - __field(unsigned long long, rettime )
> + __field_packed(unsigned long long, ret, calltime)
> + __field_packed(unsigned long long, ret, rettime)
> ),
>
> F_printk("<-- %ps (%u) (start: %llx end: %llx) over: %u retval: %lx",
> @@ -149,8 +149,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
> __field_packed( unsigned long, ret, func )
> __field_packed( unsigned int, ret, depth )
> __field_packed( unsigned int, ret, overrun )
> - __field(unsigned long long, calltime )
> - __field(unsigned long long, rettime )
> + __field_packed(unsigned long long, ret, calltime)
> + __field_packed(unsigned long long, ret, rettime)
> ),
>
> F_printk("<-- %ps (%u) (start: %llx end: %llx) over: %u",
> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> index 1de6f1573621..5798aeb31f05 100644
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -317,6 +317,8 @@ __trace_graph_function(struct trace_array *tr,
> struct ftrace_graph_ret ret = {
> .func = ip,
> .depth = 0,
> + .calltime = time,
> + .rettime = time,
If you assign here, then you should change the line below this code.
> };
>
> __trace_graph_entry(tr, &ent, trace_ctx);
The next line is this:
__trace_graph_return(tr, &ret, trace_ctx, time, time);
Where that function has:
entry->ret = *trace;
entry->ret.calltime = calltime;
entry->ret.rettime = rettime;
That is, ret is assigned "*trace" which includes the calltime and rettime,
and now it's doing more work to override it with the current data?
> @@ -346,8 +348,8 @@ void __trace_graph_return(struct trace_array *tr,
> return;
> entry = ring_buffer_event_data(event);
> entry->ret = *trace;
> - entry->calltime = calltime;
> - entry->rettime = rettime;
> + entry->ret.calltime = calltime;
> + entry->ret.rettime = rettime;
> trace_buffer_unlock_commit_nostack(buffer, event);
> }
>
> @@ -372,10 +374,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
> struct trace_array *tr = gops->private;
> struct fgraph_times *ftimes;
> unsigned int trace_ctx;
> - u64 calltime, rettime;
> int size;
>
> - rettime = trace_clock_local();
> + trace->rettime = trace_clock_local();
>
> ftrace_graph_addr_finish(gops, trace);
>
> @@ -390,10 +391,10 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
>
> handle_nosleeptime(tr, trace, ftimes, size);
>
> - calltime = ftimes->calltime;
> + trace->calltime = ftimes->calltime;
>
> trace_ctx = tracing_gen_ctx();
> - __trace_graph_return(tr, trace, trace_ctx, calltime, rettime);
> + __trace_graph_return(tr, trace, trace_ctx, trace->calltime, trace->rettime);
And here again, the passed in calltime and rettime is just being
overwritten.
> }
>
> static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
> @@ -956,7 +957,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
>
> graph_ret = &ret_entry->ret;
> call = &entry->graph_ent;
> - duration = ret_entry->rettime - ret_entry->calltime;
> + duration = graph_ret->rettime - graph_ret->calltime;
>
> if (data) {
> struct fgraph_cpu_data *cpu_data;
> @@ -1280,9 +1281,7 @@ print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s,
> u32 flags)
> {
> struct ftrace_graph_ret *trace = &retentry->ret;
> - u64 calltime = retentry->calltime;
> - u64 rettime = retentry->rettime;
> - unsigned long long duration = rettime - calltime;
> + unsigned long long duration = trace->rettime - trace->calltime;
> struct fgraph_data *data = iter->private;
> struct trace_array *tr = iter->tr;
> unsigned long func;
Now moving rettime and calltime into ftrace_graph_ret may be a nice
cleanup, but you are saying this is a fix. What was the bug, and what did
this fix?
-- Steve
Hi, stevven
Thanks for your reply,
At 2026-01-31 04:55:48, "Steven Rostedt" <rostedt@goodmis.org> wrote:
>On Fri, 30 Jan 2026 01:57:40 +0000
>"jempty.liang" <imntjempty@163.com> wrote:
>
>> This issue is reproducible on the QEMU vexpress-a9 platform with the
>> following steps:
>>
>> Step 1: Launch the QEMU vexpress-a9 VM
>
>How is this unique to arm? I'm not able to duplicate this on x86, but the
>code here is all generic code. What's different?
>
You're right, this bug is only reproducible on ARM.
>>
>> ~# qemu-system-arm -M vexpress-a9 -smp 1 -m 256 -kernel zImage -dtb \
>> vexpress-v2p-ca9.dtb -drive file=rootfs.ext2,if=sd,format=raw \
>> -append "console=ttyAMA0,115200 rootwait" -net nic,model=lan9118 \
>> -net user -serial stdio
>>
>> Step 2: Capture and view the trace with trace-cmd
>>
>> ~# trace-cmd record -p function_graph -g do_open_execat \
>> --max-graph-depth 1 dd if=/dev/zero of=/tmp/1.bin \
>> bs=1M count=1
>> ~# trace-cmd report
>
>Note, you are using trace-cmd that records the raw data directly thus some
>of the updates to the prints below wouldn't affect this.
>
>>
>> Step 3: Observed behavior
>>
>> Before:
>>
>> dd-89 ... 5.764026: funcgraph_entry: $ 2757369004 us | do_open_execat()
>> dd-89 ... 5.765222: funcgraph_entry: $ 2034790547 us | do_open_execat();
>>
>> After:
>>
>> dd-91 ... 891.945577: funcgraph_entry: ! 185.542 us | do_open_execat();
>> dd-91 ... 891.946840: funcgraph_entry: + 98.625 us | do_open_execat();
>
>You show what the wrong output is and the correct output, but you don't
>mention what was the bug and how this fixes it. From the change log, it
>looks like you just tried something and it worked, but do not know why it
>worked.
>
>I'd like to know what the bug was and how this fixes it. It's not obvious.
>
Before the patch, do_open_execat had an abnormal execution duration
of 2757369004 us (over 1 second); the patch resolves this and restores the
function to a normal duration range.
>>
>> Signed-off-by: jempty.liang <imntjempty@163.com>
>> ---
>> include/linux/ftrace.h | 2 ++
>> kernel/trace/trace_entries.h | 8 ++++----
>> kernel/trace/trace_functions_graph.c | 19 +++++++++----------
>> 3 files changed, 15 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
>> index a3a8989e3268..52727a342273 100644
>> --- a/include/linux/ftrace.h
>> +++ b/include/linux/ftrace.h
>> @@ -1191,6 +1191,8 @@ struct ftrace_graph_ret {
>> int depth;
>> /* Number of functions that overran the depth limit for current task */
>> unsigned int overrun;
>> + unsigned long long calltime;
>> + unsigned long long rettime;
>> } __packed;
>>
>> struct fgraph_ops;
>> diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
>> index f6a8d29c0d76..362a757e65a2 100644
>> --- a/kernel/trace/trace_entries.h
>> +++ b/kernel/trace/trace_entries.h
>> @@ -127,8 +127,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>> __field_packed( unsigned long, ret, retval )
>> __field_packed( unsigned int, ret, depth )
>> __field_packed( unsigned int, ret, overrun )
>> - __field(unsigned long long, calltime )
>> - __field(unsigned long long, rettime )
>> + __field_packed(unsigned long long, ret, calltime)
>> + __field_packed(unsigned long long, ret, rettime)
>> ),
>>
>> F_printk("<-- %ps (%u) (start: %llx end: %llx) over: %u retval: %lx",
>> @@ -149,8 +149,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
>> __field_packed( unsigned long, ret, func )
>> __field_packed( unsigned int, ret, depth )
>> __field_packed( unsigned int, ret, overrun )
>> - __field(unsigned long long, calltime )
>> - __field(unsigned long long, rettime )
>> + __field_packed(unsigned long long, ret, calltime)
>> + __field_packed(unsigned long long, ret, rettime)
>> ),
>>
>> F_printk("<-- %ps (%u) (start: %llx end: %llx) over: %u",
>> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
>> index 1de6f1573621..5798aeb31f05 100644
>> --- a/kernel/trace/trace_functions_graph.c
>> +++ b/kernel/trace/trace_functions_graph.c
>> @@ -317,6 +317,8 @@ __trace_graph_function(struct trace_array *tr,
>> struct ftrace_graph_ret ret = {
>> .func = ip,
>> .depth = 0,
>> + .calltime = time,
>> + .rettime = time,
>
>If you assign here, then you should change the line below this code.
>
>> };
>>
>> __trace_graph_entry(tr, &ent, trace_ctx);
>
>The next line is this:
>
> __trace_graph_return(tr, &ret, trace_ctx, time, time);
>
>Where that function has:
>
> entry->ret = *trace;
> entry->ret.calltime = calltime;
> entry->ret.rettime = rettime;
>
>That is, ret is assigned "*trace" which includes the calltime and rettime,
>and now it's doing more work to override it with the current data?
>
Thanks for the thorough review — this is a good point, I'll look into this
and adjust accordingly.
>> @@ -346,8 +348,8 @@ void __trace_graph_return(struct trace_array *tr,
>> return;
>> entry = ring_buffer_event_data(event);
>> entry->ret = *trace;
>> - entry->calltime = calltime;
>> - entry->rettime = rettime;
>> + entry->ret.calltime = calltime;
>> + entry->ret.rettime = rettime;
>> trace_buffer_unlock_commit_nostack(buffer, event);
>> }
>>
>> @@ -372,10 +374,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
>> struct trace_array *tr = gops->private;
>> struct fgraph_times *ftimes;
>> unsigned int trace_ctx;
>> - u64 calltime, rettime;
>> int size;
>>
>> - rettime = trace_clock_local();
>> + trace->rettime = trace_clock_local();
>>
>> ftrace_graph_addr_finish(gops, trace);
>>
>> @@ -390,10 +391,10 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
>>
>> handle_nosleeptime(tr, trace, ftimes, size);
>>
>> - calltime = ftimes->calltime;
>> + trace->calltime = ftimes->calltime;
>>
>> trace_ctx = tracing_gen_ctx();
>> - __trace_graph_return(tr, trace, trace_ctx, calltime, rettime);
>> + __trace_graph_return(tr, trace, trace_ctx, trace->calltime, trace->rettime);
>
>And here again, the passed in calltime and rettime is just being
>overwritten.
>
>
Thanks for reviewing! Will address this point.
>> }
>>
>> static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
>> @@ -956,7 +957,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
>>
>> graph_ret = &ret_entry->ret;
>> call = &entry->graph_ent;
>> - duration = ret_entry->rettime - ret_entry->calltime;
>> + duration = graph_ret->rettime - graph_ret->calltime;
>>
>> if (data) {
>> struct fgraph_cpu_data *cpu_data;
>> @@ -1280,9 +1281,7 @@ print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s,
>> u32 flags)
>> {
>> struct ftrace_graph_ret *trace = &retentry->ret;
>> - u64 calltime = retentry->calltime;
>> - u64 rettime = retentry->rettime;
>> - unsigned long long duration = rettime - calltime;
>> + unsigned long long duration = trace->rettime - trace->calltime;
>> struct fgraph_data *data = iter->private;
>> struct trace_array *tr = iter->tr;
>> unsigned long func;
>
>Now moving rettime and calltime into ftrace_graph_ret may be a nice
>cleanup, but you are saying this is a fix. What was the bug, and what did
>this fix?
>
Thanks for reviewing! Will address this point.
>-- Steve
On Sat, 31 Jan 2026 18:08:41 +0800 (CST) "jempty.liang" <imntjempty@163.com> wrote: > > > >How is this unique to arm? I'm not able to duplicate this on x86, but the > >code here is all generic code. What's different? > > > > You're right, this bug is only reproducible on ARM. You didn't answer my question. I understand that you said this only impacts ARM. I'm asking you why does it only impact ARM? There's nothing in this patch that is ARM specific? What is different about ARM that makes this code have an issue where it doesn't have an issue with x86? > > >You show what the wrong output is and the correct output, but you > >don't mention what was the bug and how this fixes it. From the > >change log, it looks like you just tried something and it worked, > >but do not know why it worked. > > > >I'd like to know what the bug was and how this fixes it. It's not > >obvious. > > > > Before the patch, do_open_execat had an abnormal execution duration > of 2757369004 us (over 1 second); the patch resolves this and > restores the function to a normal duration range. You still haven't answered my question. You are just telling me that the patch fixes it but you do not tell me what in the code was wrong and why the patch fixes it! -- Steve
© 2016 - 2026 Red Hat, Inc.