From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
function_graph_enter_regs() protects itself against recursion with
ftrace_test_recursion_trylock(), but __ftrace_return_to_handler(),
which is called at function exit, does not prevent such recursion.
Therefore, while it can prevent recursive calls from
fgraph_ops::entryfunc(), it is not able to prevent recursive calls
to fgraph from fgraph_ops::retfunc(), resulting in a recursive loop.
This can lead to the unexpected recursion bug reported by Menglong.
is_endbr() is called in __ftrace_return_to_handler -> fprobe_return
-> kprobe_multi_link_exit_handler -> is_endbr.
To fix this issue, acquire ftrace_test_recursion_trylock() in
__ftrace_return_to_handler() after unwinding the shadow stack, so that
this section is marked as one that must prevent recursive calls into
fgraph from inside a user-defined fgraph_ops::retfunc().
This is essentially a fix to commit 4346ba160409 ("fprobe: Rewrite
fprobe on function-graph tracer"), because before that fgraph was
only used from the function graph tracer. Since that commit, fprobe
allows users to run arbitrary callbacks from fgraph.
Reported-by: Menglong Dong <menglong8.dong@gmail.com>
Closes: https://lore.kernel.org/all/20250918120939.1706585-1-dongml2@chinatelecom.cn/
Fixes: 4346ba160409 ("fprobe: Rewrite fprobe on function-graph tracer")
Cc: stable@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
---
Changes in v2:
- Do not warn when ftrace_test_recursion_trylock() fails, because it
allows one level of nesting.
---
kernel/trace/fgraph.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 1e3b32b1e82c..484ad7a18463 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -815,6 +815,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe
unsigned long bitmap;
unsigned long ret;
int offset;
+ int bit;
int i;
ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset);
@@ -829,6 +830,15 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe
if (fregs)
ftrace_regs_set_instruction_pointer(fregs, ret);
+ bit = ftrace_test_recursion_trylock(trace.func, ret);
+ /*
+ * This can fail because ftrace_test_recursion_trylock() allows one nest
+ * call. If we are already in a nested call, then we don't probe this and
+ * just return the original return address.
+ */
+ if (unlikely(bit < 0))
+ goto out;
+
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
trace.retval = ftrace_regs_get_return_value(fregs);
#endif
@@ -852,6 +862,8 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe
}
}
+ ftrace_test_recursion_unlock(bit);
+out:
/*
* The ftrace_graph_return() may still access the current
* ret_stack structure, we need to make sure the update of
Hi Steve, Can you pick this ? Or I will do? Thanks, On Mon, 22 Sep 2025 15:35:22 +0900 "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote: > From: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > function_graph_enter_regs() prevents itself from recursion by > ftrace_test_recursion_trylock(), but __ftrace_return_to_handler(), > which is called at the exit, does not prevent such recursion. > Therefore, while it can prevent recursive calls from > fgraph_ops::entryfunc(), it is not able to prevent recursive calls > to fgraph from fgraph_ops::retfunc(), resulting in a recursive loop. > This can lead an unexpected recursion bug reported by Menglong. > > is_endbr() is called in __ftrace_return_to_handler -> fprobe_return > -> kprobe_multi_link_exit_handler -> is_endbr. > > To fix this issue, acquire ftrace_test_recursion_trylock() in the > __ftrace_return_to_handler() after unwind the shadow stack to mark > this section must prevent recursive call of fgraph inside user-defined > fgraph_ops::retfunc(). > > This is essentially a fix to commit 4346ba160409 ("fprobe: Rewrite > fprobe on function-graph tracer"), because before that fgraph was > only used from the function graph tracer. Fprobe allowed user to run > any callbacks from fgraph after that commit. > > Reported-by: Menglong Dong <menglong8.dong@gmail.com> > Closes: https://lore.kernel.org/all/20250918120939.1706585-1-dongml2@chinatelecom.cn/ > Fixes: 4346ba160409 ("fprobe: Rewrite fprobe on function-graph tracer") > Cc: stable@vger.kernel.org > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > Acked-by: Jiri Olsa <jolsa@kernel.org> > --- > Changes in v2: > - Do not warn on failing ftrace_test_recursion_trylock() because it > allows one-level nest. 
> --- > kernel/trace/fgraph.c | 12 ++++++++++++ > 1 file changed, 12 insertions(+) > > diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c > index 1e3b32b1e82c..484ad7a18463 100644 > --- a/kernel/trace/fgraph.c > +++ b/kernel/trace/fgraph.c > @@ -815,6 +815,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe > unsigned long bitmap; > unsigned long ret; > int offset; > + int bit; > int i; > > ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset); > @@ -829,6 +830,15 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe > if (fregs) > ftrace_regs_set_instruction_pointer(fregs, ret); > > + bit = ftrace_test_recursion_trylock(trace.func, ret); > + /* > + * This can fail because ftrace_test_recursion_trylock() allows one nest > + * call. If we are already in a nested call, then we don't probe this and > + * just return the original return address. > + */ > + if (unlikely(bit < 0)) > + goto out; > + > #ifdef CONFIG_FUNCTION_GRAPH_RETVAL > trace.retval = ftrace_regs_get_return_value(fregs); > #endif > @@ -852,6 +862,8 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe > } > } > > + ftrace_test_recursion_unlock(bit); > +out: > /* > * The ftrace_graph_return() may still access the current > * ret_stack structure, we need to make sure the update of > -- Masami Hiramatsu (Google) <mhiramat@kernel.org>
On 2025/9/22 14:35 Masami Hiramatsu (Google) <mhiramat@kernel.org> write: > From: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > function_graph_enter_regs() prevents itself from recursion by > ftrace_test_recursion_trylock(), but __ftrace_return_to_handler(), > which is called at the exit, does not prevent such recursion. > Therefore, while it can prevent recursive calls from > fgraph_ops::entryfunc(), it is not able to prevent recursive calls > to fgraph from fgraph_ops::retfunc(), resulting in a recursive loop. > This can lead an unexpected recursion bug reported by Menglong. > > is_endbr() is called in __ftrace_return_to_handler -> fprobe_return > -> kprobe_multi_link_exit_handler -> is_endbr. > > To fix this issue, acquire ftrace_test_recursion_trylock() in the > __ftrace_return_to_handler() after unwind the shadow stack to mark > this section must prevent recursive call of fgraph inside user-defined > fgraph_ops::retfunc(). > > This is essentially a fix to commit 4346ba160409 ("fprobe: Rewrite > fprobe on function-graph tracer"), because before that fgraph was > only used from the function graph tracer. Fprobe allowed user to run > any callbacks from fgraph after that commit. > > Reported-by: Menglong Dong <menglong8.dong@gmail.com> > Closes: https://lore.kernel.org/all/20250918120939.1706585-1-dongml2@chinatelecom.cn/ > Fixes: 4346ba160409 ("fprobe: Rewrite fprobe on function-graph tracer") > Cc: stable@vger.kernel.org > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > Acked-by: Jiri Olsa <jolsa@kernel.org> > --- > Changes in v2: > - Do not warn on failing ftrace_test_recursion_trylock() because it > allows one-level nest. 
> --- > kernel/trace/fgraph.c | 12 ++++++++++++ > 1 file changed, 12 insertions(+) > > diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c > index 1e3b32b1e82c..484ad7a18463 100644 > --- a/kernel/trace/fgraph.c > +++ b/kernel/trace/fgraph.c > @@ -815,6 +815,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe > unsigned long bitmap; > unsigned long ret; > int offset; > + int bit; > int i; The bpf bench testings work fine on this version :) Tested-by: Menglong Dong <menglong8.dong@gmail.com> Acked-by: Menglong Dong <menglong8.dong@gmail.com> Thanks! Menglong Dong > > ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset); > @@ -829,6 +830,15 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe > if (fregs) > ftrace_regs_set_instruction_pointer(fregs, ret); > > + bit = ftrace_test_recursion_trylock(trace.func, ret); > + /* > + * This can fail because ftrace_test_recursion_trylock() allows one nest > + * call. If we are already in a nested call, then we don't probe this and > + * just return the original return address. > + */ > + if (unlikely(bit < 0)) > + goto out; > + > #ifdef CONFIG_FUNCTION_GRAPH_RETVAL > trace.retval = ftrace_regs_get_return_value(fregs); > #endif > @@ -852,6 +862,8 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe > } > } > > + ftrace_test_recursion_unlock(bit); > +out: > /* > * The ftrace_graph_return() may still access the current > * ret_stack structure, we need to make sure the update of > > >
© 2016 - 2025 Red Hat, Inc.