 kernel/rcu/rcu.h        | 5 ++++-
 kernel/rcu/tree_stall.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)
During some recent expedited RCU testing with rcutorture, I noticed RCU
stalls when using ftrace_dump() to analyze traces. The issue is that, since
ftrace_dump() can take a long time, any accumulated issue can show up
just after stall detection is unsuppressed. As a result, a stall can be
detected and reported.
The problem is something like this:
1. rcu_ftrace_dump_stall_suppress() sets suppression value.
2. ftrace_dump() runs for a long time.
3. rcu_ftrace_dump_stall_unsuppress() clears suppression.
4. Immediately afterwards, a stall is detected.
Fix this by calling rcu_cpu_stall_reset() when clearing suppression.
This resets jiffies_stall to ULONG_MAX, preventing the issue.
Tested with rcutorture with gp_exp=true and ftrace_dump() enabled: no
stall is reported, and the output contains only clean traces.
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
This is a non-urgent fix and could go in either this or the next merge
window (Boqun's call).
kernel/rcu/rcu.h | 5 ++++-
kernel/rcu/tree_stall.h | 1 +
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 9cf01832a6c3..87fcf528b9ac 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -288,6 +288,7 @@ extern int rcu_cpu_stall_cputime;
extern bool rcu_exp_stall_task_details __read_mostly;
int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);
+void rcu_cpu_stall_reset(void);
static inline bool rcu_stall_is_suppressed(void)
{
@@ -302,8 +303,10 @@ do { \
#define rcu_ftrace_dump_stall_unsuppress() \
do { \
- if (rcu_cpu_stall_suppress == 3) \
+ if (rcu_cpu_stall_suppress == 3) { \
rcu_cpu_stall_suppress = 0; \
+ rcu_cpu_stall_reset(); \
+ } \
} while (0)
#else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index b67532cb8770..9a036fc27fed 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -192,6 +192,7 @@ void rcu_cpu_stall_reset(void)
WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3);
WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX);
}
+EXPORT_SYMBOL_GPL(rcu_cpu_stall_reset);
//////////////////////////////////////////////////////////////////////////////
//
--
2.34.1
On Sun, Jan 04, 2026 at 03:36:42PM -0500, Joel Fernandes wrote:
> During some recent expedited RCU testing with rcutorture, I noticed RCU
> stalls when using ftrace_dump() to analyze traces. The issue that since
> ftrace_dump() can take a long time, any accumulated issue can show up
> just after the stall is unsupressed. Due to this a stall can be
> detected and reported.
>
> The problem is something like this:
>
> 1. rcu_ftrace_dump_stall_suppress() sets suppression value.
>
> 2. ftrace_dump() runs for a long time.
>
> 3. rcu_ftrace_dump_stall_unsuppress() clears suppression.
>
> 4. Immediately later, stall is detected.
>
> Fix this by calling rcu_cpu_stall_reset() when clearing suppression.
> This resets jiffies_stall to ULONG_MAX, preventing the issue.
>
> Tested with gp_exp=true enabled with rcutorture and ftrace_dump() and no
> stall is reported, with only clean traces in the output.
>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Good catch!
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
> ---
> This is a non-urgent fix and could go in either this or the next merge
> window (Boqun's call).
>
> kernel/rcu/rcu.h | 5 ++++-
> kernel/rcu/tree_stall.h | 1 +
> 2 files changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index 9cf01832a6c3..87fcf528b9ac 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -288,6 +288,7 @@ extern int rcu_cpu_stall_cputime;
> extern bool rcu_exp_stall_task_details __read_mostly;
> int rcu_jiffies_till_stall_check(void);
> int rcu_exp_jiffies_till_stall_check(void);
> +void rcu_cpu_stall_reset(void);
>
> static inline bool rcu_stall_is_suppressed(void)
> {
> @@ -302,8 +303,10 @@ do { \
>
> #define rcu_ftrace_dump_stall_unsuppress() \
> do { \
> - if (rcu_cpu_stall_suppress == 3) \
> + if (rcu_cpu_stall_suppress == 3) { \
> rcu_cpu_stall_suppress = 0; \
> + rcu_cpu_stall_reset(); \
> + } \
> } while (0)
>
> #else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
> index b67532cb8770..9a036fc27fed 100644
> --- a/kernel/rcu/tree_stall.h
> +++ b/kernel/rcu/tree_stall.h
> @@ -192,6 +192,7 @@ void rcu_cpu_stall_reset(void)
> WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3);
> WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX);
> }
> +EXPORT_SYMBOL_GPL(rcu_cpu_stall_reset);
>
> //////////////////////////////////////////////////////////////////////////////
> //
> --
> 2.34.1
>
© 2016 - 2026 Red Hat, Inc.