[PATCH -next v2 2/4] rcu/nocb: Add warning if no rcuog wake up attempt happened during overload

Joel Fernandes posted 4 patches 3 weeks, 4 days ago
There is a newer version of this series
[PATCH -next v2 2/4] rcu/nocb: Add warning if no rcuog wake up attempt happened during overload
Posted by Joel Fernandes 3 weeks, 4 days ago
To be sure we have no rcuog wake ups that were lost, add a warning
to cover the case where the rdp is overloaded with callbacks but
no wake up was attempted.

[applied Frederic's adjustment to clearing of nocb_gp_handling flag]

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
 kernel/rcu/tree.c      |  4 ++++
 kernel/rcu/tree.h      |  1 +
 kernel/rcu/tree_nocb.h | 11 ++++++++++-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2921ffb19939..958b61be87ea 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3767,6 +3767,10 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
 		debug_rcu_head_unqueue(&rdp->barrier_head);
 		rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
 	}
+#ifdef CONFIG_RCU_NOCB_CPU
+	/* wake_nocb implies all CBs queued before were bypass/lazy. */
+	WARN_ON_ONCE(wake_nocb && !rdp->nocb_gp_handling);
+#endif
 	rcu_nocb_unlock(rdp);
 	if (wake_nocb)
 		wake_nocb_gp(rdp);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 7dfc57e9adb1..af1d065e3215 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -257,6 +257,7 @@ struct rcu_data {
 	unsigned long nocb_gp_loops;	/* # passes through wait code. */
 	struct swait_queue_head nocb_gp_wq; /* For nocb kthreads to sleep on. */
 	bool nocb_cb_sleep;		/* Is the nocb CB thread asleep? */
+	bool nocb_gp_handling;		/* Is rcuog handling this rdp? */
 	struct task_struct *nocb_cb_kthread;
 	struct list_head nocb_head_rdp; /*
 					 * Head of rcu_data list in wakeup chain,
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index f525e4f7985b..acca24670a8c 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -546,6 +546,7 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
 	lazy_len = READ_ONCE(rdp->lazy_len);
 	if (was_alldone) {
 		rdp->qlen_last_fqs_check = len;
+		rdp->nocb_gp_handling = true;
 		rcu_nocb_unlock(rdp);
 		// Only lazy CBs in bypass list
 		if (lazy_len && bypass_len == lazy_len) {
@@ -563,7 +564,8 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
 
 		return;
 	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
-		/* ... or if many callbacks queued. */
+		/* Callback overload condition. */
+		WARN_ON_ONCE(!rdp->nocb_gp_handling);
 		rdp->qlen_last_fqs_check = len;
 		j = jiffies;
 		if (j != rdp->nocb_gp_adv_time &&
@@ -732,6 +734,12 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
 			needwait_gp = true;
 			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
 					    TPS("NeedWaitGP"));
+		} else if (!rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
+			/*
+			 * No pending callbacks and no bypass callbacks.
+			 * The rcuog kthread is done handling this rdp.
+			 */
+			rdp->nocb_gp_handling = false;
 		}
 		if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
 			needwake = rdp->nocb_cb_sleep;
@@ -1254,6 +1262,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 			continue;
 		}
 		rcu_nocb_try_flush_bypass(rdp, jiffies);
+		rdp->nocb_gp_handling = true;
 		rcu_nocb_unlock_irqrestore(rdp, flags);
 		wake_nocb_gp(rdp);
 		sc->nr_to_scan -= _count;
-- 
2.34.1
Re: [PATCH -next v2 2/4] rcu/nocb: Add warning if no rcuog wake up attempt happened during overload
Posted by Frederic Weisbecker 3 weeks, 2 days ago
Le Wed, Jan 14, 2026 at 12:31:52PM -0500, Joel Fernandes a écrit :
> @@ -1254,6 +1262,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
>  			continue;
>  		}
>  		rcu_nocb_try_flush_bypass(rdp, jiffies);
> +		rdp->nocb_gp_handling = true;

It should be true already, right?

>  		rcu_nocb_unlock_irqrestore(rdp, flags);
>  		wake_nocb_gp(rdp);
>  		sc->nr_to_scan -= _count;

Thanks.

-- 
Frederic Weisbecker
SUSE Labs
Re: [PATCH -next v2 2/4] rcu/nocb: Add warning if no rcuog wake up attempt happened during overload
Posted by Joel Fernandes 2 weeks, 6 days ago

On 1/16/2026 4:56 PM, Frederic Weisbecker wrote:
> Le Wed, Jan 14, 2026 at 12:31:52PM -0500, Joel Fernandes a écrit :
>> @@ -1254,6 +1262,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
>>   			continue;
>>   		}
>>   		rcu_nocb_try_flush_bypass(rdp, jiffies);
>> +		rdp->nocb_gp_handling = true;
> 
> It should be true already, right?
> 
Yes! I will drop this hunk on respin, thanks.

  - Joel

Re: [PATCH -next v2 2/4] rcu/nocb: Add warning if no rcuog wake up attempt happened during overload
Posted by joelagnelf@nvidia.com 3 weeks, 2 days ago
> On Jan 14, 2026, at 12:32 PM, Joel Fernandes <joelagnelf@nvidia.com> wrote:
> To be sure we have no rcuog wake ups that were lost, add a warning
> to cover the case where the rdp is overloaded with callbacks but
> no wake up was attempted.
> 
> [applied Frederic's adjustment to clearing of nocb_gp_handling flag]

Frederic,

Is it possible for you to do a quick review of these last few patches? They're
mostly simple. It would be great if we could pick them up for this upcoming
merge window.

Thanks!

 - Joel 




> 
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
> kernel/rcu/tree.c      |  4 ++++
> kernel/rcu/tree.h      |  1 +
> kernel/rcu/tree_nocb.h | 11 ++++++++++-
> 3 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 2921ffb19939..958b61be87ea 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -3767,6 +3767,10 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
>        debug_rcu_head_unqueue(&rdp->barrier_head);
>        rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
>    }
> +#ifdef CONFIG_RCU_NOCB_CPU
> +    /* wake_nocb implies all CBs queued before were bypass/lazy. */
> +    WARN_ON_ONCE(wake_nocb && !rdp->nocb_gp_handling);
> +#endif
>    rcu_nocb_unlock(rdp);
>    if (wake_nocb)
>        wake_nocb_gp(rdp);
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index 7dfc57e9adb1..af1d065e3215 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -257,6 +257,7 @@ struct rcu_data {
>    unsigned long nocb_gp_loops;    /* # passes through wait code. */
>    struct swait_queue_head nocb_gp_wq; /* For nocb kthreads to sleep on. */
>    bool nocb_cb_sleep;        /* Is the nocb CB thread asleep? */
> +    bool nocb_gp_handling;        /* Is rcuog handling this rdp? */
>    struct task_struct *nocb_cb_kthread;
>    struct list_head nocb_head_rdp; /*
>                     * Head of rcu_data list in wakeup chain,
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> index f525e4f7985b..acca24670a8c 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -546,6 +546,7 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
>    lazy_len = READ_ONCE(rdp->lazy_len);
>    if (was_alldone) {
>        rdp->qlen_last_fqs_check = len;
> +        rdp->nocb_gp_handling = true;
>        rcu_nocb_unlock(rdp);
>        // Only lazy CBs in bypass list
>        if (lazy_len && bypass_len == lazy_len) {
> @@ -563,7 +564,8 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
> 
>        return;
>    } else if (len > rdp->qlen_last_fqs_check + qhimark) {
> -        /* ... or if many callbacks queued. */
> +        /* Callback overload condition. */
> +        WARN_ON_ONCE(!rdp->nocb_gp_handling);
>        rdp->qlen_last_fqs_check = len;
>        j = jiffies;
>        if (j != rdp->nocb_gp_adv_time &&
> @@ -732,6 +734,12 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
>            needwait_gp = true;
>            trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
>                        TPS("NeedWaitGP"));
> +        } else if (!rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
> +            /*
> +             * No pending callbacks and no bypass callbacks.
> +             * The rcuog kthread is done handling this rdp.
> +             */
> +            rdp->nocb_gp_handling = false;
>        }
>        if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
>            needwake = rdp->nocb_cb_sleep;
> @@ -1254,6 +1262,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
>            continue;
>        }
>        rcu_nocb_try_flush_bypass(rdp, jiffies);
> +        rdp->nocb_gp_handling = true;
>        rcu_nocb_unlock_irqrestore(rdp, flags);
>        wake_nocb_gp(rdp);
>        sc->nr_to_scan -= _count;
> --
> 2.34.1