[v1] rcutorture: Perform more frequent testing of ->gpwrap

[PATCH] rcutorture: Perform more frequent testing of ->gpwrap

Posted by Joel Fernandes 10 months, 2 weeks ago

Currently, the ->gpwrap is not tested (at all per my testing) due to the
requirement of a large delta between a CPU's rdp->gp_seq and its node's
rnp->gpseq.

This results in no testing of ->gpwrap being set. This patch by default
adds 5 minutes of testing with ->gpwrap forced by lowering the delta
between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this is
configurable, including the active time for the setting and a full
testing cycle.

By default, the first 25 minutes of a test will have the _default_
behavior there is right now (ULONG_MAX / 4) delta. Then for 5 minutes,
we switch to a smaller delta causing 1-2 wraps in 5 minutes. I believe
this is reasonable since we at least add a little bit of testing for
usecases where ->gpwrap is set.

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
 kernel/rcu/rcu.h        |  4 +++
 kernel/rcu/rcutorture.c | 64 +++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/tree.c       | 34 ++++++++++++++++++++--
 kernel/rcu/tree.h       |  1 +
 4 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index eed2951a4962..9a15e9701e02 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
 			       unsigned long c_old,
 			       unsigned long c);
 void rcu_gp_set_torture_wait(int duration);
+void rcu_set_torture_ovf_lag(unsigned long lag);
+int rcu_get_gpwrap_count(int cpu);
 #else
 static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
 {
@@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
 	do { } while (0)
 #endif
 static inline void rcu_gp_set_torture_wait(int duration) { }
+static inline void rcu_set_torture_ovf_lag(unsigned long lag) { }
+static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
 #endif
 unsigned long long rcutorture_gather_gp_seqs(void);
 void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 895a27545ae1..79a72e70913e 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -118,6 +118,9 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
 torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
 torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
 torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
+torture_param(int, ovf_cycle_mins, 30, "Total cycle duration for ovf lag testing (in minutes)");
+torture_param(int, ovf_active_mins, 5, "Duration for which ovf lag is active within each cycle (in minutes)");
+torture_param(int, ovf_lag_gps, 8, "Value to set for set_torture_ovf_lag during an active testing period.");
 torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
 torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
 torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
@@ -2629,6 +2632,7 @@ rcu_torture_stats_print(void)
 	int i;
 	long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 	long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+	long n_gpwraps = 0;
 	struct rcu_torture *rtcp;
 	static unsigned long rtcv_snap = ULONG_MAX;
 	static bool splatted;
@@ -2639,6 +2643,7 @@ rcu_torture_stats_print(void)
 			pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
 			batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
 		}
+		n_gpwraps += rcu_get_gpwrap_count(cpu);
 	}
 	for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
 		if (pipesummary[i] != 0)
@@ -2672,6 +2677,7 @@ rcu_torture_stats_print(void)
 	pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
 	pr_cont("nocb-toggles: %ld:%ld\n",
 		atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
+	pr_cont("gpwraps: %ld\n", n_gpwraps);
 
 	pr_alert("%s%s ", torture_type, TORTURE_FLAG);
 	if (atomic_read(&n_rcu_torture_mberror) ||
@@ -3842,6 +3848,58 @@ static int rcu_torture_preempt(void *unused)
 
 static enum cpuhp_state rcutor_hp;
 
+static struct hrtimer ovf_lag_timer;
+static bool ovf_lag_active;
+
+/* Timer handler for toggling RCU grace-period sequence overflow test lag value */
+static enum hrtimer_restart rcu_torture_ovf_lag_timer(struct hrtimer *timer)
+{
+	ktime_t next_delay;
+
+	if (ovf_lag_active) {
+		pr_alert("rcu-torture: Disabling ovf lag (value=0)\n");
+		rcu_set_torture_ovf_lag(0);
+		ovf_lag_active = false;
+		next_delay = ktime_set((ovf_cycle_mins - ovf_active_mins) * 60, 0);
+	} else {
+		pr_alert("rcu-torture: Enabling ovf lag (value=%d)\n", ovf_lag_gps);
+		rcu_set_torture_ovf_lag(ovf_lag_gps);
+		ovf_lag_active = true;
+		next_delay = ktime_set(ovf_active_mins * 60, 0);
+	}
+
+	if (torture_must_stop())
+		return HRTIMER_NORESTART;
+
+	hrtimer_forward_now(timer, next_delay);
+	return HRTIMER_RESTART;
+}
+
+static int rcu_torture_ovf_lag_init(void)
+{
+	if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
+		pr_alert("rcu-torture: lag timing parameters must be positive\n");
+		return -EINVAL;
+	}
+
+	hrtimer_init(&ovf_lag_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ovf_lag_timer.function = rcu_torture_ovf_lag_timer;
+	ovf_lag_active = false;
+	hrtimer_start(&ovf_lag_timer,
+		      ktime_set((ovf_cycle_mins - ovf_active_mins) * 60, 0), HRTIMER_MODE_REL);
+
+	return 0;
+}
+
+static void rcu_torture_ovf_lag_cleanup(void)
+{
+	hrtimer_cancel(&ovf_lag_timer);
+
+	if (ovf_lag_active) {
+		rcu_set_torture_ovf_lag(0);
+		ovf_lag_active = false;
+	}
+}
 static void
 rcu_torture_cleanup(void)
 {
@@ -4015,6 +4073,8 @@ rcu_torture_cleanup(void)
 	torture_cleanup_end();
 	if (cur_ops->gp_slow_unregister)
 		cur_ops->gp_slow_unregister(NULL);
+
+	rcu_torture_ovf_lag_cleanup();
 }
 
 static void rcu_torture_leak_cb(struct rcu_head *rhp)
@@ -4508,6 +4568,10 @@ rcu_torture_init(void)
 	torture_init_end();
 	if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister))
 		cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay);
+
+	if (rcu_torture_ovf_lag_init())
+		goto unwind;
+
 	return 0;
 
 unwind:
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b77ccc55557b..7b17b578956a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -80,6 +80,15 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *);
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
 	.gpwrap = true,
 };
+
+int rcu_get_gpwrap_count(int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+	return READ_ONCE(rdp->gpwrap_count);
+}
+EXPORT_SYMBOL_GPL(rcu_get_gpwrap_count);
+
 static struct rcu_state rcu_state = {
 	.level = { &rcu_state.node[0] },
 	.gp_state = RCU_GP_IDLE,
@@ -778,6 +787,25 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
 	smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
 }
 
+/**
+ * rcu_set_torture_ovf_lag - Set RCU GP sequence overflow lag value.
+ * @lag_gps: Set overflow lag to this many grace period worth of counters
+ * which is used by rcutorture to quickly force a gpwrap situation.
+ * @lag_gps = 0 means we reset it back to the boot-time value.
+ */
+static unsigned long seq_ovf_lag = ULONG_MAX / 4;
+
+void rcu_set_torture_ovf_lag(unsigned long lag_gps)
+{
+	unsigned long lag_seq_count;
+
+	lag_seq_count = (lag_gps == 0)
+			? ULONG_MAX / 4
+			: lag_gps << RCU_SEQ_CTR_SHIFT;
+	WRITE_ONCE(seq_ovf_lag, lag_seq_count);
+}
+EXPORT_SYMBOL_GPL(rcu_set_torture_ovf_lag);
+
 /*
  * When trying to report a quiescent state on behalf of some other CPU,
  * it is our responsibility to check for and handle potential overflow
@@ -788,9 +816,11 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
 static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
 {
 	raw_lockdep_assert_held_rcu_node(rnp);
-	if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
-			 rnp->gp_seq))
+	if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + seq_ovf_lag,
+			 rnp->gp_seq)) {
 		WRITE_ONCE(rdp->gpwrap, true);
+		WRITE_ONCE(rdp->gpwrap_count, READ_ONCE(rdp->gpwrap_count) + 1);
+	}
 	if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
 		rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
 }
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a9a811d9d7a3..63bea388c243 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -183,6 +183,7 @@ struct rcu_data {
 	bool		core_needs_qs;	/* Core waits for quiescent state. */
 	bool		beenonline;	/* CPU online at least once. */
 	bool		gpwrap;		/* Possible ->gp_seq wrap. */
+	unsigned int	gpwrap_count;	/* Count of GP sequence wrap. */
 	bool		cpu_started;	/* RCU watching this onlining CPU. */
 	struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */
 	unsigned long grpmask;		/* Mask to apply to leaf qsmask. */
-- 
2.43.0

Re: [PATCH] rcutorture: Perform more frequent testing of ->gpwrap

Posted by Paul E. McKenney 10 months, 1 week ago

On Sat, Mar 29, 2025 at 07:01:42PM -0400, Joel Fernandes wrote:
> Currently, the ->gpwrap is not tested (at all per my testing) due to the
> requirement of a large delta between a CPU's rdp->gp_seq and its node's
> rnp->gpseq.
> 
> This results in no testing of ->gpwrap being set. This patch by default
> adds 5 minutes of testing with ->gpwrap forced by lowering the delta
> between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this is
> configurable, including the active time for the setting and a full
> testing cycle.
> 
> By default, the first 25 minutes of a test will have the _default_
> behavior there is right now (ULONG_MAX / 4) delta. Then for 5 minutes,
> we switch to a smaller delta causing 1-2 wraps in 5 minutes. I believe
> this is reasonable since we at least add a little bit of testing for
> usecases where ->gpwrap is set.
> 
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>

I ran this as follows:

tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 10m --configs "TREE01" --bootargs "rcutorture.ovf_cycle_mins=7" --trust-make

Once I actually applied your patch, I did get this:

[  601.891042] gpwraps: 13745

Which seems to indicate some testing.  ;-)

Additional comments inline.

							Thanx, Paul

> ---
>  kernel/rcu/rcu.h        |  4 +++
>  kernel/rcu/rcutorture.c | 64 +++++++++++++++++++++++++++++++++++++++++
>  kernel/rcu/tree.c       | 34 ++++++++++++++++++++--
>  kernel/rcu/tree.h       |  1 +
>  4 files changed, 101 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index eed2951a4962..9a15e9701e02 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
>  			       unsigned long c_old,
>  			       unsigned long c);
>  void rcu_gp_set_torture_wait(int duration);
> +void rcu_set_torture_ovf_lag(unsigned long lag);
> +int rcu_get_gpwrap_count(int cpu);
>  #else
>  static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
>  {
> @@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
>  	do { } while (0)
>  #endif
>  static inline void rcu_gp_set_torture_wait(int duration) { }
> +static inline void rcu_set_torture_ovf_lag(unsigned long lag) { }
> +static inline int rcu_get_gpwrap_count(int cpu) { return 0; }

Very good, you did remember CONFIG_SMP=n.  And yes, I did try it.  ;-)

But shouldn't these be function pointers in rcu_torture_ops?  That way if
some other flavor of RCU starts doing wrap protection for its grace-period
sequence numbers, this testing can apply directly to that flavor as well.

Then the pointers can simply be NULL in kernels built with CONFIG_SMP=n.

>  #endif
>  unsigned long long rcutorture_gather_gp_seqs(void);
>  void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> index 895a27545ae1..79a72e70913e 100644
> --- a/kernel/rcu/rcutorture.c
> +++ b/kernel/rcu/rcutorture.c
> @@ -118,6 +118,9 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
>  torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
>  torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
>  torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
> +torture_param(int, ovf_cycle_mins, 30, "Total cycle duration for ovf lag testing (in minutes)");
> +torture_param(int, ovf_active_mins, 5, "Duration for which ovf lag is active within each cycle (in minutes)");
> +torture_param(int, ovf_lag_gps, 8, "Value to set for set_torture_ovf_lag during an active testing period.");

Given that "ovf" means just "overflow", would it make sense to get a "gp"
in there?  Maybe just "gpwrap_..."?

"What is in a name?"  ;-)

I could argue with the defaults, but I run long tests often enough that
I am not worried about coverage.  As long as we remember to either run
long tests or specify appropriate rcutorture.ovf_cycle_mins when messing
with ->gpwrap code.

>  torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
>  torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
>  torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
> @@ -2629,6 +2632,7 @@ rcu_torture_stats_print(void)
>  	int i;
>  	long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
>  	long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> +	long n_gpwraps = 0;
>  	struct rcu_torture *rtcp;
>  	static unsigned long rtcv_snap = ULONG_MAX;
>  	static bool splatted;
> @@ -2639,6 +2643,7 @@ rcu_torture_stats_print(void)
>  			pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
>  			batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
>  		}
> +		n_gpwraps += rcu_get_gpwrap_count(cpu);
>  	}
>  	for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
>  		if (pipesummary[i] != 0)
> @@ -2672,6 +2677,7 @@ rcu_torture_stats_print(void)
>  	pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
>  	pr_cont("nocb-toggles: %ld:%ld\n",

The "\n" on the above line needs to be deleted.

>  		atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
> +	pr_cont("gpwraps: %ld\n", n_gpwraps);
>  
>  	pr_alert("%s%s ", torture_type, TORTURE_FLAG);
>  	if (atomic_read(&n_rcu_torture_mberror) ||
> @@ -3842,6 +3848,58 @@ static int rcu_torture_preempt(void *unused)
>  
>  static enum cpuhp_state rcutor_hp;
>  
> +static struct hrtimer ovf_lag_timer;
> +static bool ovf_lag_active;

Same "ovf" naming complaint as before.

> +
> +/* Timer handler for toggling RCU grace-period sequence overflow test lag value */
> +static enum hrtimer_restart rcu_torture_ovf_lag_timer(struct hrtimer *timer)
> +{
> +	ktime_t next_delay;
> +
> +	if (ovf_lag_active) {
> +		pr_alert("rcu-torture: Disabling ovf lag (value=0)\n");
> +		rcu_set_torture_ovf_lag(0);
> +		ovf_lag_active = false;
> +		next_delay = ktime_set((ovf_cycle_mins - ovf_active_mins) * 60, 0);
> +	} else {
> +		pr_alert("rcu-torture: Enabling ovf lag (value=%d)\n", ovf_lag_gps);
> +		rcu_set_torture_ovf_lag(ovf_lag_gps);
> +		ovf_lag_active = true;
> +		next_delay = ktime_set(ovf_active_mins * 60, 0);
> +	}
> +
> +	if (torture_must_stop())
> +		return HRTIMER_NORESTART;
> +
> +	hrtimer_forward_now(timer, next_delay);
> +	return HRTIMER_RESTART;

OK, this does look to do cycles.

> +}
> +
> +static int rcu_torture_ovf_lag_init(void)
> +{
> +	if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
> +		pr_alert("rcu-torture: lag timing parameters must be positive\n");
> +		return -EINVAL;
> +	}

Why not refuse to start this portion of the test when testing CONFIG_SMP=n
or something other than vanilla RCU?  No need to fail the test, just
print something saying that this testing won't be happening.

> +	hrtimer_init(&ovf_lag_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	ovf_lag_timer.function = rcu_torture_ovf_lag_timer;
> +	ovf_lag_active = false;
> +	hrtimer_start(&ovf_lag_timer,
> +		      ktime_set((ovf_cycle_mins - ovf_active_mins) * 60, 0), HRTIMER_MODE_REL);
> +
> +	return 0;
> +}

All hrtimers, no kthreads.  Nice!

> +
> +static void rcu_torture_ovf_lag_cleanup(void)
> +{
> +	hrtimer_cancel(&ovf_lag_timer);
> +
> +	if (ovf_lag_active) {
> +		rcu_set_torture_ovf_lag(0);
> +		ovf_lag_active = false;
> +	}
> +}

Did you try the modprobe/rmmod testing to verify that this
cleans up appropriately?  You could use the drgn tool to check.
See tools/rcu//rcu-cbs.py for an example drgn script that digs into the
rcu_data structures.

>  static void
>  rcu_torture_cleanup(void)
>  {
> @@ -4015,6 +4073,8 @@ rcu_torture_cleanup(void)
>  	torture_cleanup_end();
>  	if (cur_ops->gp_slow_unregister)
>  		cur_ops->gp_slow_unregister(NULL);
> +
> +	rcu_torture_ovf_lag_cleanup();
>  }
>  
>  static void rcu_torture_leak_cb(struct rcu_head *rhp)
> @@ -4508,6 +4568,10 @@ rcu_torture_init(void)
>  	torture_init_end();
>  	if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister))
>  		cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay);
> +
> +	if (rcu_torture_ovf_lag_init())
> +		goto unwind;
> +
>  	return 0;
>  
>  unwind:
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index b77ccc55557b..7b17b578956a 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -80,6 +80,15 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *);
>  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
>  	.gpwrap = true,
>  };
> +
> +int rcu_get_gpwrap_count(int cpu)
> +{
> +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
> +
> +	return READ_ONCE(rdp->gpwrap_count);
> +}
> +EXPORT_SYMBOL_GPL(rcu_get_gpwrap_count);
> +
>  static struct rcu_state rcu_state = {
>  	.level = { &rcu_state.node[0] },
>  	.gp_state = RCU_GP_IDLE,
> @@ -778,6 +787,25 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
>  	smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
>  }
>  
> +/**
> + * rcu_set_torture_ovf_lag - Set RCU GP sequence overflow lag value.
> + * @lag_gps: Set overflow lag to this many grace period worth of counters
> + * which is used by rcutorture to quickly force a gpwrap situation.
> + * @lag_gps = 0 means we reset it back to the boot-time value.
> + */
> +static unsigned long seq_ovf_lag = ULONG_MAX / 4;
> +
> +void rcu_set_torture_ovf_lag(unsigned long lag_gps)
> +{
> +	unsigned long lag_seq_count;
> +
> +	lag_seq_count = (lag_gps == 0)
> +			? ULONG_MAX / 4
> +			: lag_gps << RCU_SEQ_CTR_SHIFT;
> +	WRITE_ONCE(seq_ovf_lag, lag_seq_count);
> +}
> +EXPORT_SYMBOL_GPL(rcu_set_torture_ovf_lag);
> +
>  /*
>   * When trying to report a quiescent state on behalf of some other CPU,
>   * it is our responsibility to check for and handle potential overflow
> @@ -788,9 +816,11 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
>  static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
>  {
>  	raw_lockdep_assert_held_rcu_node(rnp);
> -	if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
> -			 rnp->gp_seq))
> +	if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + seq_ovf_lag,
> +			 rnp->gp_seq)) {
>  		WRITE_ONCE(rdp->gpwrap, true);
> +		WRITE_ONCE(rdp->gpwrap_count, READ_ONCE(rdp->gpwrap_count) + 1);
> +	}
>  	if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
>  		rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
>  }

Looks plausible.

> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index a9a811d9d7a3..63bea388c243 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -183,6 +183,7 @@ struct rcu_data {
>  	bool		core_needs_qs;	/* Core waits for quiescent state. */
>  	bool		beenonline;	/* CPU online at least once. */
>  	bool		gpwrap;		/* Possible ->gp_seq wrap. */
> +	unsigned int	gpwrap_count;	/* Count of GP sequence wrap. */
>  	bool		cpu_started;	/* RCU watching this onlining CPU. */
>  	struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */
>  	unsigned long grpmask;		/* Mask to apply to leaf qsmask. */
> -- 
> 2.43.0
>