Currently, ->gpwrap is not tested at all (per my testing) due to the
requirement of a large delta between a CPU's rdp->gp_seq and its node's
rnp->gp_seq.
This results in no testing of ->gpwrap being set. This patch, by default,
adds 5 minutes of testing with ->gpwrap forced by lowering the delta
between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this is
configurable, including both the active time for the setting and the
duration of a full testing cycle.
By default, the first 25 minutes of a test use the current default delta
(ULONG_MAX / 4). Then, for 5 minutes, we switch to the smaller delta,
causing 1-2 wraps within those 5 minutes. I believe this is reasonable
since it adds at least a little testing for use cases where ->gpwrap is
set.
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
kernel/rcu/rcu.h | 4 +++
kernel/rcu/rcutorture.c | 64 +++++++++++++++++++++++++++++++++++++++++
kernel/rcu/tree.c | 34 ++++++++++++++++++++--
kernel/rcu/tree.h | 1 +
4 files changed, 101 insertions(+), 2 deletions(-)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index eed2951a4962..9a15e9701e02 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
unsigned long c_old,
unsigned long c);
void rcu_gp_set_torture_wait(int duration);
+void rcu_set_torture_ovf_lag(unsigned long lag);
+int rcu_get_gpwrap_count(int cpu);
#else
static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
{
@@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
do { } while (0)
#endif
static inline void rcu_gp_set_torture_wait(int duration) { }
+static inline void rcu_set_torture_ovf_lag(unsigned long lag) { }
+static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
#endif
unsigned long long rcutorture_gather_gp_seqs(void);
void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 895a27545ae1..79a72e70913e 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -118,6 +118,9 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
+torture_param(int, ovf_cycle_mins, 30, "Total cycle duration for ovf lag testing (in minutes)");
+torture_param(int, ovf_active_mins, 5, "Duration for which ovf lag is active within each cycle (in minutes)");
+torture_param(int, ovf_lag_gps, 8, "Value to set for set_torture_ovf_lag during an active testing period.");
torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
@@ -2629,6 +2632,7 @@ rcu_torture_stats_print(void)
int i;
long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+ long n_gpwraps = 0;
struct rcu_torture *rtcp;
static unsigned long rtcv_snap = ULONG_MAX;
static bool splatted;
@@ -2639,6 +2643,7 @@ rcu_torture_stats_print(void)
pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
}
+ n_gpwraps += rcu_get_gpwrap_count(cpu);
}
for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
if (pipesummary[i] != 0)
@@ -2672,6 +2677,7 @@ rcu_torture_stats_print(void)
pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
pr_cont("nocb-toggles: %ld:%ld\n",
atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
+ pr_cont("gpwraps: %ld\n", n_gpwraps);
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
if (atomic_read(&n_rcu_torture_mberror) ||
@@ -3842,6 +3848,58 @@ static int rcu_torture_preempt(void *unused)
static enum cpuhp_state rcutor_hp;
+static struct hrtimer ovf_lag_timer;
+static bool ovf_lag_active;
+
+/* Timer handler for toggling RCU grace-period sequence overflow test lag value */
+static enum hrtimer_restart rcu_torture_ovf_lag_timer(struct hrtimer *timer)
+{
+ ktime_t next_delay;
+
+ if (ovf_lag_active) {
+ pr_alert("rcu-torture: Disabling ovf lag (value=0)\n");
+ rcu_set_torture_ovf_lag(0);
+ ovf_lag_active = false;
+ next_delay = ktime_set((ovf_cycle_mins - ovf_active_mins) * 60, 0);
+ } else {
+ pr_alert("rcu-torture: Enabling ovf lag (value=%d)\n", ovf_lag_gps);
+ rcu_set_torture_ovf_lag(ovf_lag_gps);
+ ovf_lag_active = true;
+ next_delay = ktime_set(ovf_active_mins * 60, 0);
+ }
+
+ if (torture_must_stop())
+ return HRTIMER_NORESTART;
+
+ hrtimer_forward_now(timer, next_delay);
+ return HRTIMER_RESTART;
+}
+
+static int rcu_torture_ovf_lag_init(void)
+{
+ if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
+ pr_alert("rcu-torture: lag timing parameters must be positive\n");
+ return -EINVAL;
+ }
+
+ hrtimer_init(&ovf_lag_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ ovf_lag_timer.function = rcu_torture_ovf_lag_timer;
+ ovf_lag_active = false;
+ hrtimer_start(&ovf_lag_timer,
+ ktime_set((ovf_cycle_mins - ovf_active_mins) * 60, 0), HRTIMER_MODE_REL);
+
+ return 0;
+}
+
+static void rcu_torture_ovf_lag_cleanup(void)
+{
+ hrtimer_cancel(&ovf_lag_timer);
+
+ if (ovf_lag_active) {
+ rcu_set_torture_ovf_lag(0);
+ ovf_lag_active = false;
+ }
+}
static void
rcu_torture_cleanup(void)
{
@@ -4015,6 +4073,8 @@ rcu_torture_cleanup(void)
torture_cleanup_end();
if (cur_ops->gp_slow_unregister)
cur_ops->gp_slow_unregister(NULL);
+
+ rcu_torture_ovf_lag_cleanup();
}
static void rcu_torture_leak_cb(struct rcu_head *rhp)
@@ -4508,6 +4568,10 @@ rcu_torture_init(void)
torture_init_end();
if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister))
cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay);
+
+ if (rcu_torture_ovf_lag_init())
+ goto unwind;
+
return 0;
unwind:
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b77ccc55557b..7b17b578956a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -80,6 +80,15 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.gpwrap = true,
};
+
+int rcu_get_gpwrap_count(int cpu)
+{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ return READ_ONCE(rdp->gpwrap_count);
+}
+EXPORT_SYMBOL_GPL(rcu_get_gpwrap_count);
+
static struct rcu_state rcu_state = {
.level = { &rcu_state.node[0] },
.gp_state = RCU_GP_IDLE,
@@ -778,6 +787,25 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
}
+/**
+ * rcu_set_torture_ovf_lag - Set RCU GP sequence overflow lag value.
+ * @lag_gps: Set overflow lag to this many grace period worth of counters
+ * which is used by rcutorture to quickly force a gpwrap situation.
+ * @lag_gps = 0 means we reset it back to the boot-time value.
+ */
+static unsigned long seq_ovf_lag = ULONG_MAX / 4;
+
+void rcu_set_torture_ovf_lag(unsigned long lag_gps)
+{
+ unsigned long lag_seq_count;
+
+ lag_seq_count = (lag_gps == 0)
+ ? ULONG_MAX / 4
+ : lag_gps << RCU_SEQ_CTR_SHIFT;
+ WRITE_ONCE(seq_ovf_lag, lag_seq_count);
+}
+EXPORT_SYMBOL_GPL(rcu_set_torture_ovf_lag);
+
/*
* When trying to report a quiescent state on behalf of some other CPU,
* it is our responsibility to check for and handle potential overflow
@@ -788,9 +816,11 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
{
raw_lockdep_assert_held_rcu_node(rnp);
- if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
- rnp->gp_seq))
+ if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + seq_ovf_lag,
+ rnp->gp_seq)) {
WRITE_ONCE(rdp->gpwrap, true);
+ WRITE_ONCE(rdp->gpwrap_count, READ_ONCE(rdp->gpwrap_count) + 1);
+ }
if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a9a811d9d7a3..63bea388c243 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -183,6 +183,7 @@ struct rcu_data {
bool core_needs_qs; /* Core waits for quiescent state. */
bool beenonline; /* CPU online at least once. */
bool gpwrap; /* Possible ->gp_seq wrap. */
+ unsigned int gpwrap_count; /* Count of GP sequence wrap. */
bool cpu_started; /* RCU watching this onlining CPU. */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
--
2.43.0
On Sat, Mar 29, 2025 at 07:01:42PM -0400, Joel Fernandes wrote:
> Currently, the ->gpwrap is not tested (at all per my testing) due to the
> requirement of a large delta between a CPU's rdp->gp_seq and its node's
> rnp->gpseq.
>
> This results in no testing of ->gpwrap being set. This patch by default
> adds 5 minutes of testing with ->gpwrap forced by lowering the delta
> between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this is
> configurable, including the active time for the setting and a full
> testing cycle.
>
> By default, the first 25 minutes of a test will have the _default_
> behavior there is right now (ULONG_MAX / 4) delta. Then for 5 minutes,
> we switch to a smaller delta causing 1-2 wraps in 5 minutes. I believe
> this is reasonable since we at least add a little bit of testing for
> usecases where ->gpwrap is set.
>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
I ran this as follows:
tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 10m --configs "TREE01" --bootargs "rcutorture.ovf_cycle_mins=7" --trust-make
Once I actually applied your patch, I did get this:
[ 601.891042] gpwraps: 13745
Which seems to indicate some testing. ;-)
Additional comments inline.
Thanx, Paul
> ---
> kernel/rcu/rcu.h | 4 +++
> kernel/rcu/rcutorture.c | 64 +++++++++++++++++++++++++++++++++++++++++
> kernel/rcu/tree.c | 34 ++++++++++++++++++++--
> kernel/rcu/tree.h | 1 +
> 4 files changed, 101 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index eed2951a4962..9a15e9701e02 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> unsigned long c_old,
> unsigned long c);
> void rcu_gp_set_torture_wait(int duration);
> +void rcu_set_torture_ovf_lag(unsigned long lag);
> +int rcu_get_gpwrap_count(int cpu);
> #else
> static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
> {
> @@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> do { } while (0)
> #endif
> static inline void rcu_gp_set_torture_wait(int duration) { }
> +static inline void rcu_set_torture_ovf_lag(unsigned long lag) { }
> +static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
Very good, you did remember CONFIG_SMP=n. And yes, I did try it. ;-)
But shouldn't these be function pointers in rcu_torture_ops? That way if
some other flavor of RCU starts doing wrap protection for its grace-period
sequence numbers, this testing can apply directly to that flavor as well.
Then the pointers can simply be NULL in kernels built with CONFIG_SMP=n.
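For instance, a rough sketch of what such hooks might look like (the field
names here are invented for illustration, not taken from the patch):

	/* Sketch only: hypothetical rcu_torture_ops hooks. */
	struct rcu_torture_ops {
		/* ... existing members ... */
		void (*set_gpwrap_lag)(unsigned long lag);
		int (*get_gpwrap_count)(int cpu);
	};

	/* Tree RCU fills them in; Tiny and other flavors leave them NULL. */
	static struct rcu_torture_ops rcu_ops = {
		/* ... */
		.set_gpwrap_lag   = rcu_set_torture_ovf_lag,
		.get_gpwrap_count = rcu_get_gpwrap_count,
	};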
> #endif
> unsigned long long rcutorture_gather_gp_seqs(void);
> void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> index 895a27545ae1..79a72e70913e 100644
> --- a/kernel/rcu/rcutorture.c
> +++ b/kernel/rcu/rcutorture.c
> @@ -118,6 +118,9 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
> torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
> torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
> torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
> +torture_param(int, ovf_cycle_mins, 30, "Total cycle duration for ovf lag testing (in minutes)");
> +torture_param(int, ovf_active_mins, 5, "Duration for which ovf lag is active within each cycle (in minutes)");
> +torture_param(int, ovf_lag_gps, 8, "Value to set for set_torture_ovf_lag during an active testing period.");
Given that "ovf" means just "overflow", would it make sense to get a "gp"
in there? Maybe just "gpwrap_..."?
"What is in a name?" ;-)
I could argue with the defaults, but I run long tests often enough that
I am not worried about coverage. As long as we remember to either run
long tests or specify appropriate rcutorture.ovf_cycle_mins when messing
with ->gpwrap code.
> torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
> torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
> torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
> @@ -2629,6 +2632,7 @@ rcu_torture_stats_print(void)
> int i;
> long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> + long n_gpwraps = 0;
> struct rcu_torture *rtcp;
> static unsigned long rtcv_snap = ULONG_MAX;
> static bool splatted;
> @@ -2639,6 +2643,7 @@ rcu_torture_stats_print(void)
> pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
> batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
> }
> + n_gpwraps += rcu_get_gpwrap_count(cpu);
> }
> for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
> if (pipesummary[i] != 0)
> @@ -2672,6 +2677,7 @@ rcu_torture_stats_print(void)
> pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
> pr_cont("nocb-toggles: %ld:%ld\n",
The "\n" on the above line needs to be deleted.
> atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
> + pr_cont("gpwraps: %ld\n", n_gpwraps);
>
> pr_alert("%s%s ", torture_type, TORTURE_FLAG);
> if (atomic_read(&n_rcu_torture_mberror) ||
> @@ -3842,6 +3848,58 @@ static int rcu_torture_preempt(void *unused)
>
> static enum cpuhp_state rcutor_hp;
>
> +static struct hrtimer ovf_lag_timer;
> +static bool ovf_lag_active;
Same "ovf" naming complaint as before.
> +
> +/* Timer handler for toggling RCU grace-period sequence overflow test lag value */
> +static enum hrtimer_restart rcu_torture_ovf_lag_timer(struct hrtimer *timer)
> +{
> + ktime_t next_delay;
> +
> + if (ovf_lag_active) {
> + pr_alert("rcu-torture: Disabling ovf lag (value=0)\n");
> + rcu_set_torture_ovf_lag(0);
> + ovf_lag_active = false;
> + next_delay = ktime_set((ovf_cycle_mins - ovf_active_mins) * 60, 0);
> + } else {
> + pr_alert("rcu-torture: Enabling ovf lag (value=%d)\n", ovf_lag_gps);
> + rcu_set_torture_ovf_lag(ovf_lag_gps);
> + ovf_lag_active = true;
> + next_delay = ktime_set(ovf_active_mins * 60, 0);
> + }
> +
> + if (torture_must_stop())
> + return HRTIMER_NORESTART;
> +
> + hrtimer_forward_now(timer, next_delay);
> + return HRTIMER_RESTART;
OK, this does look to do cycles.
> +}
> +
> +static int rcu_torture_ovf_lag_init(void)
> +{
> + if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
> + pr_alert("rcu-torture: lag timing parameters must be positive\n");
> + return -EINVAL;
> + }
Why not refuse to start this portion of the test when testing CONFIG_SMP=n
or something other than vanilla RCU? No need to fail the test, just
print something saying that this testing won't be happening.
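A minimal sketch of such a guard, assuming the ops hooks suggested above
(IS_ENABLED() avoids a separate #ifdef):

	static int rcu_torture_ovf_lag_init(void)
	{
		/* Skip quietly when this flavor cannot force wraps. */
		if (!IS_ENABLED(CONFIG_SMP) || !cur_ops->set_gpwrap_lag) {
			pr_alert("rcu-torture: gpwrap lag testing not supported, skipping\n");
			return 0;	/* Not an error; the rest of the test runs. */
		}
		if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
			pr_alert("rcu-torture: lag timing parameters must be positive\n");
			return -EINVAL;
		}
		/* ... arm the hrtimer as in the patch ... */
		return 0;
	}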
> + hrtimer_init(&ovf_lag_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + ovf_lag_timer.function = rcu_torture_ovf_lag_timer;
> + ovf_lag_active = false;
> + hrtimer_start(&ovf_lag_timer,
> + ktime_set((ovf_cycle_mins - ovf_active_mins) * 60, 0), HRTIMER_MODE_REL);
> +
> + return 0;
> +}
All hrtimers, no kthreads. Nice!
> +
> +static void rcu_torture_ovf_lag_cleanup(void)
> +{
> + hrtimer_cancel(&ovf_lag_timer);
> +
> + if (ovf_lag_active) {
> + rcu_set_torture_ovf_lag(0);
> + ovf_lag_active = false;
> + }
> +}
Did you try the modprobe/rmmod testing to verify that this
cleans up appropriately? You could use the drgn tool to check.
See tools/rcu/rcu-cbs.py for an example drgn script that digs into the
rcu_data structures.
> static void
> rcu_torture_cleanup(void)
> {
> @@ -4015,6 +4073,8 @@ rcu_torture_cleanup(void)
> torture_cleanup_end();
> if (cur_ops->gp_slow_unregister)
> cur_ops->gp_slow_unregister(NULL);
> +
> + rcu_torture_ovf_lag_cleanup();
> }
>
> static void rcu_torture_leak_cb(struct rcu_head *rhp)
> @@ -4508,6 +4568,10 @@ rcu_torture_init(void)
> torture_init_end();
> if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister))
> cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay);
> +
> + if (rcu_torture_ovf_lag_init())
> + goto unwind;
> +
> return 0;
>
> unwind:
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index b77ccc55557b..7b17b578956a 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -80,6 +80,15 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *);
> static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
> .gpwrap = true,
> };
> +
> +int rcu_get_gpwrap_count(int cpu)
> +{
> + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
> +
> + return READ_ONCE(rdp->gpwrap_count);
> +}
> +EXPORT_SYMBOL_GPL(rcu_get_gpwrap_count);
> +
> static struct rcu_state rcu_state = {
> .level = { &rcu_state.node[0] },
> .gp_state = RCU_GP_IDLE,
> @@ -778,6 +787,25 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
> smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
> }
>
> +/**
> + * rcu_set_torture_ovf_lag - Set RCU GP sequence overflow lag value.
> + * @lag_gps: Set overflow lag to this many grace period worth of counters
> + * which is used by rcutorture to quickly force a gpwrap situation.
> + * @lag_gps = 0 means we reset it back to the boot-time value.
> + */
> +static unsigned long seq_ovf_lag = ULONG_MAX / 4;
> +
> +void rcu_set_torture_ovf_lag(unsigned long lag_gps)
> +{
> + unsigned long lag_seq_count;
> +
> + lag_seq_count = (lag_gps == 0)
> + ? ULONG_MAX / 4
> + : lag_gps << RCU_SEQ_CTR_SHIFT;
> + WRITE_ONCE(seq_ovf_lag, lag_seq_count);
> +}
> +EXPORT_SYMBOL_GPL(rcu_set_torture_ovf_lag);
> +
> /*
> * When trying to report a quiescent state on behalf of some other CPU,
> * it is our responsibility to check for and handle potential overflow
> @@ -788,9 +816,11 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
> static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
> {
> raw_lockdep_assert_held_rcu_node(rnp);
> - if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
> - rnp->gp_seq))
> + if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + seq_ovf_lag,
> + rnp->gp_seq)) {
> WRITE_ONCE(rdp->gpwrap, true);
> + WRITE_ONCE(rdp->gpwrap_count, READ_ONCE(rdp->gpwrap_count) + 1);
> + }
> if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
> rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
> }
Looks plausible.
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index a9a811d9d7a3..63bea388c243 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -183,6 +183,7 @@ struct rcu_data {
> bool core_needs_qs; /* Core waits for quiescent state. */
> bool beenonline; /* CPU online at least once. */
> bool gpwrap; /* Possible ->gp_seq wrap. */
> + unsigned int gpwrap_count; /* Count of GP sequence wrap. */
> bool cpu_started; /* RCU watching this onlining CPU. */
> struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
> unsigned long grpmask; /* Mask to apply to leaf qsmask. */
> --
> 2.43.0
>
Hello, Paul,
On Sat, 5 Apr 2025 12:26:12 GMT, "Paul E. McKenney" wrote:
> On Sat, Mar 29, 2025 at 07:01:42PM -0400, Joel Fernandes wrote:
> > Currently, the ->gpwrap is not tested (at all per my testing) due to the
> > requirement of a large delta between a CPU's rdp->gp_seq and its node's
> > rnp->gpseq.
> >
> > This results in no testing of ->gpwrap being set. This patch by default
> > adds 5 minutes of testing with ->gpwrap forced by lowering the delta
> > between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this is
> > configurable, including the active time for the setting and a full
> > testing cycle.
> >
> > By default, the first 25 minutes of a test will have the _default_
> > behavior there is right now (ULONG_MAX / 4) delta. Then for 5 minutes,
> > we switch to a smaller delta causing 1-2 wraps in 5 minutes. I believe
> > this is reasonable since we at least add a little bit of testing for
> > usecases where ->gpwrap is set.
> >
> > Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
>
> I ran this as follows:
>
> tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 10m --configs "TREE01" --bootargs "rcutorture.ovf_cycle_mins=7" --trust-make
>
> Once I actually applied your patch, I did get this:
>
> [ 601.891042] gpwraps: 13745
>
> Which seems to indicate some testing. ;-)
Thanks a lot for running it. I am wondering if I should check in tree.c (only in
testing mode) whether the wraps are too many, and restrict testing if so. Otherwise,
it is hard to come up with a constant that keeps the wraps under control.
On the other hand, since this is only 5 minutes out of every 30, we could leave
it as is and avoid the complexity.
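If we did add such a check, it might look roughly like this next to the
existing test in rcu_gpnum_ovf() (purely illustrative; GPWRAP_TEST_CAP is a
made-up threshold, not part of this patch):

	/* Illustrative only: revert to the default lag once wraps pile up. */
	#define GPWRAP_TEST_CAP 1000
	if (READ_ONCE(rdp->gpwrap_count) >= GPWRAP_TEST_CAP)
		WRITE_ONCE(seq_ovf_lag, ULONG_MAX / 4);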
>
> Additional comments inline.
>
> Thanx, Paul
>
> > ---
> > kernel/rcu/rcu.h | 4 +++
> > kernel/rcu/rcutorture.c | 64 +++++++++++++++++++++++++++++++++++++++++
> > kernel/rcu/tree.c | 34 ++++++++++++++++++++--
> > kernel/rcu/tree.h | 1 +
> > 4 files changed, 101 insertions(+), 2 deletions(-)
> >
> > diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> > index eed2951a4962..9a15e9701e02 100644
> > --- a/kernel/rcu/rcu.h
> > +++ b/kernel/rcu/rcu.h
> > @@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> > unsigned long c_old,
> > unsigned long c);
> > void rcu_gp_set_torture_wait(int duration);
> > +void rcu_set_torture_ovf_lag(unsigned long lag);
> > +int rcu_get_gpwrap_count(int cpu);
> > #else
> > static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
> > {
> > @@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> > do { } while (0)
> > #endif
> > static inline void rcu_gp_set_torture_wait(int duration) { }
> > +static inline void rcu_set_torture_ovf_lag(unsigned long lag) { }
> > +static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
>
> Very good, you did remember CONFIG_SMP=n. And yes, I did try it. ;-)
>
> But shouldn't these be function pointers in rcu_torture_ops? That way if
> some other flavor of RCU starts doing wrap protection for its grace-period
> sequence numbers, this testing can apply directly to that flavor as well.
These are here because 'rdp' is not accessible, AFAIK, from rcutorture.c.
I could add wrappers for these and include them as pointers in a struct as well,
but I think these functions will still be needed to access rdp.
>
> Then the pointers can simply be NULL in kernels built with CONFIG_SMP=n.
>
> > #endif
> > unsigned long long rcutorture_gather_gp_seqs(void);
> > void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
> > diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> > index 895a27545ae1..79a72e70913e 100644
> > --- a/kernel/rcu/rcutorture.c
> > +++ b/kernel/rcu/rcutorture.c
> > @@ -118,6 +118,9 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
> > torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
> > torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
> > torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
> > +torture_param(int, ovf_cycle_mins, 30, "Total cycle duration for ovf lag testing (in minutes)");
> > +torture_param(int, ovf_active_mins, 5, "Duration for which ovf lag is active within each cycle (in minutes)");
> > +torture_param(int, ovf_lag_gps, 8, "Value to set for set_torture_ovf_lag during an active testing period.");
>
> Given that "ovf" means just "overflow", would it make sense to get a "gp"
> in there? Maybe just "gpwrap_..."?
>
> "What is in a name?" ;-)
Sure, makes sense; I will rename.
>
> I could argue with the defaults, but I run long tests often enough that
> I am not worried about coverage. As long as we remember to either run
> long tests or specify appropriate rcutorture.ovf_cycle_mins when messing
> with ->gpwrap code.
>
> > torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
> > torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
> > torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
> > @@ -2629,6 +2632,7 @@ rcu_torture_stats_print(void)
> > int i;
> > long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> > long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> > + long n_gpwraps = 0;
> > struct rcu_torture *rtcp;
> > static unsigned long rtcv_snap = ULONG_MAX;
> > static bool splatted;
> > @@ -2639,6 +2643,7 @@ rcu_torture_stats_print(void)
> > pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
> > batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
> > }
> > + n_gpwraps += rcu_get_gpwrap_count(cpu);
> > }
> > for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
> > if (pipesummary[i] != 0)
> > @@ -2672,6 +2677,7 @@ rcu_torture_stats_print(void)
> > pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
> > pr_cont("nocb-toggles: %ld:%ld\n",
>
> The "\n" on the above line needs to be deleted.
Ok.
> > atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
> > + pr_cont("gpwraps: %ld\n", n_gpwraps);
> >
> > pr_alert("%s%s ", torture_type, TORTURE_FLAG);
> > if (atomic_read(&n_rcu_torture_mberror) ||
> > @@ -3842,6 +3848,58 @@ static int rcu_torture_preempt(void *unused)
> >
> > static enum cpuhp_state rcutor_hp;
> >
> > +static struct hrtimer ovf_lag_timer;
> > +static bool ovf_lag_active;
>
> Same "ovf" naming complaint as before.
Ok.
> > +}
> > +
> > +static int rcu_torture_ovf_lag_init(void)
> > +{
> > + if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
> > + pr_alert("rcu-torture: lag timing parameters must be positive\n");
> > + return -EINVAL;
> > + }
>
> Why not refuse to start this portion of the test when testing CONFIG_SMP=n
> or something other than vanilla RCU? No need to fail the test, just
> print something saying that this testing won't be happening.
Got it, will do.
> > +
> > +static void rcu_torture_ovf_lag_cleanup(void)
> > +{
> > + hrtimer_cancel(&ovf_lag_timer);
> > +
> > + if (ovf_lag_active) {
> > + rcu_set_torture_ovf_lag(0);
> > + ovf_lag_active = false;
> > + }
> > +}
>
> Did you try the modprobe/rmmod testing to verify that this
> cleans up appropriately? You could use the drgn tool to check.
> See tools/rcu//rcu-cbs.py for an example drgn script that digs into the
> rcu_data structures.
Nice, will check!
Will work on this and provide v2.
thanks,
- Joel
On Sat, Apr 05, 2025 at 12:30:58PM -0000, Joel Fernandes wrote:
> Hello, Paul,
>
> On Sat, 5 Apr 2025 12:26:12 GMT, "Paul E. McKenney" wrote:
> > On Sat, Mar 29, 2025 at 07:01:42PM -0400, Joel Fernandes wrote:
> > > Currently, the ->gpwrap is not tested (at all per my testing) due to the
> > > requirement of a large delta between a CPU's rdp->gp_seq and its node's
> > > rnp->gpseq.
> > >
> > > This results in no testing of ->gpwrap being set. This patch by default
> > > adds 5 minutes of testing with ->gpwrap forced by lowering the delta
> > > between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this is
> > > configurable, including the active time for the setting and a full
> > > testing cycle.
> > >
> > > By default, the first 25 minutes of a test will have the _default_
> > > behavior there is right now (ULONG_MAX / 4) delta. Then for 5 minutes,
> > > we switch to a smaller delta causing 1-2 wraps in 5 minutes. I believe
> > > this is reasonable since we at least add a little bit of testing for
> > > usecases where ->gpwrap is set.
> > >
> > > Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> >
> > I ran this as follows:
> >
> > tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 10m --configs "TREE01" --bootargs "rcutorture.ovf_cycle_mins=7" --trust-make
> >
> > Once I actually applied your patch, I did get this:
> >
> > [ 601.891042] gpwraps: 13745
> >
> > Which seems to indicate some testing. ;-)
>
> Thanks a lot for running it. I am wondering if I should check in tree.c (only in
> testing mode), if the wraps are too many and restrict testing if so. Otherwise,
> it is hard to come up with a constant that ensures the wraps are under control.
> On the other hand, since this is only for 5 minutes every 30 minutes, we can leave
> it as is and avoid the complexity.
I don't (yet) see a problem with lots of wraps.
> > Additional comments inline.
> >
> > Thanx, Paul
> >
> > > ---
> > > kernel/rcu/rcu.h | 4 +++
> > > kernel/rcu/rcutorture.c | 64 +++++++++++++++++++++++++++++++++++++++++
> > > kernel/rcu/tree.c | 34 ++++++++++++++++++++--
> > > kernel/rcu/tree.h | 1 +
> > > 4 files changed, 101 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> > > index eed2951a4962..9a15e9701e02 100644
> > > --- a/kernel/rcu/rcu.h
> > > +++ b/kernel/rcu/rcu.h
> > > @@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> > > unsigned long c_old,
> > > unsigned long c);
> > > void rcu_gp_set_torture_wait(int duration);
> > > +void rcu_set_torture_ovf_lag(unsigned long lag);
> > > +int rcu_get_gpwrap_count(int cpu);
> > > #else
> > > static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
> > > {
> > > @@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> > > do { } while (0)
> > > #endif
> > > static inline void rcu_gp_set_torture_wait(int duration) { }
> > > +static inline void rcu_set_torture_ovf_lag(unsigned long lag) { }
> > > +static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
> >
> > Very good, you did remember CONFIG_SMP=n. And yes, I did try it. ;-)
> >
> > But shouldn't these be function pointers in rcu_torture_ops? That way if
> > some other flavor of RCU starts doing wrap protection for its grace-period
> > sequence numbers, this testing can apply directly to that flavor as well.
>
> These are here because 'rdp' is not accessible AFAIK from rcutorture.c.
> I could add wrappers to these and include them as pointers the a struct as well.
> But I think these will still stay to access rdp.
Why not just pass in the CPU number and let the pointed-to function find
the rdp?
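That way rcutorture itself deals only in CPU numbers, along these lines
(hook name invented for illustration):

	/* In rcu_torture_stats_print(), summing per-CPU wrap counts: */
	for_each_possible_cpu(cpu) {
		/* ... existing per-CPU sums ... */
		if (cur_ops->get_gpwrap_count)
			n_gpwraps += cur_ops->get_gpwrap_count(cpu);
	}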
> > Then the pointers can simply be NULL in kernels built with CONFIG_SMP=n.
> >
> > > #endif
> > > unsigned long long rcutorture_gather_gp_seqs(void);
> > > void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
> > > diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> > > index 895a27545ae1..79a72e70913e 100644
> > > --- a/kernel/rcu/rcutorture.c
> > > +++ b/kernel/rcu/rcutorture.c
> > > @@ -118,6 +118,9 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
> > > torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
> > > torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
> > > torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
> > > +torture_param(int, ovf_cycle_mins, 30, "Total cycle duration for ovf lag testing (in minutes)");
> > > +torture_param(int, ovf_active_mins, 5, "Duration for which ovf lag is active within each cycle (in minutes)");
> > > +torture_param(int, ovf_lag_gps, 8, "Value to set for set_torture_ovf_lag during an active testing period.");
> >
> > Given that "ovf" means just "overflow", would it make sense to get a "gp"
> > in there? Maybe just "gpwrap_..."?
> >
> > "What is in a name?" ;-)
>
> Sure, makes sense I will rename.
Thank you!
> > I could argue with the defaults, but I run long tests often enough that
> > I am not worried about coverage. As long as we remember to either run
> > long tests or specify appropriate rcutorture.ovf_cycle_mins when messing
> > with ->gpwrap code.
> >
> > > torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
> > > torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
> > > torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
> > > @@ -2629,6 +2632,7 @@ rcu_torture_stats_print(void)
> > > int i;
> > > long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> > > long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> > > + long n_gpwraps = 0;
> > > struct rcu_torture *rtcp;
> > > static unsigned long rtcv_snap = ULONG_MAX;
> > > static bool splatted;
> > > @@ -2639,6 +2643,7 @@ rcu_torture_stats_print(void)
> > > pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
> > > batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
> > > }
> > > + n_gpwraps += rcu_get_gpwrap_count(cpu);
> > > }
> > > for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
> > > if (pipesummary[i] != 0)
> > > @@ -2672,6 +2677,7 @@ rcu_torture_stats_print(void)
> > > pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
> > > pr_cont("nocb-toggles: %ld:%ld\n",
> >
> > The "\n" on the above line needs to be deleted.
>
> Ok.
>
> > > atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
> > > + pr_cont("gpwraps: %ld\n", n_gpwraps);
> > >
> > > pr_alert("%s%s ", torture_type, TORTURE_FLAG);
> > > if (atomic_read(&n_rcu_torture_mberror) ||
> > > @@ -3842,6 +3848,58 @@ static int rcu_torture_preempt(void *unused)
> > >
> > > static enum cpuhp_state rcutor_hp;
> > >
> > > +static struct hrtimer ovf_lag_timer;
> > > +static bool ovf_lag_active;
> >
> > Same "ovf" naming complaint as before.
>
> Ok.
>
> > > +}
> > > +
> > > +static int rcu_torture_ovf_lag_init(void)
> > > +{
> > > + if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
> > > + pr_alert("rcu-torture: lag timing parameters must be positive\n");
> > > + return -EINVAL;
> > > + }
> >
> > Why not refuse to start this portion of the test when testing CONFIG_SMP=n
> > or something other than vanilla RCU? No need to fail the test, just
> > print something saying that this testing won't be happening.
>
> Got it, will do.
Again, thank you!
> > > +static void rcu_torture_ovf_lag_cleanup(void)
> > > +{
> > > + hrtimer_cancel(&ovf_lag_timer);
> > > +
> > > + if (ovf_lag_active) {
> > > + rcu_set_torture_ovf_lag(0);
> > > + ovf_lag_active = false;
> > > + }
> > > +}
> >
> > Did you try the modprobe/rmmod testing to verify that this
> > cleans up appropriately? You could use the drgn tool to check.
> > See tools/rcu//rcu-cbs.py for an example drgn script that digs into the
> > rcu_data structures.
>
> Nice, will check!
>
> Will work on this and provide v2.
Looking forward to it!
Thanx, Paul
Hello, Paul,
On Thu, 10 Apr 2025 13:35:35 GMT, "Paul E. McKenney" wrote:
[...]
> > > > kernel/rcu/rcu.h | 4 +++
> > > > kernel/rcu/rcutorture.c | 64 ++++++++++++++++++++++++++++++++++++++++
> > > > kernel/rcu/tree.c | 34 ++++++++++++++++++++--
> > > > kernel/rcu/tree.h | 1 +
> > > > 4 files changed, 101 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> > > > index eed2951a4962..9a15e9701e02 100644
> > > > --- a/kernel/rcu/rcu.h
> > > > +++ b/kernel/rcu/rcu.h
> > > > @@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> > > > unsigned long c_old,
> > > > unsigned long c);
> > > > void rcu_gp_set_torture_wait(int duration);
> > > > +void rcu_set_torture_ovf_lag(unsigned long lag);
> > > > +int rcu_get_gpwrap_count(int cpu);
> > > > #else
> > > > static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
> > > > {
> > > > @@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> > > > do { } while (0)
> > > > #endif
> > > > static inline void rcu_gp_set_torture_wait(int duration) { }
> > > > +static inline void rcu_set_torture_ovf_lag(unsigned long lag) { }
> > > > +static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
> > >
> > > Very good, you did remember CONFIG_SMP=n. And yes, I did try it. ;-)
> > >
> > > But shouldn't these be function pointers in rcu_torture_ops? That way if
> > > some other flavor of RCU starts doing wrap protection for its grace-period
> > > sequence numbers, this testing can apply directly to that flavor as well.
> >
> > These are here because 'rdp' is not accessible AFAIK from rcutorture.c.
> > I could add wrappers to these and include them as pointers the a struct as well.
> > But I think these will still stay to access rdp.
>
> Why not just pass in the CPU number and let the pointed-to function find
> the rdp?
Great, I did this and it looks good now, will post v2 shortly.
> > > I could argue with the defaults, but I run long tests often enough that
> > > I am not worried about coverage. As long as we remember to either run
> > > long tests or specify appropriate rcutorture.ovf_cycle_mins when messing
> > > with ->gpwrap code.
> > >
> > > > torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
> > > > torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
> > > > torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
> > > > @@ -2629,6 +2632,7 @@ rcu_torture_stats_print(void)
> > > > int i;
> > > > long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> > > > long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> > > > + long n_gpwraps = 0;
> > > > struct rcu_torture *rtcp;
> > > > static unsigned long rtcv_snap = ULONG_MAX;
> > > > static bool splatted;
> > > > @@ -2639,6 +2643,7 @@ rcu_torture_stats_print(void)
> > > > pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
> > > > batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
> > > > }
> > > > + n_gpwraps += rcu_get_gpwrap_count(cpu);
> > > > }
> > > > for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
> > > > if (pipesummary[i] != 0)
> > > > @@ -2672,6 +2677,7 @@ rcu_torture_stats_print(void)
> > > > pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
> > > > pr_cont("nocb-toggles: %ld:%ld\n",
> > >
> > > The "\n" on the above line needs to be deleted.
> >
> > Ok.
Done.
> >
> > > > atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
> > > > + pr_cont("gpwraps: %ld\n", n_gpwraps);
> > > >
> > > > pr_alert("%s%s ", torture_type, TORTURE_FLAG);
> > > > if (atomic_read(&n_rcu_torture_mberror) ||
> > > > @@ -3842,6 +3848,58 @@ static int rcu_torture_preempt(void *unused)
> > > >
> > > > static enum cpuhp_state rcutor_hp;
> > > >
> > > > +static struct hrtimer ovf_lag_timer;
> > > > +static bool ovf_lag_active;
> > >
> > > Same "ovf" naming complaint as before.
> >
> > Ok.
Done.
> >
> > > > +}
> > > > +
> > > > +static int rcu_torture_ovf_lag_init(void)
> > > > +{
> > > > + if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
> > > > + pr_alert("rcu-torture: lag timing parameters must be positive\n");
> > > > + return -EINVAL;
> > > > + }
> > >
> > > Why not refuse to start this portion of the test when CONFIG_SMP =n
> > > or something other than vanilla RCU? No need to fail the test, just
> > > print something saying that this testing won't be happening.
> >
> > Got it, will do.
>
> Again, thank you!
I changed this to something like:

	if (cur_ops->set_gpwrap_lag && rcu_gpwrap_lag_init())
		goto unwind;

So it will only run for the vanilla RCU flavor.

However, to your point - for TINY, we would still start the timer, which will be
a no-op. But considering it is 5 minutes every 30 minutes, maybe that's OK? ;-)

We could also have an ops pointer ->can_test_gpwrap_lag() which returns false
for Tiny, but then that adds more ops pointers.
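One alternative would be to check both hooks inside the init function itself,
so that Tiny never arms the timer and no extra predicate is needed; a sketch:

	/* Sketch: never arm the timer when the flavor lacks the hooks. */
	if (!cur_ops->set_gpwrap_lag || !cur_ops->get_gpwrap_count) {
		pr_alert("rcu-torture: gpwrap lag testing disabled for this flavor\n");
		return 0;	/* Tiny gets no no-op timer at all. */
	}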
thanks,
- Joel
> On Apr 5, 2025, at 1:23 PM, Paul E. McKenney <paulmck@kernel.org> wrote:
>
> On Sat, Apr 05, 2025 at 12:30:58PM -0000, Joel Fernandes wrote:
>> Hello, Paul,
>>
>>> On Sat, 5 Apr 2025 12:26:12 GMT, "Paul E. McKenney" wrote:
>>> On Sat, Mar 29, 2025 at 07:01:42PM -0400, Joel Fernandes wrote:
>>>> Currently, the ->gpwrap is not tested (at all per my testing) due to the
>>>> requirement of a large delta between a CPU's rdp->gp_seq and its node's
>>>> rnp->gpseq.
>>>>
>>>> This results in no testing of ->gpwrap being set. This patch by default
>>>> adds 5 minutes of testing with ->gpwrap forced by lowering the delta
>>>> between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this is
>>>> configurable, including the active time for the setting and a full
>>>> testing cycle.
>>>>
>>>> By default, the first 25 minutes of a test will have the _default_
>>>> behavior there is right now (ULONG_MAX / 4) delta. Then for 5 minutes,
>>>> we switch to a smaller delta causing 1-2 wraps in 5 minutes. I believe
>>>> this is reasonable since we at least add a little bit of testing for
>>>> usecases where ->gpwrap is set.
>>>>
>>>> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
>>>
>>> I ran this as follows:
>>>
>>> tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 10m --configs "TREE01" --bootargs "rcutorture.ovf_cycle_mins=7" --trust-make
>>>
>>> Once I actually applied your patch, I did get this:
>>>
>>> [ 601.891042] gpwraps: 13745
>>>
>>> Which seems to indicate some testing. ;-)
>>
>> Thanks a lot for running it. I am wondering if I should check in tree.c (only in
>> testing mode), if the wraps are too many and restrict testing if so. Otherwise,
>> it is hard to come up with a constant that ensures the wraps are under control.
>> On the other hand, since this is only for 5 minutes every 30 minutes, we can leave
>> it as is and avoid the complexity.
>
> I don't (yet) see a problem with lots of wraps.
>
>>> Additional comments inline.
>>>
>>> Thanx, Paul
>>>
>>>> ---
>>>> kernel/rcu/rcu.h | 4 +++
>>>> kernel/rcu/rcutorture.c | 64 +++++++++++++++++++++++++++++++++++++++++
>>>> kernel/rcu/tree.c | 34 ++++++++++++++++++++--
>>>> kernel/rcu/tree.h | 1 +
>>>> 4 files changed, 101 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
>>>> index eed2951a4962..9a15e9701e02 100644
>>>> --- a/kernel/rcu/rcu.h
>>>> +++ b/kernel/rcu/rcu.h
>>>> @@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
>>>> unsigned long c_old,
>>>> unsigned long c);
>>>> void rcu_gp_set_torture_wait(int duration);
>>>> +void rcu_set_torture_ovf_lag(unsigned long lag);
>>>> +int rcu_get_gpwrap_count(int cpu);
>>>> #else
>>>> static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
>>>> {
>>>> @@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
>>>> do { } while (0)
>>>> #endif
>>>> static inline void rcu_gp_set_torture_wait(int duration) { }
>>>> +static inline void rcu_set_torture_ovf_lag(unsigned long lag) { }
>>>> +static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
>>>
>>> Very good, you did remember CONFIG_SMP=n. And yes, I did try it. ;-)
>>>
>>> But shouldn't these be function pointers in rcu_torture_ops? That way if
>>> some other flavor of RCU starts doing wrap protection for its grace-period
>>> sequence numbers, this testing can apply directly to that flavor as well.
>>
>> These are here because 'rdp' is not accessible AFAIK from rcutorture.c.
>> I could add wrappers to these and include them as pointers the a struct as well.
>> But I think these will still stay to access rdp.
>
> Why not just pass in the CPU number and let the pointed-to function find
> the rdp?
You mean provide a helper in tree.c that is called from rcutorture, with the helper function returning an rdp?
If so, I can certainly do that; I was just not sure whether this sort of thing was OK, since nothing else in rcutorture accesses rdp directly :)
Thanks,
- Joel
>
>>> Then the pointers can simply be NULL in kernels built with CONFIG_SMP=n.
>>>
>>>> #endif
>>>> unsigned long long rcutorture_gather_gp_seqs(void);
>>>> void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
>>>> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
>>>> index 895a27545ae1..79a72e70913e 100644
>>>> --- a/kernel/rcu/rcutorture.c
>>>> +++ b/kernel/rcu/rcutorture.c
>>>> @@ -118,6 +118,9 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
>>>> torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
>>>> torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
>>>> torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
>>>> +torture_param(int, ovf_cycle_mins, 30, "Total cycle duration for ovf lag testing (in minutes)");
>>>> +torture_param(int, ovf_active_mins, 5, "Duration for which ovf lag is active within each cycle (in minutes)");
>>>> +torture_param(int, ovf_lag_gps, 8, "Value to set for set_torture_ovf_lag during an active testing period.");
>>>
>>> Given that "ovf" means just "overflow", would it make sense to get a "gp"
>>> in there? Maybe just "gpwrap_..."?
>>>
>>> "What is in a name?" ;-)
>>
>> Sure, makes sense I will rename.
>
> Thank you!
>
>>> I could argue with the defaults, but I run long tests often enough that
>>> I am not worried about coverage. As long as we remember to either run
>>> long tests or specify appropriate rcutorture.ovf_cycle_mins when messing
>>> with ->gpwrap code.
>>>
>>>> torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
>>>> torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
>>>> torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
>>>> @@ -2629,6 +2632,7 @@ rcu_torture_stats_print(void)
>>>> int i;
>>>> long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
>>>> long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
>>>> + long n_gpwraps = 0;
>>>> struct rcu_torture *rtcp;
>>>> static unsigned long rtcv_snap = ULONG_MAX;
>>>> static bool splatted;
>>>> @@ -2639,6 +2643,7 @@ rcu_torture_stats_print(void)
>>>> pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
>>>> batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
>>>> }
>>>> + n_gpwraps += rcu_get_gpwrap_count(cpu);
>>>> }
>>>> for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
>>>> if (pipesummary[i] != 0)
>>>> @@ -2672,6 +2677,7 @@ rcu_torture_stats_print(void)
>>>> pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
>>>> pr_cont("nocb-toggles: %ld:%ld\n",
>>>
>>> The "\n" on the above line needs to be deleted.
>>
>> Ok.
>>
>>>> atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
>>>> + pr_cont("gpwraps: %ld\n", n_gpwraps);
>>>>
>>>> pr_alert("%s%s ", torture_type, TORTURE_FLAG);
>>>> if (atomic_read(&n_rcu_torture_mberror) ||
>>>> @@ -3842,6 +3848,58 @@ static int rcu_torture_preempt(void *unused)
>>>>
>>>> static enum cpuhp_state rcutor_hp;
>>>>
>>>> +static struct hrtimer ovf_lag_timer;
>>>> +static bool ovf_lag_active;
>>>
>>> Same "ovf" naming complaint as before.
>>
>> Ok.
>>
>>>> +}
>>>> +
>>>> +static int rcu_torture_ovf_lag_init(void)
>>>> +{
>>>> + if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
>>>> + pr_alert("rcu-torture: lag timing parameters must be positive\n");
>>>> + return -EINVAL;
>>>> + }
>>>
>>> Why not refuse to start this portion of the test when testing CONFIG_SMP=n
>>> or something other than vanilla RCU? No need to fail the test, just
>>> print something saying that this testing won't be happening.
>>
>> Got it, will do.
>
> Again, thank you!
>
>>>> +static void rcu_torture_ovf_lag_cleanup(void)
>>>> +{
>>>> + hrtimer_cancel(&ovf_lag_timer);
>>>> +
>>>> + if (ovf_lag_active) {
>>>> + rcu_set_torture_ovf_lag(0);
>>>> + ovf_lag_active = false;
>>>> + }
>>>> +}
>>>
>>> Did you try the modprobe/rmmod testing to verify that this
>>> cleans up appropriately? You could use the drgn tool to check.
>>> See tools/rcu//rcu-cbs.py for an example drgn script that digs into the
>>> rcu_data structures.
>>
>> Nice, will check!
>>
>> Will work on this and provide v2.
>
> Looking forward to it!
>
> Thanx, Paul
On Sat, Apr 05, 2025 at 05:54:00PM +0000, Joel Fernandes wrote:
> > On Apr 5, 2025, at 1:23 PM, Paul E. McKenney <paulmck@kernel.org> wrote:
> >
> > On Sat, Apr 05, 2025 at 12:30:58PM -0000, Joel Fernandes wrote:
> >> Hello, Paul,
> >>
> >>> On Sat, 5 Apr 2025 12:26:12 GMT, "Paul E. McKenney" wrote:
> >>> On Sat, Mar 29, 2025 at 07:01:42PM -0400, Joel Fernandes wrote:
> >>>> Currently, the ->gpwrap is not tested (at all per my testing) due to the
> >>>> requirement of a large delta between a CPU's rdp->gp_seq and its node's
> >>>> rnp->gpseq.
> >>>>
> >>>> This results in no testing of ->gpwrap being set. This patch by default
> >>>> adds 5 minutes of testing with ->gpwrap forced by lowering the delta
> >>>> between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this is
> >>>> configurable, including the active time for the setting and a full
> >>>> testing cycle.
> >>>>
> >>>> By default, the first 25 minutes of a test will have the _default_
> >>>> behavior there is right now (ULONG_MAX / 4) delta. Then for 5 minutes,
> >>>> we switch to a smaller delta causing 1-2 wraps in 5 minutes. I believe
> >>>> this is reasonable since we at least add a little bit of testing for
> >>>> usecases where ->gpwrap is set.
> >>>>
> >>>> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> >>>
> >>> I ran this as follows:
> >>>
> >>> tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 10m --configs "TREE01" --bootargs "rcutorture.ovf_cycle_mins=7" --trust-make
> >>>
> >>> Once I actually applied your patch, I did get this:
> >>>
> >>> [ 601.891042] gpwraps: 13745
> >>>
> >>> Which seems to indicate some testing. ;-)
> >>
> >> Thanks a lot for running it. I am wondering if I should check in tree.c (only in
> >> testing mode), if the wraps are too many and restrict testing if so. Otherwise,
> >> it is hard to come up with a constant that ensures the wraps are under control.
> >> On the other hand, since this is only for 5 minutes every 30 minutes, we can leave
> >> it as is and avoid the complexity.
> >
> > I don't (yet) see a problem with lots of wraps.
> >
> >>> Additional comments inline.
> >>>
> >>> Thanx, Paul
> >>>
> >>>> ---
> >>>> kernel/rcu/rcu.h | 4 +++
> >>>> kernel/rcu/rcutorture.c | 64 +++++++++++++++++++++++++++++++++++++++++
> >>>> kernel/rcu/tree.c | 34 ++++++++++++++++++++--
> >>>> kernel/rcu/tree.h | 1 +
> >>>> 4 files changed, 101 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> >>>> index eed2951a4962..9a15e9701e02 100644
> >>>> --- a/kernel/rcu/rcu.h
> >>>> +++ b/kernel/rcu/rcu.h
> >>>> @@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> >>>> unsigned long c_old,
> >>>> unsigned long c);
> >>>> void rcu_gp_set_torture_wait(int duration);
> >>>> +void rcu_set_torture_ovf_lag(unsigned long lag);
> >>>> +int rcu_get_gpwrap_count(int cpu);
> >>>> #else
> >>>> static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
> >>>> {
> >>>> @@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> >>>> do { } while (0)
> >>>> #endif
> >>>> static inline void rcu_gp_set_torture_wait(int duration) { }
> >>>> +static inline void rcu_set_torture_ovf_lag(unsigned long lag) { }
> >>>> +static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
> >>>
> >>> Very good, you did remember CONFIG_SMP=n. And yes, I did try it. ;-)
> >>>
> >>> But shouldn't these be function pointers in rcu_torture_ops? That way if
> >>> some other flavor of RCU starts doing wrap protection for its grace-period
> >>> sequence numbers, this testing can apply directly to that flavor as well.
> >>
> >> These are here because 'rdp' is not accessible AFAIK from rcutorture.c.
> >> I could add wrappers to these and include them as pointers the a struct as well.
> >> But I think these will still stay to access rdp.
> >
> > Why not just pass in the CPU number and let the pointed-to function find
> > the rdp?
>
> You mean provide a helper from tree.c that is called in rcutorture, then helper function returning an rdp?
>
> If so, I can certainly do that, was just not sure if this sort of thing was ok since nothing else in rcutorture accesses rdp directly :)
Actually, this is done quite a bit.
For example, the "rcu" .gp_kthread_dbg() function pointer is set to
show_rcu_gp_kthreads(), which is defined either in rcu.h (for Tiny) or
in tree_stall.h (for Tree). Alternatively, sometimes a small wrapper
function is defined in rcutorture.c which calls functions defined
differently for different RCU flavors.
So please feel free to create a function pointer and to define the
function in tree.c. But learn from my many mistakes and make sure to
provide a suitable definition for Tiny RCU! ;-)
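For example, something along these lines (a sketch; the names are
illustrative):

	/* kernel/rcu/tree.c -- the real Tree RCU implementation: */
	int rcu_get_gpwrap_count(int cpu)
	{
		return READ_ONCE(per_cpu_ptr(&rcu_data, cpu)->gpwrap_count);
	}

	/* rcutorture.c -- Tree's ops fill the hook in: */
	.get_gpwrap_count = rcu_get_gpwrap_count,

	/* Tiny either leaves the hook NULL or supplies a trivial stub: */
	static int tiny_get_gpwrap_count(int cpu) { return 0; }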
Thanx, Paul
> Thanks,
>
> - Joel
>
>
> >
> >>> Then the pointers can simply be NULL in kernels built with CONFIG_SMP=n.
> >>>
> >>>> #endif
> >>>> unsigned long long rcutorture_gather_gp_seqs(void);
> >>>> void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
> >>>> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> >>>> index 895a27545ae1..79a72e70913e 100644
> >>>> --- a/kernel/rcu/rcutorture.c
> >>>> +++ b/kernel/rcu/rcutorture.c
> >>>> @@ -118,6 +118,9 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
> >>>> torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
> >>>> torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
> >>>> torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
> >>>> +torture_param(int, ovf_cycle_mins, 30, "Total cycle duration for ovf lag testing (in minutes)");
> >>>> +torture_param(int, ovf_active_mins, 5, "Duration for which ovf lag is active within each cycle (in minutes)");
> >>>> +torture_param(int, ovf_lag_gps, 8, "Value to set for set_torture_ovf_lag during an active testing period.");
> >>>
> >>> Given that "ovf" means just "overflow", would it make sense to get a "gp"
> >>> in there? Maybe just "gpwrap_..."?
> >>>
> >>> "What is in a name?" ;-)
> >>
> >> Sure, makes sense I will rename.
> >
> > Thank you!
> >
> >>> I could argue with the defaults, but I run long tests often enough that
> >>> I am not worried about coverage. As long as we remember to either run
> >>> long tests or specify appropriate rcutorture.ovf_cycle_mins when messing
> >>> with ->gpwrap code.
> >>>
> >>>> torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
> >>>> torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
> >>>> torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
> >>>> @@ -2629,6 +2632,7 @@ rcu_torture_stats_print(void)
> >>>> int i;
> >>>> long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> >>>> long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> >>>> + long n_gpwraps = 0;
> >>>> struct rcu_torture *rtcp;
> >>>> static unsigned long rtcv_snap = ULONG_MAX;
> >>>> static bool splatted;
> >>>> @@ -2639,6 +2643,7 @@ rcu_torture_stats_print(void)
> >>>> pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
> >>>> batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
> >>>> }
> >>>> + n_gpwraps += rcu_get_gpwrap_count(cpu);
> >>>> }
> >>>> for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
> >>>> if (pipesummary[i] != 0)
> >>>> @@ -2672,6 +2677,7 @@ rcu_torture_stats_print(void)
> >>>> pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
> >>>> pr_cont("nocb-toggles: %ld:%ld\n",
> >>>
> >>> The "\n" on the above line needs to be deleted.
> >>
> >> Ok.
> >>
> >>>> atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
> >>>> + pr_cont("gpwraps: %ld\n", n_gpwraps);
> >>>>
> >>>> pr_alert("%s%s ", torture_type, TORTURE_FLAG);
> >>>> if (atomic_read(&n_rcu_torture_mberror) ||
> >>>> @@ -3842,6 +3848,58 @@ static int rcu_torture_preempt(void *unused)
> >>>>
> >>>> static enum cpuhp_state rcutor_hp;
> >>>>
> >>>> +static struct hrtimer ovf_lag_timer;
> >>>> +static bool ovf_lag_active;
> >>>
> >>> Same "ovf" naming complaint as before.
> >>
> >> Ok.
> >>
> >>>> +}
> >>>> +
> >>>> +static int rcu_torture_ovf_lag_init(void)
> >>>> +{
> >>>> + if (ovf_cycle_mins <= 0 || ovf_active_mins <= 0) {
> >>>> + pr_alert("rcu-torture: lag timing parameters must be positive\n");
> >>>> + return -EINVAL;
> >>>> + }
> >>>
> >>> Why not refuse to start this portion of the test when testing CONFIG_SMP=n
> >>> or something other than vanilla RCU? No need to fail the test, just
> >>> print something saying that this testing won't be happening.
> >>
> >> Got it, will do.
> >
> > Again, thank you!
> >
> >>>> +static void rcu_torture_ovf_lag_cleanup(void)
> >>>> +{
> >>>> + hrtimer_cancel(&ovf_lag_timer);
> >>>> +
> >>>> + if (ovf_lag_active) {
> >>>> + rcu_set_torture_ovf_lag(0);
> >>>> + ovf_lag_active = false;
> >>>> + }
> >>>> +}
> >>>
> >>> Did you try the modprobe/rmmod testing to verify that this
> >>> cleans up appropriately? You could use the drgn tool to check.
> >>> See tools/rcu//rcu-cbs.py for an example drgn script that digs into the
> >>> rcu_data structures.
> >>
> >> Nice, will check!
> >>
> >> Will work on this and provide v2.
> >
> > Looking forward to it!
> >
> > Thanx, Paul