[PATCH rcu 11/20] srcu: Move grace-period fields from srcu_struct to srcu_usage

Paul E. McKenney posted 20 patches 2 years, 10 months ago
[PATCH rcu 11/20] srcu: Move grace-period fields from srcu_struct to srcu_usage
Posted by Paul E. McKenney 2 years, 10 months ago
This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed,
->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields
from the srcu_struct structure to the srcu_usage structure to reduce
the size of the former in order to improve cache locality.

Suggested-by: Christoph Hellwig <hch@lst.de>
Tested-by: Sachin Sant <sachinp@linux.ibm.com>
Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/srcutree.h |  25 ++++----
 kernel/rcu/srcutree.c    | 128 +++++++++++++++++++--------------------
 2 files changed, 77 insertions(+), 76 deletions(-)

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index d04e3da6181c..372e35b0e8b6 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -68,6 +68,11 @@ struct srcu_usage {
 	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
 	spinlock_t __private lock;		/* Protect counters and size state. */
 	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
+	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
+	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	unsigned long srcu_gp_start;		/* Last GP start timestamp (jiffies) */
+	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
 };
 
 /*
@@ -75,11 +80,6 @@ struct srcu_usage {
  */
 struct srcu_struct {
 	unsigned int srcu_idx;			/* Current rdr array element. */
-	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
-	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
-	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
-	unsigned long srcu_gp_start;		/* Last GP start timestamp (jiffies) */
-	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
 	unsigned long srcu_size_jiffies;	/* Current contention-measurement interval. */
 	unsigned long srcu_n_lock_retries;	/* Contention events in current interval. */
 	unsigned long srcu_n_exp_nodelay;	/* # expedited no-delays in current GP phase. */
@@ -115,8 +115,13 @@ struct srcu_struct {
 #define SRCU_STATE_SCAN1	1
 #define SRCU_STATE_SCAN2	2
 
-#define __SRCU_STRUCT_INIT_COMMON(name, usage_name)						\
+#define __SRCU_USAGE_INIT(name)									\
+{												\
+	.lock = __SPIN_LOCK_UNLOCKED(name.lock),						\
 	.srcu_gp_seq_needed = -1UL,								\
+}
+
+#define __SRCU_STRUCT_INIT_COMMON(name, usage_name)						\
 	.work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0),					\
 	.srcu_sup = &usage_name,								\
 	__SRCU_DEP_MAP_INIT(name)
@@ -153,9 +158,7 @@ struct srcu_struct {
  */
 #ifdef MODULE
 # define __DEFINE_SRCU(name, is_static)								\
-	static struct srcu_usage name##_srcu_usage = {						\
-		.lock = __SPIN_LOCK_UNLOCKED(name.lock),					\
-	};											\
+	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
 	is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage);	\
 	extern struct srcu_struct * const __srcu_struct_##name;					\
 	struct srcu_struct * const __srcu_struct_##name						\
@@ -163,9 +166,7 @@ struct srcu_struct {
 #else
 # define __DEFINE_SRCU(name, is_static)								\
 	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);				\
-	static struct srcu_usage name##_srcu_usage = {						\
-		.lock = __SPIN_LOCK_UNLOCKED(name.lock),					\
-	};											\
+	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
 	is_static struct srcu_struct name =							\
 		__SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data)
 #endif
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index a36066798de7..340eb685cf64 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -135,8 +135,8 @@ static void init_srcu_struct_data(struct srcu_struct *ssp)
 		spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
 		rcu_segcblist_init(&sdp->srcu_cblist);
 		sdp->srcu_cblist_invoking = false;
-		sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
-		sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
+		sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq;
+		sdp->srcu_gp_seq_needed_exp = ssp->srcu_sup->srcu_gp_seq;
 		sdp->mynode = NULL;
 		sdp->cpu = cpu;
 		INIT_WORK(&sdp->work, srcu_invoke_callbacks);
@@ -247,7 +247,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 	mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
 	mutex_init(&ssp->srcu_sup->srcu_gp_mutex);
 	ssp->srcu_idx = 0;
-	ssp->srcu_gp_seq = 0;
+	ssp->srcu_sup->srcu_gp_seq = 0;
 	ssp->srcu_barrier_seq = 0;
 	mutex_init(&ssp->srcu_barrier_mutex);
 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 0);
@@ -261,8 +261,8 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 		return -ENOMEM;
 	}
 	init_srcu_struct_data(ssp);
-	ssp->srcu_gp_seq_needed_exp = 0;
-	ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
+	ssp->srcu_sup->srcu_gp_seq_needed_exp = 0;
+	ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns();
 	if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
 		if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) {
 			if (!ssp->sda_is_static) {
@@ -275,7 +275,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 			WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
 		}
 	}
-	smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
+	smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, 0); /* Init done. */
 	return 0;
 }
 
@@ -402,10 +402,10 @@ static void check_init_srcu_struct(struct srcu_struct *ssp)
 	unsigned long flags;
 
 	/* The smp_load_acquire() pairs with the smp_store_release(). */
-	if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/
+	if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/
 		return; /* Already initialized. */
 	spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
-	if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) {
+	if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) {
 		spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
 		return;
 	}
@@ -616,11 +616,11 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
 	unsigned long j;
 	unsigned long jbase = SRCU_INTERVAL;
 
-	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_sup->srcu_gp_seq), READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp)))
 		jbase = 0;
-	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
+	if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq))) {
 		j = jiffies - 1;
-		gpstart = READ_ONCE(ssp->srcu_gp_start);
+		gpstart = READ_ONCE(ssp->srcu_sup->srcu_gp_start);
 		if (time_after(j, gpstart))
 			jbase += j - gpstart;
 		if (!jbase) {
@@ -656,12 +656,12 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
 		if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
 			return; /* Forgot srcu_barrier(), so just leak it! */
 	}
-	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
-	    WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) ||
+	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
+	    WARN_ON(rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq) != ssp->srcu_sup->srcu_gp_seq_needed) ||
 	    WARN_ON(srcu_readers_active(ssp))) {
 		pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n",
-			__func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)),
-			rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed);
+			__func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)),
+			rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ssp->srcu_sup->srcu_gp_seq_needed);
 		return; /* Caller forgot to stop doing call_srcu()? */
 	}
 	kfree(ssp->srcu_sup->node);
@@ -775,18 +775,18 @@ static void srcu_gp_start(struct srcu_struct *ssp)
 	else
 		sdp = this_cpu_ptr(ssp->sda);
 	lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
-	WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
+	WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed));
 	spin_lock_rcu_node(sdp);  /* Interrupts already disabled. */
 	rcu_segcblist_advance(&sdp->srcu_cblist,
-			      rcu_seq_current(&ssp->srcu_gp_seq));
+			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
-				       rcu_seq_snap(&ssp->srcu_gp_seq));
+				       rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq));
 	spin_unlock_rcu_node(sdp);  /* Interrupts remain disabled. */
-	WRITE_ONCE(ssp->srcu_gp_start, jiffies);
+	WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies);
 	WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0);
 	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
-	rcu_seq_start(&ssp->srcu_gp_seq);
-	state = rcu_seq_state(ssp->srcu_gp_seq);
+	rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq);
+	state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq);
 	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
 }
 
@@ -865,16 +865,16 @@ static void srcu_gp_end(struct srcu_struct *ssp)
 
 	/* End the current grace period. */
 	spin_lock_irq_rcu_node(ssp->srcu_sup);
-	idx = rcu_seq_state(ssp->srcu_gp_seq);
+	idx = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
-	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_sup->srcu_gp_seq), READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp)))
 		cbdelay = 0;
 
-	WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
-	rcu_seq_end(&ssp->srcu_gp_seq);
-	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
-	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
-		WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq);
+	WRITE_ONCE(ssp->srcu_sup->srcu_last_gp_end, ktime_get_mono_fast_ns());
+	rcu_seq_end(&ssp->srcu_sup->srcu_gp_seq);
+	gpseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
+	if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, gpseq))
+		WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, gpseq);
 	spin_unlock_irq_rcu_node(ssp->srcu_sup);
 	mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
 	/* A new grace period can start at this point.  But only one. */
@@ -925,9 +925,9 @@ static void srcu_gp_end(struct srcu_struct *ssp)
 
 	/* Start a new grace period if needed. */
 	spin_lock_irq_rcu_node(ssp->srcu_sup);
-	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
+	gpseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
 	if (!rcu_seq_state(gpseq) &&
-	    ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) {
+	    ULONG_CMP_LT(gpseq, ssp->srcu_sup->srcu_gp_seq_needed)) {
 		srcu_gp_start(ssp);
 		spin_unlock_irq_rcu_node(ssp->srcu_sup);
 		srcu_reschedule(ssp, 0);
@@ -960,7 +960,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
 	if (snp)
 		for (; snp != NULL; snp = snp->srcu_parent) {
 			sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
-			if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) ||
+			if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) ||
 			    (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
 				return;
 			spin_lock_irqsave_rcu_node(snp, flags);
@@ -973,8 +973,8 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
 			spin_unlock_irqrestore_rcu_node(snp, flags);
 		}
 	spin_lock_irqsave_ssp_contention(ssp, &flags);
-	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
-		WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
+	if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
+		WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
 	spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
 }
 
@@ -1010,7 +1010,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
 	if (snp_leaf)
 		/* Each pass through the loop does one level of the srcu_node tree. */
 		for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
-			if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) && snp != snp_leaf)
+			if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) && snp != snp_leaf)
 				return; /* GP already done and CBs recorded. */
 			spin_lock_irqsave_rcu_node(snp, flags);
 			snp_seq = snp->srcu_have_cbs[idx];
@@ -1037,20 +1037,20 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
 
 	/* Top of tree, must ensure the grace period will be started. */
 	spin_lock_irqsave_ssp_contention(ssp, &flags);
-	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) {
+	if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed, s)) {
 		/*
 		 * Record need for grace period s.  Pair with load
 		 * acquire setting up for initialization.
 		 */
-		smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/
+		smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, s); /*^^^*/
 	}
-	if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
-		WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
+	if (!do_norm && ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
+		WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
 
 	/* If grace period not already in progress, start it. */
-	if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) &&
-	    rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
-		WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
+	if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) &&
+	    rcu_seq_state(ssp->srcu_sup->srcu_gp_seq) == SRCU_STATE_IDLE) {
+		WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed));
 		srcu_gp_start(ssp);
 
 		// And how can that list_add() in the "else" clause
@@ -1164,18 +1164,18 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp)
 
 	/* First, see if enough time has passed since the last GP. */
 	t = ktime_get_mono_fast_ns();
-	tlast = READ_ONCE(ssp->srcu_last_gp_end);
+	tlast = READ_ONCE(ssp->srcu_sup->srcu_last_gp_end);
 	if (exp_holdoff == 0 ||
 	    time_in_range_open(t, tlast, tlast + exp_holdoff))
 		return false; /* Too soon after last GP. */
 
 	/* Next, check for probable idleness. */
-	curseq = rcu_seq_current(&ssp->srcu_gp_seq);
+	curseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
 	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
-	if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed)))
+	if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed)))
 		return false; /* Grace period in progress, so not idle. */
 	smp_mb(); /* Order ->srcu_gp_seq with prior access. */
-	if (curseq != rcu_seq_current(&ssp->srcu_gp_seq))
+	if (curseq != rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq))
 		return false; /* GP # changed, so not idle. */
 	return true; /* With reasonable probability, idle! */
 }
@@ -1218,8 +1218,8 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 	if (rhp)
 		rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
-			      rcu_seq_current(&ssp->srcu_gp_seq));
-	s = rcu_seq_snap(&ssp->srcu_gp_seq);
+			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
+	s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
 	if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
 		sdp->srcu_gp_seq_needed = s;
@@ -1430,7 +1430,7 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
 	// Any prior manipulation of SRCU-protected data must happen
 	// before the load from ->srcu_gp_seq.
 	smp_mb();
-	return rcu_seq_snap(&ssp->srcu_gp_seq);
+	return rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
 }
 EXPORT_SYMBOL_GPL(get_state_synchronize_srcu);
 
@@ -1477,7 +1477,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
  */
 bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
 {
-	if (!rcu_seq_done(&ssp->srcu_gp_seq, cookie))
+	if (!rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
 		return false;
 	// Ensure that the end of the SRCU grace period happens before
 	// any subsequent code that the caller might execute.
@@ -1597,16 +1597,16 @@ static void srcu_advance_state(struct srcu_struct *ssp)
 	 * The load-acquire ensures that we see the accesses performed
 	 * by the prior grace period.
 	 */
-	idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */
+	idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */
 	if (idx == SRCU_STATE_IDLE) {
 		spin_lock_irq_rcu_node(ssp->srcu_sup);
-		if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
-			WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq));
+		if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
+			WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq));
 			spin_unlock_irq_rcu_node(ssp->srcu_sup);
 			mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
 			return;
 		}
-		idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq));
+		idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq));
 		if (idx == SRCU_STATE_IDLE)
 			srcu_gp_start(ssp);
 		spin_unlock_irq_rcu_node(ssp->srcu_sup);
@@ -1616,7 +1616,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
 		}
 	}
 
-	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
+	if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
 		idx = 1 ^ (ssp->srcu_idx & 1);
 		if (!try_check_zero(ssp, idx, 1)) {
 			mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
@@ -1624,12 +1624,12 @@ static void srcu_advance_state(struct srcu_struct *ssp)
 		}
 		srcu_flip(ssp);
 		spin_lock_irq_rcu_node(ssp->srcu_sup);
-		rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
+		rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2);
 		ssp->srcu_n_exp_nodelay = 0;
 		spin_unlock_irq_rcu_node(ssp->srcu_sup);
 	}
 
-	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
+	if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
 
 		/*
 		 * SRCU read-side critical sections are normally short,
@@ -1666,7 +1666,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	rcu_cblist_init(&ready_cbs);
 	spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
-			      rcu_seq_current(&ssp->srcu_gp_seq));
+			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
 	if (sdp->srcu_cblist_invoking ||
 	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
 		spin_unlock_irq_rcu_node(sdp);
@@ -1694,7 +1694,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
-				       rcu_seq_snap(&ssp->srcu_gp_seq));
+				       rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq));
 	sdp->srcu_cblist_invoking = false;
 	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
 	spin_unlock_irq_rcu_node(sdp);
@@ -1711,12 +1711,12 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
 	bool pushgp = true;
 
 	spin_lock_irq_rcu_node(ssp->srcu_sup);
-	if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
-		if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) {
+	if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
+		if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) {
 			/* All requests fulfilled, time to go idle. */
 			pushgp = false;
 		}
-	} else if (!rcu_seq_state(ssp->srcu_gp_seq)) {
+	} else if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)) {
 		/* Outstanding request and no GP.  Start one. */
 		srcu_gp_start(ssp);
 	}
@@ -1762,7 +1762,7 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
 	if (test_type != SRCU_FLAVOR)
 		return;
 	*flags = 0;
-	*gp_seq = rcu_seq_current(&ssp->srcu_gp_seq);
+	*gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
 }
 EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
 
@@ -1791,7 +1791,7 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
 	if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name))
 		ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
 	pr_alert("%s%s Tree SRCU g%ld state %d (%s)",
-		 tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), ss_state,
+		 tt, tf, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ss_state,
 		 srcu_size_state_name[ss_state_idx]);
 	if (!ssp->sda) {
 		// Called after cleanup_srcu_struct(), perhaps.
@@ -1905,7 +1905,7 @@ static void srcu_module_going(struct module *mod)
 
 	for (i = 0; i < mod->num_srcu_structs; i++) {
 		ssp = *(sspp++);
-		if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed)) &&
+		if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed)) &&
 		    !WARN_ON_ONCE(!ssp->sda_is_static))
 			cleanup_srcu_struct(ssp);
 		free_percpu(ssp->sda);
-- 
2.40.0.rc2
Re: [PATCH rcu 11/20] srcu: Move grace-period fields from srcu_struct to srcu_usage
Posted by Jon Hunter 2 years, 8 months ago
Hi Paul,

On 30/03/2023 23:47, Paul E. McKenney wrote:
> This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed,
> ->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields
> from the srcu_struct structure to the srcu_usage structure to reduce
> the size of the former in order to improve cache locality.
> 
> Suggested-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Sachin Sant <sachinp@linux.ibm.com>
> Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>


I have noticed a suspend regression on some of our Tegra boards recently with v6.4-rc and, interestingly, bisect is pointing to this commit. I was unable to revert this on top of the latest mainline, but if I check out this commit, suspend fails, and if I check out the previous commit, it passes.

Enabling more debug I was able to capture the following crash log I see on one of the boards ...

[   57.327645] PM: suspend entry (deep)
[   57.331660] Filesystems sync: 0.000 seconds
[   57.340147] Freezing user space processes
[   57.347470] Freezing user space processes completed (elapsed 0.007 seconds)
[   57.347501] OOM killer disabled.
[   57.347508] Freezing remaining freezable tasks
[   57.348834] Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
[   57.349932] 8<--- cut here ---
[   57.349943] Unable to handle kernel NULL pointer dereference at virtual address 00000000 when write
[   57.349960] [00000000] *pgd=00000000
[   57.349986] Internal error: Oops: 805 [#1] PREEMPT SMP ARM
[   57.350007] Modules linked in: tegra30_tsensor
[   57.350033] CPU: 0 PID: 589 Comm: rtcwake Not tainted 6.3.0-rc1-00011-g03200b5ca3b4-dirty #3
[   57.350057] Hardware name: NVIDIA Tegra SoC (Flattened Device Tree)
[   57.350067] PC is at rcu_segcblist_enqueue+0x2c/0x38
[   57.350120] LR is at srcu_gp_start_if_needed+0xe4/0x544
[   57.350169] pc : [<c01a5120>]    lr : [<c0198b5c>]    psr: a0070093
[   57.350183] sp : f0b2dd20  ip : 3b5870ef  fp : 00000000
[   57.350194] r10: ef787d84  r9 : 00000000  r8 : ef787d80
[   57.350205] r7 : 80070013  r6 : c131ec30  r5 : ef787d40  r4 : f0b2dd64
[   57.350217] r3 : 00000000  r2 : 00000000  r1 : f0b2dd64  r0 : ef787d84
[   57.350230] Flags: NzCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment none
[   57.350251] Control: 10c5387d  Table: 81d8004a  DAC: 00000051
[   57.350261] Register r0 information: non-slab/vmalloc memory
[   57.350283] Register r1 information: 2-page vmalloc region starting at 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
[   57.350322] Register r2 information: NULL pointer
[   57.350337] Register r3 information: NULL pointer
[   57.350350] Register r4 information: 2-page vmalloc region starting at 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
[   57.350379] Register r5 information: non-slab/vmalloc memory
[   57.350394] Register r6 information: non-slab/vmalloc memory
[   57.350408] Register r7 information: non-paged memory
[   57.350422] Register r8 information: non-slab/vmalloc memory
[   57.350436] Register r9 information: NULL pointer
[   57.350449] Register r10 information: non-slab/vmalloc memory
[   57.350463] Register r11 information: NULL pointer
[   57.350477] Register r12 information: non-paged memory
[   57.350491] Process rtcwake (pid: 589, stack limit = 0x410bb531)
[   57.350510] Stack: (0xf0b2dd20 to 0xf0b2e000)
[   57.350534] dd20: 00000000 c1ee4a40 f0b2dd7c c0184f24 ef781495 3b5870ef c1ee4a40 c1ee4a40
[   57.350555] dd40: c131ec30 00000000 00000002 c0f3d1fc c3542ac0 c2abbb10 c1ee4a40 c0199044
[   57.350574] dd60: 60070013 00000000 c0195924 00000000 00000000 f0b2dd74 f0b2dd74 3b5870ef
[   57.350592] dd80: 00000000 c131ebc0 c120ab28 c0146d9c c2785b94 c2785b40 c0fee9f4 c0872590
[   57.350611] dda0: c2785b40 c08c39cc c2785b40 c08c3a3c c2788c00 c08c40b0 c0f3d1fc c066f028
[   57.350630] ddc0: f0b2de14 c2788c00 c1325ef4 c08c1d40 c2788c00 c1325ef4 c0fee9f4 c08c31cc
[   57.350648] dde0: c13708a0 0000000d 00000000 c0681c10 c16afe84 00000002 56508788 0000000d
[   57.350665] de00: 00000002 c13708a0 10624dd3 56409580 0000000d 00000000 00000002 c0f3d1fc
[   57.350685] de20: c3542ac0 c2abbb10 c1ee4a40 c06824e4 00000000 ffffa900 00000000 c1386510
[   57.350703] de40: 00000003 00000003 c1204f75 c017e8a8 c3542ac0 c2abbb10 00428228 c0171574
[   57.350721] de60: 00000000 00000000 00000003 3b5870ef c1204f75 00000000 00000003 c137aeb4
[   57.350739] de80: c1204f75 c0f3d1fc c3542ac0 c2abbb10 00428228 c017f380 00000003 c0f38a54
[   57.350757] dea0: 00000003 c1386524 00000004 c017d708 00000004 c2abbb00 00000000 00000000
[   57.350775] dec0: c3542ac0 f0b2df28 c2abbb10 c03305b4 00000000 00000000 c2953c00 c1ee4a40
[   57.350794] dee0: 00429438 00000004 c0d18488 00004004 00000000 c02b1094 00000a55 c1d80010
[   57.350812] df00: c1d80010 00000000 00000000 f0b2df78 01010006 00000004 00000000 00429438
[   57.350830] df20: 00000000 00000000 c2953c00 00000000 00000000 00000000 00000000 00000000
[   57.350848] df40: 00000000 00004004 00000000 00000000 0000006c 3b5870ef c2953c00 c2953c00
[   57.350866] df60: 00000000 00000000 c1ee4a40 00429438 00000004 c02b12c8 00000000 00000000
[   57.350885] df80: 00001008 3b5870ef 0000006c 00429438 00428228 00000004 c0100324 c1ee4a40
[   57.350902] dfa0: 00000004 c01000c0 0000006c 00429438 00000004 00429438 00000004 00000000
[   57.350920] dfc0: 0000006c 00429438 00428228 00000004 00000004 00000004 0041578c 00428228
[   57.350938] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206 600f0030 00000004 00000000 00000000
[   57.350960]  rcu_segcblist_enqueue from srcu_gp_start_if_needed+0xe4/0x544
[   57.351023]  srcu_gp_start_if_needed from __synchronize_srcu.part.6+0x70/0x98
[   57.351084]  __synchronize_srcu.part.6 from srcu_notifier_chain_unregister+0x6c/0xdc
[   57.351155]  srcu_notifier_chain_unregister from cpufreq_unregister_notifier+0x60/0xbc
[   57.351215]  cpufreq_unregister_notifier from tegra_actmon_pause.part.0+0x1c/0x54
[   57.351277]  tegra_actmon_pause.part.0 from tegra_actmon_stop+0x38/0x3c
[   57.351324]  tegra_actmon_stop from tegra_governor_event_handler+0x100/0x11c
[   57.351373]  tegra_governor_event_handler from devfreq_suspend_device+0x64/0xac
[   57.351423]  devfreq_suspend_device from devfreq_suspend+0x30/0x64
[   57.351467]  devfreq_suspend from dpm_suspend+0x34/0x33c
[   57.351506]  dpm_suspend from dpm_suspend_start+0x90/0x98
[   57.351528]  dpm_suspend_start from suspend_devices_and_enter+0xe4/0x93c
[   57.351573]  suspend_devices_and_enter from pm_suspend+0x280/0x3ac
[   57.351614]  pm_suspend from state_store+0x6c/0xc8
[   57.351654]  state_store from kernfs_fop_write_iter+0x118/0x1b4
[   57.351696]  kernfs_fop_write_iter from vfs_write+0x314/0x3d4
[   57.351733]  vfs_write from ksys_write+0xa0/0xd0
[   57.351760]  ksys_write from ret_fast_syscall+0x0/0x54
[   57.351788] Exception stack(0xf0b2dfa8 to 0xf0b2dff0)
[   57.351809] dfa0:                   0000006c 00429438 00000004 00429438 00000004 00000000
[   57.351828] dfc0: 0000006c 00429438 00428228 00000004 00000004 00000004 0041578c 00428228
[   57.351843] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206
[   57.351863] Code: e2833001 e5803034 e5812000 e5903010 (e5831000)
[   57.351875] ---[ end trace 0000000000000000 ]---


I have not dug into this yet and so wanted to see if you have any thoughts on this?

Thanks!
Jon

-- 
nvpublic
Re: [PATCH rcu 11/20] srcu: Move grace-period fields from srcu_struct to srcu_usage
Posted by Linux regression tracking #adding (Thorsten Leemhuis) 2 years, 8 months ago
[CCing the regression list, as it should be in the loop for regressions:
https://docs.kernel.org/admin-guide/reporting-regressions.html]

[TLDR: I'm adding this report to the list of tracked Linux kernel
regressions; the text you find below is based on a few templates
paragraphs you might have encountered already in similar form.
See link in footer if these mails annoy you.]

On 01.06.23 13:14, Jon Hunter wrote:
> 
> On 30/03/2023 23:47, Paul E. McKenney wrote:
>> This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed,
>> ->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields
>> from the srcu_struct structure to the srcu_usage structure to reduce
>> the size of the former in order to improve cache locality.
>>
>> Suggested-by: Christoph Hellwig <hch@lst.de>
>> Tested-by: Sachin Sant <sachinp@linux.ibm.com>
>> Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> 
> 
> I have noticed a suspend regression on some of our Tegra boards recently
> with v6.4-rc and interestingly bisect is pointing to this commit. I was
> unable revert this on top of the latest mainline but if I checkout this
> commit suspend fails and if I checkout the previous commit is passes.

Thanks for the report. To be sure the issue doesn't fall through the
cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression
tracking bot:

#regzbot ^introduced 95433f726301
#regzbot title rcu: "spinlock bad magic" BUG when the SRCU notifier was
ever used
#regzbot monitor:
https://lore.kernel.org/all/20230526073539.339203-1-wenst@chromium.org/
#regzbot fix: notifier: Initialize new struct srcu_usage field
#regzbot ignore-activity

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.