This patch adds srcu_read_lock_lite() and srcu_read_unlock_lite(), which
dispense with the read-side smp_mb() but are restricted to code
regions that RCU is watching. If a given srcu_struct structure uses
srcu_read_lock_lite() and srcu_read_unlock_lite(), it is not permitted
to use any other SRCU read-side marker, before, during, or after.
Another price of light-weight readers is heavier-weight grace periods:
SRCU grace periods on srcu_struct structures used by light-weight
readers incur at least two calls to synchronize_rcu().  In addition,
normal SRCU grace periods for light-weight-reader srcu_struct
structures never auto-expedite.
Note that expedited SRCU grace periods for light-weight-reader
srcu_struct structures still invoke synchronize_rcu(), not
synchronize_rcu_expedited(), the idea being to keep the IPIs
down to a dull roar.
The srcu_read_lock_lite() and srcu_read_unlock_lite() functions may not
(repeat, *not*) be used from NMI handlers, but if this is needed, an
additional flavor of SRCU reader can be added by some future commit.
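[ Editorial illustration, not part of this patch: a minimal sketch of the
intended usage, assuming an srcu_struct used exclusively with the _lite()
flavor and readers that run only where RCU is watching.  The names
my_srcu, foo, and global_foo are hypothetical. ]

/* Hypothetical sketch of a _lite()-only srcu_struct and its reader. */
DEFINE_SRCU(my_srcu);

struct foo {
	int a;
};
static struct foo __rcu *global_foo;

/* Must be called from a context where RCU is watching. */
static int reader(void)
{
	int idx;
	int val;

	idx = srcu_read_lock_lite(&my_srcu);	/* No smp_mb() on this path. */
	val = srcu_dereference(global_foo, &my_srcu)->a;
	srcu_read_unlock_lite(&my_srcu, idx);	/* Must pair with _lite(). */
	return val;
}

Updaters are unchanged: rcu_assign_pointer() followed by
synchronize_srcu(&my_srcu), which now incurs the heavier-weight grace
period described above.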
[ paulmck: Apply Alexei Starovoitov expediting feedback. ]
[ paulmck: Apply kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: kernel test robot <oliver.sang@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcu.h | 51 ++++++++++++++++++++++++-
include/linux/srcutree.h | 1 +
kernel/rcu/srcutree.c | 82 ++++++++++++++++++++++++++++++++++------
3 files changed, 122 insertions(+), 12 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 84daaa33ea0ab..4ba96e2cfa405 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -56,6 +56,13 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
void cleanup_srcu_struct(struct srcu_struct *ssp);
int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
+#ifdef CONFIG_TINY_SRCU
+#define __srcu_read_lock_lite __srcu_read_lock
+#define __srcu_read_unlock_lite __srcu_read_unlock
+#else // #ifdef CONFIG_TINY_SRCU
+int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp);
+#endif // #else // #ifdef CONFIG_TINY_SRCU
void synchronize_srcu(struct srcu_struct *ssp);
#define SRCU_GET_STATE_COMPLETED 0x1
@@ -179,7 +186,7 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
#else
-static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) { }
+#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
#endif
@@ -249,6 +256,32 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
return retval;
}
+/**
+ * srcu_read_lock_lite - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but for a light-weight
+ * smp_mb()-free reader. See srcu_read_lock() for more information.
+ *
+ * If srcu_read_lock_lite() is ever used on an srcu_struct structure,
+ * then none of the other flavors may be used, whether before, during,
+ * or after. Note that grace-period auto-expediting is disabled for _lite
+ * srcu_struct structures because auto-expedited grace periods invoke
+ * synchronize_rcu_expedited(), IPIs and all.
+ *
+ * Note that srcu_read_lock_lite() can be invoked only from those contexts
+ * where RCU is watching. Otherwise, lockdep will complain.
+ */
+static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
+{
+ int retval;
+
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+ retval = __srcu_read_lock_lite(ssp);
+ rcu_try_lock_acquire(&ssp->dep_map);
+ return retval;
+}
+
/**
* srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
* @ssp: srcu_struct in which to register the new reader.
@@ -325,6 +358,22 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
__srcu_read_unlock(ssp, idx);
}
+/**
+ * srcu_read_unlock_lite - unregister an old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock_lite().
+ *
+ * Exit a light-weight SRCU read-side critical section.
+ */
+static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+ __releases(ssp)
+{
+ WARN_ON_ONCE(idx & ~0x1);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+ srcu_lock_release(&ssp->dep_map);
+ __srcu_read_unlock(ssp, idx);
+}
+
/**
* srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 79ad809c7f035..8074138cbd624 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -46,6 +46,7 @@ struct srcu_data {
/* Values for ->srcu_reader_flavor. */
#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock().
#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe().
+#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite().
/*
* Node in SRCU combining tree, similar in function to rcu_data.
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 4c51be484b48a..bf51758cf4a64 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -429,20 +429,29 @@ static bool srcu_gp_is_expedited(struct srcu_struct *ssp)
}
/*
- * Returns approximate total of the readers' ->srcu_lock_count[] values
- * for the rank of per-CPU counters specified by idx.
+ * Computes approximate total of the readers' ->srcu_lock_count[] values
+ * for the rank of per-CPU counters specified by idx, and returns true if
+ * the caller did the proper barrier (gp), and if the count of the locks
+ * matches that of the unlocks passed in.
*/
-static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
+static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
{
int cpu;
+ unsigned long mask = 0;
unsigned long sum = 0;
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
+ if (IS_ENABLED(CONFIG_PROVE_RCU))
+ mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
}
- return sum;
+ WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
+ "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
+ if (mask & SRCU_READ_FLAVOR_LITE && !gp)
+ return false;
+ return sum == unlocks;
}
/*
@@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
*/
static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
{
+ bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
unsigned long unlocks;
unlocks = srcu_readers_unlock_idx(ssp, idx);
@@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
* unlock is counted. Needs to be a smp_mb() as the read side may
* contain a read from a variable that is written to before the
* synchronize_srcu() in the write side. In this case smp_mb()s
- * A and B act like the store buffering pattern.
+ * A and B (or X and Y) act like the store buffering pattern.
*
- * This smp_mb() also pairs with smp_mb() C to prevent accesses
- * after the synchronize_srcu() from being executed before the
- * grace period ends.
+ * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
+ * Z) to prevent accesses after the synchronize_srcu() from being
+ * executed before the grace period ends.
*/
- smp_mb(); /* A */
+ if (!did_gp)
+ smp_mb(); /* A */
+ else
+ synchronize_rcu(); /* X */
/*
* If the locks are the same as the unlocks, then there must have
@@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
* which are unlikely to be configured with an address space fully
* populated with memory, at least not anytime soon.
*/
- return srcu_readers_lock_idx(ssp, idx) == unlocks;
+ return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
}
/**
@@ -750,6 +763,47 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct. Returns an index that must be passed to the matching
+ * srcu_read_unlock_lite().
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+int __srcu_read_lock_lite(struct srcu_struct *ssp)
+{
+ int idx;
+
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
+ idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+ this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */
+ barrier(); /* Avoid leaking the critical section. */
+ return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock_lite);
+
+/*
+ * Removes the count for the old reader from the appropriate
+ * per-CPU element of the srcu_struct. Note that this may well be a
+ * different CPU than that which was incremented by the corresponding
+ * srcu_read_lock_lite(), but it must be within the same task.
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+{
+ barrier(); /* Avoid leaking the critical section. */
+ this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock_lite);
+
#ifdef CONFIG_NEED_SRCU_NMI_SAFE
/*
@@ -1134,6 +1188,8 @@ static void srcu_flip(struct srcu_struct *ssp)
* it stays until either (1) Compilers learn about this sort of
* control dependency or (2) Some production workload running on
* a production system is unduly delayed by this slowpath smp_mb().
+ * Except for _lite() readers, where it is inoperative, which
+ * means that it is a good thing that it is redundant.
*/
smp_mb(); /* E */ /* Pairs with B and C. */
@@ -1152,7 +1208,8 @@ static void srcu_flip(struct srcu_struct *ssp)
/*
* If SRCU is likely idle, in other words, the next SRCU grace period
- * should be expedited, return true, otherwise return false.
+ * should be expedited, return true, otherwise return false. Except that
+ * in the presence of _lite() readers, always return false.
*
* Note that it is OK for several current from-idle requests for a new
* grace period from idle to specify expediting because they will all end
@@ -1181,6 +1238,9 @@ static bool srcu_should_expedite(struct srcu_struct *ssp)
unsigned long tlast;
check_init_srcu_struct(ssp);
+ /* If _lite() readers, don't do unsolicited expediting. */
+ if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)
+ return false;
/* If the local srcu_data structure has callbacks, not idle. */
sdp = raw_cpu_ptr(ssp->sda);
spin_lock_irqsave_rcu_node(sdp, flags);
--
2.40.1
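[ Editorial illustration, not kernel code: the reader-flavor bits above
are distinct powers of two, so OR-ing the per-CPU ->srcu_reader_flavor
fields together and testing mask & (mask - 1) flags any mixture of
flavors.  A standalone userspace sketch of that check, with the flavor
values copied from srcutree.h: ]

#include <stdio.h>

#define SRCU_READ_FLAVOR_NORMAL 0x1
#define SRCU_READ_FLAVOR_NMI    0x2
#define SRCU_READ_FLAVOR_LITE   0x4

/* True iff more than one flavor bit is set in the ORed-together mask. */
static int mixed_flavors(unsigned long mask)
{
	return (mask & (mask - 1)) != 0;
}

int main(void)
{
	/* All readers used _lite(): exactly one bit set, no warning. */
	printf("%d\n", mixed_flavors(SRCU_READ_FLAVOR_LITE));	/* 0 */

	/* One reader used srcu_read_lock(), another _lite(): warn. */
	printf("%d\n", mixed_flavors(SRCU_READ_FLAVOR_NORMAL |
				     SRCU_READ_FLAVOR_LITE));	/* 1 */
	return 0;
}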
> /*
> - * Returns approximate total of the readers' ->srcu_lock_count[] values
> - * for the rank of per-CPU counters specified by idx.
> + * Computes approximate total of the readers' ->srcu_lock_count[] values
> + * for the rank of per-CPU counters specified by idx, and returns true if
> + * the caller did the proper barrier (gp), and if the count of the locks
> + * matches that of the unlocks passed in.
>  */
> -static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
> +static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
>  {
>  	int cpu;
> +	unsigned long mask = 0;
>  	unsigned long sum = 0;
>
>  	for_each_possible_cpu(cpu) {
>  		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
>
>  		sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
> +		if (IS_ENABLED(CONFIG_PROVE_RCU))
> +			mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
>  	}
> -	return sum;
> +	WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
> +		  "Mixed reader flavors for srcu_struct at %ps.\n", ssp);

I am trying to understand the (unlikely) case where synchronize_srcu()
is done before any srcu reader lock/unlock lite call is done.  Can new
SRCU readers fail to observe the updates?

> +	if (mask & SRCU_READ_FLAVOR_LITE && !gp)
> +		return false;

So, srcu_readers_active_idx_check() can potentially return false for a
very long time, until the CPU executing srcu_readers_active_idx_check()
does at least one read lock/unlock lite call?

> +	return sum == unlocks;
>  }

> @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
>   */
>  static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>  {
> +	bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);

sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled.
But we need the reader flavor information for the srcu lite variant to
work.  So, the lite variant does not work when CONFIG_PROVE_RCU is
disabled.  Am I missing something obvious here?

- Neeraj
On Mon, Nov 11, 2024 at 06:24:58PM +0530, Neeraj Upadhyay wrote:
> I am trying to understand the (unlikely) case where synchronize_srcu()
> is done before any srcu reader lock/unlock lite call is done.  Can new
> SRCU readers fail to observe the updates?

If an SRCU reader fails to observe the index flip, then isn't it
the case that the synchronize_rcu() invoked from
srcu_readers_active_idx_check() must wait on it?

> > +	if (mask & SRCU_READ_FLAVOR_LITE && !gp)
> > +		return false;
>
> So, srcu_readers_active_idx_check() can potentially return false for a
> very long time, until the CPU executing srcu_readers_active_idx_check()
> does at least one read lock/unlock lite call?

That is correct.  The theory is that until after an srcu_read_lock_lite()
has executed, there is no need to wait on it.  Does the practice match
the theory in this case, or is there some sequence of events that I
missed?

> > +	bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
>
> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled.
> But we need the reader flavor information for the srcu lite variant to
> work.  So, the lite variant does not work when CONFIG_PROVE_RCU is
> disabled.  Am I missing something obvious here?

At first glance, it appears that I am the one who missed something
obvious.  Including in testing, which failed to uncover this issue.

Thank you for the careful reviews!

							Thanx, Paul
On 11/11/2024 8:56 PM, Paul E. McKenney wrote:
> On Mon, Nov 11, 2024 at 06:24:58PM +0530, Neeraj Upadhyay wrote:
>> I am trying to understand the (unlikely) case where synchronize_srcu()
>> is done before any srcu reader lock/unlock lite call is done.  Can new
>> SRCU readers fail to observe the updates?
>
> If an SRCU reader fails to observe the index flip, then isn't it
> the case that the synchronize_rcu() invoked from
> srcu_readers_active_idx_check() must wait on it?

Below is the sequence of operations I was thinking of, where at step 4
CPU2 reads the old pointer:

ptr = old

        CPU1                               CPU2

1. Update ptr = new

2. synchronize_srcu()
   <Does not use synchronize_rcu()
    as SRCU_READ_FLAVOR_LITE is not
    set for any sdp, as
    srcu_read_lock_lite() hasn't
    been called by any CPU>

3.                                    srcu_read_lock_lite()
                                      <No smp_mb() ordering>

4.                                    Can read ptr == old?

>> So, srcu_readers_active_idx_check() can potentially return false for a
>> very long time, until the CPU executing srcu_readers_active_idx_check()
>> does at least one read lock/unlock lite call?
>
> That is correct.  The theory is that until after an srcu_read_lock_lite()
> has executed, there is no need to wait on it.  Does the practice match
> the theory in this case, or is there some sequence of events that I
> missed?

Below sequence:

        CPU1                               CPU2

1.                                    srcu_read_lock_lite()

2.                                    srcu_read_unlock_lite()

3. synchronize_srcu()

3.1 srcu_readers_lock_idx() is
    called with gp = false, as
    srcu_read_lock_lite() was never
    called on this CPU for this
    srcu_struct, so
    ssp->sda->srcu_reader_flavor is
    not set for CPU1's sda.

3.2 Inside srcu_readers_lock_idx(),
    "mask" contains
    SRCU_READ_FLAVOR_LITE, as CPU2's
    sdp->srcu_reader_flavor has it.

3.3 CPU1 keeps returning false from
    the check below until CPU1 does
    at least one srcu_read_lock_lite()
    call or the thread migrates:

    if (mask & SRCU_READ_FLAVOR_LITE && !gp)
        return false;

>> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled.
>> But we need the reader flavor information for the srcu lite variant to
>> work.  So, the lite variant does not work when CONFIG_PROVE_RCU is
>> disabled.  Am I missing something obvious here?
>
> At first glance, it appears that I am the one who missed something
> obvious.  Including in testing, which failed to uncover this issue.
>
> Thank you for the careful reviews!

Sure thing, no problem!

- Neeraj
On Mon, Nov 11, 2024 at 10:21:58PM +0530, Neeraj Upadhyay wrote:
> Below is the sequence of operations I was thinking of, where at step 4
> CPU2 reads the old pointer:
>
> ptr = old
>
>         CPU1                               CPU2
>
> 1. Update ptr = new
>
> 2. synchronize_srcu()
>    <Does not use synchronize_rcu()
>     as SRCU_READ_FLAVOR_LITE is not
>     set for any sdp, as
>     srcu_read_lock_lite() hasn't
>     been called by any CPU>
>
> 3.                                    srcu_read_lock_lite()
>                                       <No smp_mb() ordering>
>
> 4.                                    Can read ptr == old?

As long as the kernel was built with CONFIG_PROVE_RCU=y, and given a fix
to the wrong-CPU issue you quite rightly point out below, no it cannot.
The CPU's first call to srcu_read_lock_lite() will use cmpxchg() to update
->srcu_reader_flavor, which will place full ordering between that update
and the later read from "ptr".

So if the synchronize_srcu() is too early to see the SRCU_READ_FLAVOR_LITE
bit, then the reader must see the new value of "ptr".  Similarly,
if the reader can see the old value of "ptr", then synchronize_srcu()
must see the reader's setting of the SRCU_READ_FLAVOR_LITE bit.

But both the CONFIG_PROVE_RCU=n and the wrong-CPU issues must be fixed
for this to work.  Please see the upcoming patches to be posted as a
reply to this email.

> 3.1 srcu_readers_lock_idx() is called with gp = false, as
>     srcu_read_lock_lite() was never called on this CPU for this
>     srcu_struct, so ssp->sda->srcu_reader_flavor is not set for
>     CPU1's sda.

Good eyes!  Yes, the scan that sums the ->srcu_unlock_count[] counters
must also OR together the ->srcu_reader_flavor fields.

> 3.2 Inside srcu_readers_lock_idx(), "mask" contains
>     SRCU_READ_FLAVOR_LITE, as CPU2's sdp->srcu_reader_flavor has it.
>
> 3.3 CPU1 keeps returning false from the check below until CPU1 does at
>     least one srcu_read_lock_lite() call or the thread migrates:
>
>     if (mask & SRCU_READ_FLAVOR_LITE && !gp)
>         return false;

This is also fixed by the OR of the ->srcu_reader_flavor fields, correct?

I guess I could claim that this bug prevents the wrong-CPU bug above
from resulting in a too-short SRCU grace period, but it is of course
better to just fix the bugs.  ;-)

And again, thank you!

							Thanx, Paul
>>>> I am trying to understand the (unlikely) case where synchronize_srcu()
>>>> is done before any srcu reader lock/unlock lite call is done.  Can new
>>>> SRCU readers fail to observe the updates?
>>>
>>> If an SRCU reader fails to observe the index flip, then isn't it
>>> the case that the synchronize_rcu() invoked from
>>> srcu_readers_active_idx_check() must wait on it?
>>
>> Below is the sequence of operations I was thinking of, where at step 4
>> CPU2 reads the old pointer [ . . . ]
>
> As long as the kernel was built with CONFIG_PROVE_RCU=y, and given a fix
> to the wrong-CPU issue you quite rightly point out below, no it cannot.
> The CPU's first call to srcu_read_lock_lite() will use cmpxchg() to update
> ->srcu_reader_flavor, which will place full ordering between that update
> and the later read from "ptr".
>
> So if the synchronize_srcu() is too early to see the SRCU_READ_FLAVOR_LITE
> bit, then the reader must see the new value of "ptr".  Similarly,
> if the reader can see the old value of "ptr", then synchronize_srcu()
> must see the reader's setting of the SRCU_READ_FLAVOR_LITE bit.
>
> But both the CONFIG_PROVE_RCU=n and the wrong-CPU issues must be fixed
> for this to work.  Please see the upcoming patches to be posted as a
> reply to this email.

Got it.  It will be good to document how the full ordering provided by
cmpxchg() helps here.

> Good eyes!  Yes, the scan that sums the ->srcu_unlock_count[] counters
> must also OR together the ->srcu_reader_flavor fields.

Agree.

> This is also fixed by the OR of the ->srcu_reader_flavor fields, correct?

Yes, agree.

> I guess I could claim that this bug prevents the wrong-CPU bug above
> from resulting in a too-short SRCU grace period, but it is of course
> better to just fix the bugs.  ;-)
>
> And again, thank you!

Again, no problem!

- Neeraj
The srcu_read_unlock_lite() function invokes __srcu_read_unlock() instead
of __srcu_read_unlock_lite(), which means that it is doing an unnecessary
smp_mb(). This is harmless other than the performance degradation.
This commit therefore switches to __srcu_read_unlock_lite().
Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
---
include/linux/srcu.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 512a8c54ba5ba..fff6265818d4f 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -373,7 +373,7 @@ static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
WARN_ON_ONCE(idx & ~0x1);
srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
srcu_lock_release(&ssp->dep_map);
- __srcu_read_unlock(ssp, idx);
+ __srcu_read_unlock_lite(ssp, idx);
}
/**
--
2.40.1
On 11/12/2024 7:01 AM, Paul E. McKenney wrote:
> The srcu_read_unlock_lite() function invokes __srcu_read_unlock() instead
> of __srcu_read_unlock_lite(), which means that it is doing an unnecessary
> smp_mb().  This is harmless other than the performance degradation.
>
> This commit therefore switches to __srcu_read_unlock_lite().
>
> Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
> Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
> Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> Cc: Frederic Weisbecker <frederic@kernel.org>
> ---

Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>

- Neeraj
If srcu_read_lock_lite() is used on a given srcu_struct structure, then
the grace-period processing must to synchronize_rcu() instead of smp_mb()
between the scans of the ->srcu_unlock_count[] and ->srcu_lock_count[]
counters. Currently, it does that by testing the SRCU_READ_FLAVOR_LITE
bit of the ->srcu_reader_flavor mask, which works well. But only if
the CPU running that srcu_struct structure's grace period has previously
executed srcu_read_lock_lite(), which might not be the case, especially
just after that srcu_struct structure has been created and initialized.
This commit therefore updates the srcu_readers_unlock_idx() function
to OR together the ->srcu_reader_flavor masks from all CPUs, and
then makes the srcu_readers_active_idx_check() function test the
SRCU_READ_FLAVOR_LITE bit in the resulting mask.
Note that the srcu_readers_unlock_idx() function is already scanning all
the CPUs to sum up the ->srcu_unlock_count[] fields and that this is on
the grace-period slow path, hence no concerns about the small amount of
extra work.
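[ Editorial illustration, not the kernel's code: a toy userspace model of
the revised scan shape, with hypothetical types and values.  The point is
that the flavor bit set by any CPU's reader is visible to whichever CPU
happens to run the grace period. ]

#include <stdio.h>

#define NR_CPUS 4
#define SRCU_READ_FLAVOR_LITE 0x4

/* Toy stand-in for per-CPU srcu_data; fields mirror the ones scanned. */
struct toy_sdp {
	unsigned long srcu_unlock_count;
	unsigned long srcu_reader_flavor;
};

static struct toy_sdp sdp[NR_CPUS];

/* Shape of the revised scan: sum the counters, OR together the flavors. */
static unsigned long unlock_scan(unsigned long *rdm)
{
	unsigned long mask = 0;
	unsigned long sum = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		sum += sdp[cpu].srcu_unlock_count;
		mask |= sdp[cpu].srcu_reader_flavor;
	}
	*rdm = mask;	/* Caller tests SRCU_READ_FLAVOR_LITE in this. */
	return sum;
}

int main(void)
{
	unsigned long rdm;

	/* Only CPU 2 ever ran a _lite() reader... */
	sdp[2].srcu_reader_flavor = SRCU_READ_FLAVOR_LITE;

	/* ...yet the scanning CPU, whichever it is, still sees the bit. */
	unlock_scan(&rdm);
	printf("did_gp = %d\n", !!(rdm & SRCU_READ_FLAVOR_LITE));	/* 1 */
	return 0;
}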
Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
---
kernel/rcu/srcutree.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 70979f294768c..5991381b44383 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -458,7 +458,7 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns
* Returns approximate total of the readers' ->srcu_unlock_count[] values
* for the rank of per-CPU counters specified by idx.
*/
-static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
+static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, unsigned long *rdm)
{
int cpu;
unsigned long mask = 0;
@@ -468,11 +468,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
sum += atomic_long_read(&sdp->srcu_unlock_count[idx]);
- if (IS_ENABLED(CONFIG_PROVE_RCU))
- mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
+ mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
}
WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
"Mixed reader flavors for srcu_struct at %ps.\n", ssp);
+ *rdm = mask;
return sum;
}
@@ -482,10 +482,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
*/
static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
{
- bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
+ unsigned long rdm;
unsigned long unlocks;
- unlocks = srcu_readers_unlock_idx(ssp, idx);
+ unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm);
+ bool did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE);
/*
* Make sure that a lock is always counted if the corresponding
--
2.40.1
On 11/12/2024 7:01 AM, Paul E. McKenney wrote:
> If srcu_read_lock_lite() is used on a given srcu_struct structure, then
> the grace-period processing must to synchronize_rcu() instead of smp_mb()

s/to/do/

> @@ -482,10 +482,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
>   */
>  static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>  {
> -	bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
> +	unsigned long rdm;
>  	unsigned long unlocks;
>
> -	unlocks = srcu_readers_unlock_idx(ssp, idx);
> +	unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm);
> +	bool did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE);

Move "did_gp" declaration up?

Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>

- Neeraj
On Tue, Nov 12, 2024 at 08:58:08AM +0530, Neeraj Upadhyay wrote:
> On 11/12/2024 7:01 AM, Paul E. McKenney wrote:
> > If srcu_read_lock_lite() is used on a given srcu_struct structure, then
> > the grace-period processing must to synchronize_rcu() instead of smp_mb()
>
> s/to/do/

Good eyes, fixed!

> > -	unlocks = srcu_readers_unlock_idx(ssp, idx);
> > +	unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm);
> > +	bool did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE);
>
> Move "did_gp" declaration up?

C now allows this?  ;-)  Fixed!

> Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>

And applied all three, again, thank you!

							Thanx, Paul
Currently, srcu_read_lock_lite() uses the SRCU_READ_FLAVOR_LITE bit in
->srcu_reader_flavor to communicate to the grace-period processing in
srcu_readers_active_idx_check() that the smp_mb() must be replaced by a
synchronize_rcu(). Unfortunately, ->srcu_reader_flavor is not updated
unless the kernel is built with CONFIG_PROVE_RCU=y. Therefore in all
kernels built with CONFIG_PROVE_RCU=n, srcu_readers_active_idx_check()
incorrectly uses smp_mb() instead of synchronize_rcu() for srcu_struct
structures whose readers use srcu_read_lock_lite().
This commit therefore causes Tree SRCU srcu_read_lock_lite()
to unconditionally update ->srcu_reader_flavor so that
srcu_readers_active_idx_check() can make the correct choice.
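[ Editorial illustration: a userspace model of the resulting
fast-path/slow-path shape, with C11 atomics standing in for the kernel's
READ_ONCE() and fully ordered cmpxchg(); the kernel version also warns on
flavor mismatches, which is omitted here. ]

#include <stdatomic.h>
#include <stdio.h>

#define SRCU_READ_FLAVOR_LITE 0x4

/* Stand-in for one CPU's sdp->srcu_reader_flavor. */
static atomic_ulong reader_flavor;

static void check_read_flavor_lite(void)
{
	unsigned long old = 0;

	/* Fast path: bit already recorded, no ordering required. */
	if (atomic_load_explicit(&reader_flavor, memory_order_relaxed) &
	    SRCU_READ_FLAVOR_LITE)
		return;

	/*
	 * Slow path, first _lite() reader here: the seq_cst
	 * compare-exchange is fully ordered, like the kernel's cmpxchg(),
	 * so the grace-period machinery either sees this bit set or the
	 * reader sees the post-grace-period state.
	 */
	atomic_compare_exchange_strong(&reader_flavor, &old,
				       SRCU_READ_FLAVOR_LITE);
}

int main(void)
{
	check_read_flavor_lite();	/* Slow path: records the flavor. */
	check_read_flavor_lite();	/* Fast path: already recorded. */
	printf("flavor = %#lx\n", atomic_load(&reader_flavor));
	return 0;
}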
Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
---
include/linux/srcu.h | 8 +-------
include/linux/srcutiny.h | 3 +++
include/linux/srcutree.h | 21 +++++++++++++++++++++
kernel/rcu/srcutree.c | 6 ++----
4 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index fff6265818d4f..abaddd7e6ddf7 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -183,12 +183,6 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
-void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
-#else
-#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
-#endif
-
/**
* srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
@@ -278,7 +272,7 @@ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
- srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+ srcu_check_read_flavor_lite(ssp);
retval = __srcu_read_lock_lite(ssp);
rcu_try_lock_acquire(&ssp->dep_map);
return retval;
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 4d96bbdb45f08..1321da803274d 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -81,6 +81,9 @@ static inline void srcu_barrier(struct srcu_struct *ssp)
synchronize_srcu(ssp);
}
+#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
+#define srcu_check_read_flavor_lite(ssp) do { } while (0)
+
/* Defined here to avoid size increase for non-torture kernels. */
static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
char *tt, char *tf)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 778eb61542e18..490aeecc6bb47 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -248,4 +248,25 @@ static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
}
+void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
+
+// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels.
+static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp)
+{
+ struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+ if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE))
+ return;
+
+ // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered.
+ __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+}
+
+// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels.
+static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
+{
+ if (IS_ENABLED(CONFIG_PROVE_RCU))
+ __srcu_check_read_flavor(ssp, read_flavor);
+}
+
#endif
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 5991381b44383..ee3cbb8308664 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -711,11 +711,10 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-#ifdef CONFIG_PROVE_RCU
/*
* Check for consistent reader flavor.
*/
-void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
+void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
{
int old_read_flavor;
struct srcu_data *sdp;
@@ -733,8 +732,7 @@ void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
}
WARN_ONCE(old_read_flavor != read_flavor, "CPU %d old state %d new state %d\n", sdp->cpu, old_read_flavor, read_flavor);
}
-EXPORT_SYMBOL_GPL(srcu_check_read_flavor);
-#endif /* CONFIG_PROVE_RCU */
+EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
/*
* Counts the new reader in the appropriate per-CPU element of the
--
2.40.1
...

> +void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
> +
> +// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels.
> +static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp)
> +{
> +	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
> +
> +	if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE))
> +		return;
> +
> +	// Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered.

Nit: s/srcu_check_read_flavor/__srcu_check_read_flavor/

> +	__srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
> +}

Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>

- Neeraj