This patch adds srcu_read_lock_lite() and srcu_read_unlock_lite(), which
dispense with the read-side smp_mb() but are also restricted to code
regions that RCU is watching. If a given srcu_struct structure uses
srcu_read_lock_lite() and srcu_read_unlock_lite(), it is not permitted
to use any other SRCU read-side marker, before, during, or after.
Another price of light-weight readers is heavier-weight grace periods.
SRCU grace periods on srcu_struct structures used by such readers will
incur at least two calls to synchronize_rcu(). In addition, normal SRCU
grace periods for light-weight-reader srcu_struct structures never
auto-expedite.
Note that expedited SRCU grace periods for light-weight-reader
srcu_struct structures still invoke synchronize_rcu(), not
synchronize_rcu_expedited(). Something about wishing to keep
the IPIs down to a dull roar.
The srcu_read_lock_lite() and srcu_read_unlock_lite() functions may not
(repeat, *not*) be used from NMI handlers, but if this is needed, an
additional flavor of SRCU reader can be added by some future commit.
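For illustration, here is a minimal sketch of what a _lite() reader might
look like. The my_srcu, my_data, and struct foo names are invented for
this example and are not part of this patch:

	struct foo {
		int a;
	};

	static DEFINE_SRCU(my_srcu);
	static struct foo __rcu *my_data;

	int read_foo_a(void)
	{
		struct foo *p;
		int a;
		int idx;

		idx = srcu_read_lock_lite(&my_srcu);	/* No smp_mb() here... */
		p = srcu_dereference(my_data, &my_srcu);
		a = p ? p->a : -1;
		srcu_read_unlock_lite(&my_srcu, idx);	/* ...nor here. */
		return a;
	}

As with srcu_read_lock(), the index returned by srcu_read_lock_lite()
must be passed to the matching srcu_read_unlock_lite(), and the entire
critical section must run in a context where RCU is watching.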
[ paulmck: Apply Alexei Starovoitov expediting feedback. ]
[ paulmck: Apply kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: kernel test robot <oliver.sang@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcu.h | 51 ++++++++++++++++++++++++-
include/linux/srcutree.h | 1 +
kernel/rcu/srcutree.c | 82 ++++++++++++++++++++++++++++++++++------
3 files changed, 122 insertions(+), 12 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 84daaa33ea0ab..4ba96e2cfa405 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -56,6 +56,13 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
void cleanup_srcu_struct(struct srcu_struct *ssp);
int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
+#ifdef CONFIG_TINY_SRCU
+#define __srcu_read_lock_lite __srcu_read_lock
+#define __srcu_read_unlock_lite __srcu_read_unlock
+#else // #ifdef CONFIG_TINY_SRCU
+int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp);
+#endif // #else // #ifdef CONFIG_TINY_SRCU
void synchronize_srcu(struct srcu_struct *ssp);
#define SRCU_GET_STATE_COMPLETED 0x1
@@ -179,7 +186,7 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
#else
-static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) { }
+#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
#endif
@@ -249,6 +256,32 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
return retval;
}
+/**
+ * srcu_read_lock_lite - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but for a light-weight
+ * smp_mb()-free reader. See srcu_read_lock() for more information.
+ *
+ * If srcu_read_lock_lite() is ever used on an srcu_struct structure,
+ * then none of the other flavors may be used, whether before, during,
+ * or after. Note that grace-period auto-expediting is disabled for _lite
+ * srcu_struct structures because auto-expedited grace periods invoke
+ * synchronize_rcu_expedited(), IPIs and all.
+ *
+ * Note that srcu_read_lock_lite() can be invoked only from those contexts
+ * where RCU is watching. Otherwise, lockdep will complain.
+ */
+static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
+{
+ int retval;
+
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+ retval = __srcu_read_lock_lite(ssp);
+ rcu_try_lock_acquire(&ssp->dep_map);
+ return retval;
+}
+
/**
* srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
* @ssp: srcu_struct in which to register the new reader.
@@ -325,6 +358,22 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
__srcu_read_unlock(ssp, idx);
}
+/**
+ * srcu_read_unlock_lite - unregister an old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock_lite().
+ *
+ * Exit a light-weight SRCU read-side critical section.
+ */
+static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+ __releases(ssp)
+{
+ WARN_ON_ONCE(idx & ~0x1);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+ srcu_lock_release(&ssp->dep_map);
+ __srcu_read_unlock(ssp, idx);
+}
+
/**
* srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 79ad809c7f035..8074138cbd624 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -46,6 +46,7 @@ struct srcu_data {
/* Values for ->srcu_reader_flavor. */
#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock().
#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe().
+#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite().
/*
* Node in SRCU combining tree, similar in function to rcu_data.
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 4c51be484b48a..bf51758cf4a64 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -429,20 +429,29 @@ static bool srcu_gp_is_expedited(struct srcu_struct *ssp)
}
/*
- * Returns approximate total of the readers' ->srcu_lock_count[] values
- * for the rank of per-CPU counters specified by idx.
+ * Computes approximate total of the readers' ->srcu_lock_count[] values
+ * for the rank of per-CPU counters specified by idx, and returns true if
+ * the caller did the proper barrier (gp), and if the count of the locks
+ * matches that of the unlocks passed in.
*/
-static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
+static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
{
int cpu;
+ unsigned long mask = 0;
unsigned long sum = 0;
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
+ if (IS_ENABLED(CONFIG_PROVE_RCU))
+ mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
}
- return sum;
+ WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
+ "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
+ if (mask & SRCU_READ_FLAVOR_LITE && !gp)
+ return false;
+ return sum == unlocks;
}
/*
@@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
*/
static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
{
+ bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
unsigned long unlocks;
unlocks = srcu_readers_unlock_idx(ssp, idx);
@@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
* unlock is counted. Needs to be a smp_mb() as the read side may
* contain a read from a variable that is written to before the
* synchronize_srcu() in the write side. In this case smp_mb()s
- * A and B act like the store buffering pattern.
+ * A and B (or X and Y) act like the store buffering pattern.
*
- * This smp_mb() also pairs with smp_mb() C to prevent accesses
- * after the synchronize_srcu() from being executed before the
- * grace period ends.
+ * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
+ * Z) to prevent accesses after the synchronize_srcu() from being
+ * executed before the grace period ends.
*/
- smp_mb(); /* A */
+ if (!did_gp)
+ smp_mb(); /* A */
+ else
+ synchronize_rcu(); /* X */
/*
* If the locks are the same as the unlocks, then there must have
@@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
* which are unlikely to be configured with an address space fully
* populated with memory, at least not anytime soon.
*/
- return srcu_readers_lock_idx(ssp, idx) == unlocks;
+ return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
}
/**
@@ -750,6 +763,47 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct. Returns an index that must be passed to the matching
+ * srcu_read_unlock_lite().
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+int __srcu_read_lock_lite(struct srcu_struct *ssp)
+{
+ int idx;
+
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
+ idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+ this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */
+ barrier(); /* Avoid leaking the critical section. */
+ return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock_lite);
+
+/*
+ * Removes the count for the old reader from the appropriate
+ * per-CPU element of the srcu_struct. Note that this may well be a
+ * different CPU than that which was incremented by the corresponding
+ * srcu_read_lock_lite(), but it must be within the same task.
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+{
+ barrier(); /* Avoid leaking the critical section. */
+ this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock_lite);
+
#ifdef CONFIG_NEED_SRCU_NMI_SAFE
/*
@@ -1134,6 +1188,8 @@ static void srcu_flip(struct srcu_struct *ssp)
* it stays until either (1) Compilers learn about this sort of
* control dependency or (2) Some production workload running on
* a production system is unduly delayed by this slowpath smp_mb().
+ * Except for _lite() readers, where it is inoperative, which
+ * means that it is a good thing that it is redundant.
*/
smp_mb(); /* E */ /* Pairs with B and C. */
@@ -1152,7 +1208,8 @@ static void srcu_flip(struct srcu_struct *ssp)
/*
* If SRCU is likely idle, in other words, the next SRCU grace period
- * should be expedited, return true, otherwise return false.
+ * should be expedited, return true, otherwise return false. Except that
+ * in the presence of _lite() readers, always return false.
*
* Note that it is OK for several current from-idle requests for a new
* grace period from idle to specify expediting because they will all end
@@ -1181,6 +1238,9 @@ static bool srcu_should_expedite(struct srcu_struct *ssp)
unsigned long tlast;
check_init_srcu_struct(ssp);
+ /* If _lite() readers, don't do unsolicited expediting. */
+ if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)
+ return false;
/* If the local srcu_data structure has callbacks, not idle. */
sdp = raw_cpu_ptr(ssp->sda);
spin_lock_irqsave_rcu_node(sdp, flags);
--
2.40.1
>
> /*
> - * Returns approximate total of the readers' ->srcu_lock_count[] values
> - * for the rank of per-CPU counters specified by idx.
> + * Computes approximate total of the readers' ->srcu_lock_count[] values
> + * for the rank of per-CPU counters specified by idx, and returns true if
> + * the caller did the proper barrier (gp), and if the count of the locks
> + * matches that of the unlocks passed in.
> */
> -static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
> +static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
> {
> int cpu;
> + unsigned long mask = 0;
> unsigned long sum = 0;
>
> for_each_possible_cpu(cpu) {
> struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
>
> sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
> + if (IS_ENABLED(CONFIG_PROVE_RCU))
> + mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
> }
> - return sum;
> + WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
> + "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
I am trying to understand the (unlikely) case where synchronize_srcu() is done before any
srcu reader lock/unlock lite call is done. Can new SRCU readers fail to observe the
updates?
> + if (mask & SRCU_READ_FLAVOR_LITE && !gp)
> + return false;
So, srcu_readers_active_idx_check() can potentially return false for a very long
time, until the CPU executing srcu_readers_active_idx_check() does
at least one read lock/unlock lite call?
> + return sum == unlocks;
> }
>
> /*
> @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> */
> static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> {
> + bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But we
need the reader flavor information for the SRCU lite variant to work. So, the
lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I missing something
obvious here?
- Neeraj
> unsigned long unlocks;
>
> unlocks = srcu_readers_unlock_idx(ssp, idx);
> @@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> * unlock is counted. Needs to be a smp_mb() as the read side may
> * contain a read from a variable that is written to before the
> * synchronize_srcu() in the write side. In this case smp_mb()s
> - * A and B act like the store buffering pattern.
> + * A and B (or X and Y) act like the store buffering pattern.
> *
> - * This smp_mb() also pairs with smp_mb() C to prevent accesses
> - * after the synchronize_srcu() from being executed before the
> - * grace period ends.
> + * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
> + * Z) to prevent accesses after the synchronize_srcu() from being
> + * executed before the grace period ends.
> */
> - smp_mb(); /* A */
> + if (!did_gp)
> + smp_mb(); /* A */
> + else
> + synchronize_rcu(); /* X */
>
> /*
> * If the locks are the same as the unlocks, then there must have
> @@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> * which are unlikely to be configured with an address space fully
> * populated with memory, at least not anytime soon.
> */
> - return srcu_readers_lock_idx(ssp, idx) == unlocks;
> + return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
> }
>
On Mon, Nov 11, 2024 at 06:24:58PM +0530, Neeraj Upadhyay wrote:
>
> >
> > /*
> > - * Returns approximate total of the readers' ->srcu_lock_count[] values
> > - * for the rank of per-CPU counters specified by idx.
> > + * Computes approximate total of the readers' ->srcu_lock_count[] values
> > + * for the rank of per-CPU counters specified by idx, and returns true if
> > + * the caller did the proper barrier (gp), and if the count of the locks
> > + * matches that of the unlocks passed in.
> > */
> > -static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
> > +static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
> > {
> > int cpu;
> > + unsigned long mask = 0;
> > unsigned long sum = 0;
> >
> > for_each_possible_cpu(cpu) {
> > struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> >
> > sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
> > + if (IS_ENABLED(CONFIG_PROVE_RCU))
> > + mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
> > }
> > - return sum;
> > + WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
> > + "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
>
> I am trying to understand the (unlikely) case where synchronize_srcu() is done before any
> srcu reader lock/unlock lite call is done. Can new SRCU readers fail to observe the
> updates?
If an SRCU reader fails to observe the index flip, then isn't it the case
that the synchronize_rcu() invoked from srcu_readers_active_idx_check()
must wait on it?
> > + if (mask & SRCU_READ_FLAVOR_LITE && !gp)
> > + return false;
>
> So, srcu_readers_active_idx_check() can potentially return false for a very long
> time, until the CPU executing srcu_readers_active_idx_check() does
> at least one read lock/unlock lite call?
That is correct. The theory is that until after an srcu_read_lock_lite()
has executed, there is no need to wait on it. Does the practice match the
theory in this case, or is there some sequence of events that I missed?
> > + return sum == unlocks;
> > }
> >
> > /*
> > @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> > */
> > static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> > {
> > + bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
>
> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But we
> need the reader flavor information for the SRCU lite variant to work. So, the
> lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I missing something
> obvious here?
At first glance, it appears that I am the one who missed something obvious.
Including in testing, which failed to uncover this issue.
Thank you for the careful reviews!
Thanx, Paul
> - Neeraj
>
> > unsigned long unlocks;
> >
> > unlocks = srcu_readers_unlock_idx(ssp, idx);
> > @@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> > * unlock is counted. Needs to be a smp_mb() as the read side may
> > * contain a read from a variable that is written to before the
> > * synchronize_srcu() in the write side. In this case smp_mb()s
> > - * A and B act like the store buffering pattern.
> > + * A and B (or X and Y) act like the store buffering pattern.
> > *
> > - * This smp_mb() also pairs with smp_mb() C to prevent accesses
> > - * after the synchronize_srcu() from being executed before the
> > - * grace period ends.
> > + * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
> > + * Z) to prevent accesses after the synchronize_srcu() from being
> > + * executed before the grace period ends.
> > */
> > - smp_mb(); /* A */
> > + if (!did_gp)
> > + smp_mb(); /* A */
> > + else
> > + synchronize_rcu(); /* X */
> >
> > /*
> > * If the locks are the same as the unlocks, then there must have
> > @@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> > * which are unlikely to be configured with an address space fully
> > * populated with memory, at least not anytime soon.
> > */
> > - return srcu_readers_lock_idx(ssp, idx) == unlocks;
> > + return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
> > }
> >
>
On 11/11/2024 8:56 PM, Paul E. McKenney wrote:
> On Mon, Nov 11, 2024 at 06:24:58PM +0530, Neeraj Upadhyay wrote:
>>
>>>
>>> /*
>>> - * Returns approximate total of the readers' ->srcu_lock_count[] values
>>> - * for the rank of per-CPU counters specified by idx.
>>> + * Computes approximate total of the readers' ->srcu_lock_count[] values
>>> + * for the rank of per-CPU counters specified by idx, and returns true if
>>> + * the caller did the proper barrier (gp), and if the count of the locks
>>> + * matches that of the unlocks passed in.
>>> */
>>> -static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
>>> +static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
>>> {
>>> int cpu;
>>> + unsigned long mask = 0;
>>> unsigned long sum = 0;
>>>
>>> for_each_possible_cpu(cpu) {
>>> struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
>>>
>>> sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
>>> + if (IS_ENABLED(CONFIG_PROVE_RCU))
>>> + mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
>>> }
>>> - return sum;
>>> + WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
>>> + "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
>>
>> I am trying to understand the (unlikely) case where synchronize_srcu() is done before any
>> srcu reader lock/unlock lite call is done. Can new SRCU readers fail to observe the
>> updates?
>
> If an SRCU reader fails to observe the index flip, then isn't it the case
> that the synchronize_rcu() invoked from srcu_readers_active_idx_check()
> must wait on it?
>
Below is the sequence of operations I was thinking of, where at step 4 CPU2
reads old pointer

ptr = old

CPU1                                    CPU2

1. Update ptr = new

2. synchronize_srcu()
   <Does not use synchronize_rcu()
   as SRCU_READ_FLAVOR_LITE is not
   set for any sdp as srcu_read_lock_lite()
   hasn't been called by any CPU>

                                        3. srcu_read_lock_lite()
                                           <No smp_mb() ordering>

                                        4. Can read ptr == old ?
>>> + if (mask & SRCU_READ_FLAVOR_LITE && !gp)
>>> + return false;
>>
>> So, srcu_readers_active_idx_check() can potentially return false for a very long
>> time, until the CPU executing srcu_readers_active_idx_check() does
>> at least one read lock/unlock lite call?
>
> That is correct. The theory is that until after an srcu_read_lock_lite()
> has executed, there is no need to wait on it. Does the practice match the
> theory in this case, or is there some sequence of events that I missed?
>
Below sequence

CPU1                                    CPU2

                                        1. srcu_read_lock_lite()

                                        2. srcu_read_unlock_lite()

3. synchronize_srcu()

3.1 srcu_readers_lock_idx() is
    called with gp = false as
    srcu_read_lock_lite() was never
    called on this CPU for this
    srcu_struct. So
    ssp->sda->srcu_reader_flavor is not
    set for CPU1's sda.

3.2 Inside srcu_readers_lock_idx()
    "mask" contains SRCU_READ_FLAVOR_LITE
    as CPU2's sdp->srcu_reader_flavor has it.

3.3 CPU1 keeps returning false from
    below check until CPU1 does at least
    one srcu_read_lock_lite() call or
    the thread migrates.

    if (mask & SRCU_READ_FLAVOR_LITE && !gp)
            return false;
>>> + return sum == unlocks;
>>> }
>>>
>>> /*
>>> @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
>>> */
>>> static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>>> {
>>> + bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
>>
>> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But we
>> need the reader flavor information for the SRCU lite variant to work. So, the
>> lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I missing something
>> obvious here?
>
> At first glance, it appears that I am the one who missed something obvious.
> Including in testing, which failed to uncover this issue.
>
> Thank you for the careful reviews!
>
Sure thing, no problem!
- Neeraj
> Thanx, Paul
>
>> - Neeraj
>>
>>> unsigned long unlocks;
>>>
>>> unlocks = srcu_readers_unlock_idx(ssp, idx);
>>> @@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>>> * unlock is counted. Needs to be a smp_mb() as the read side may
>>> * contain a read from a variable that is written to before the
>>> * synchronize_srcu() in the write side. In this case smp_mb()s
>>> - * A and B act like the store buffering pattern.
>>> + * A and B (or X and Y) act like the store buffering pattern.
>>> *
>>> - * This smp_mb() also pairs with smp_mb() C to prevent accesses
>>> - * after the synchronize_srcu() from being executed before the
>>> - * grace period ends.
>>> + * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
>>> + * Z) to prevent accesses after the synchronize_srcu() from being
>>> + * executed before the grace period ends.
>>> */
>>> - smp_mb(); /* A */
>>> + if (!did_gp)
>>> + smp_mb(); /* A */
>>> + else
>>> + synchronize_rcu(); /* X */
>>>
>>> /*
>>> * If the locks are the same as the unlocks, then there must have
>>> @@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>>> * which are unlikely to be configured with an address space fully
>>> * populated with memory, at least not anytime soon.
>>> */
>>> - return srcu_readers_lock_idx(ssp, idx) == unlocks;
>>> + return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
>>> }
>>>
>>
On Mon, Nov 11, 2024 at 10:21:58PM +0530, Neeraj Upadhyay wrote:
> On 11/11/2024 8:56 PM, Paul E. McKenney wrote:
> > On Mon, Nov 11, 2024 at 06:24:58PM +0530, Neeraj Upadhyay wrote:
> >>> /*
> >>> - * Returns approximate total of the readers' ->srcu_lock_count[] values
> >>> - * for the rank of per-CPU counters specified by idx.
> >>> + * Computes approximate total of the readers' ->srcu_lock_count[] values
> >>> + * for the rank of per-CPU counters specified by idx, and returns true if
> >>> + * the caller did the proper barrier (gp), and if the count of the locks
> >>> + * matches that of the unlocks passed in.
> >>> */
> >>> -static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
> >>> +static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
> >>> {
> >>> int cpu;
> >>> + unsigned long mask = 0;
> >>> unsigned long sum = 0;
> >>>
> >>> for_each_possible_cpu(cpu) {
> >>> struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> >>>
> >>> sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
> >>> + if (IS_ENABLED(CONFIG_PROVE_RCU))
> >>> + mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
> >>> }
> >>> - return sum;
> >>> + WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
> >>> + "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
> >>
> >> I am trying to understand the (unlikely) case where synchronize_srcu() is done before any
> >> srcu reader lock/unlock lite call is done. Can new SRCU readers fail to observe the
> >> updates?
> >
> > If an SRCU reader fails to observe the index flip, then isn't it the case
> > that the synchronize_rcu() invoked from srcu_readers_active_idx_check()
> > must wait on it?
>
> Below is the sequence of operations I was thinking of, where at step 4 CPU2
> reads old pointer
>
> ptr = old
>
> CPU1                                    CPU2
>
> 1. Update ptr = new
>
> 2. synchronize_srcu()
>    <Does not use synchronize_rcu()
>    as SRCU_READ_FLAVOR_LITE is not
>    set for any sdp as srcu_read_lock_lite()
>    hasn't been called by any CPU>
>
>                                         3. srcu_read_lock_lite()
>                                            <No smp_mb() ordering>
>
>                                         4. Can read ptr == old ?
As long as the kernel was built with CONFIG_PROVE_RCU=y and given a fix
to the wrong-CPU issue you quite rightly point out below, no it cannot.
The CPU's first call to srcu_read_lock_lite() will use cmpxchg() to update
->srcu_reader_flavor, which will place full ordering between that update
and the later read from "ptr".
So if the synchronize_srcu() is too early to see the SRCU_READ_FLAVOR_LITE
bit, then the reader must see the new value of "ptr". Similarly,
if the reader can see the old value of "ptr", then synchronize_srcu()
must see the reader's setting of the SRCU_READ_FLAVOR_LITE bit.
But both the CONFIG_PROVE_RCU=n and the wrong-CPU issue must be fixed
for this to work. Please see the upcoming patches to be posted as a
reply to this email.
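This ordering argument can be sketched as a store-buffering litmus test.
This is illustrative only: the smp_mb() stands in for the ordering that
synchronize_srcu() provides around its counter scans, and the constant 4
stands in for SRCU_READ_FLAVOR_LITE:

	C SB+sync-vs-first-lite

	{}

	P0(int *ptr, int *flavor)
	{
		int r0;

		WRITE_ONCE(*ptr, 1);		/* Update ptr = new. */
		smp_mb();			/* Stands in for synchronize_srcu(). */
		r0 = READ_ONCE(*flavor);	/* Scan of ->srcu_reader_flavor. */
	}

	P1(int *ptr, int *flavor)
	{
		int r1;
		int r2;

		r1 = cmpxchg(flavor, 0, 4);	/* First srcu_read_lock_lite(), fully ordered. */
		r2 = READ_ONCE(*ptr);		/* Reader's subsequent load. */
	}

	exists (0:r0=0 /\ 1:r2=0)

Because the cmpxchg() is fully ordered, the exists clause can never be
satisfied: either the grace period observes the SRCU_READ_FLAVOR_LITE
bit (r0 != 0) or the reader observes the new pointer (r2 = 1).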
> >>> + if (mask & SRCU_READ_FLAVOR_LITE && !gp)
> >>> + return false;
> >>
> >> So, srcu_readers_active_idx_check() can potentially return false for a very long
> >> time, until the CPU executing srcu_readers_active_idx_check() does
> >> at least one read lock/unlock lite call?
> >
> > That is correct. The theory is that until after an srcu_read_lock_lite()
> > has executed, there is no need to wait on it. Does the practice match the
> > theory in this case, or is there some sequence of events that I missed?
>
> Below sequence
>
> CPU1                                    CPU2
>
>                                         1. srcu_read_lock_lite()
>
>                                         2. srcu_read_unlock_lite()
>
> 3. synchronize_srcu()
>
> 3.1 srcu_readers_lock_idx() is
>     called with gp = false as
>     srcu_read_lock_lite() was never
>     called on this CPU for this
>     srcu_struct. So
>     ssp->sda->srcu_reader_flavor is not
>     set for CPU1's sda.
Good eyes! Yes, the scan that sums the ->srcu_unlock_count[] counters
must also OR together the ->srcu_reader_flavor fields.
> 3.2 Inside srcu_readers_lock_idx()
>     "mask" contains SRCU_READ_FLAVOR_LITE
>     as CPU2's sdp->srcu_reader_flavor has it.
>
> 3.3 CPU1 keeps returning false from
>     below check until CPU1 does at least
>     one srcu_read_lock_lite() call or
>     the thread migrates.
>
>     if (mask & SRCU_READ_FLAVOR_LITE && !gp)
>             return false;
This is also fixed by the OR of the ->srcu_reader_flavor fields, correct?
I guess I could claim that this bug prevents the wrong-CPU bug above
from resulting in a too-short SRCU grace period, but it is of course
better to just fix the bugs. ;-)
> >>> + return sum == unlocks;
> >>> }
> >>>
> >>> /*
> >>> @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> >>> */
> >>> static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> >>> {
> >>> + bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
> >>
> >> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But we
> >> need the reader flavor information for the SRCU lite variant to work. So, the
> >> lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I missing something
> >> obvious here?
> >
> > At first glance, it appears that I am the one who missed something obvious.
> > Including in testing, which failed to uncover this issue.
> >
> > Thank you for the careful reviews!
>
> Sure thing, no problem!
And again, thank you!
Thanx, Paul
> >>> unsigned long unlocks;
> >>>
> >>> unlocks = srcu_readers_unlock_idx(ssp, idx);
> >>> @@ -482,13 +492,16 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> >>> * unlock is counted. Needs to be a smp_mb() as the read side may
> >>> * contain a read from a variable that is written to before the
> >>> * synchronize_srcu() in the write side. In this case smp_mb()s
> >>> - * A and B act like the store buffering pattern.
> >>> + * A and B (or X and Y) act like the store buffering pattern.
> >>> *
> >>> - * This smp_mb() also pairs with smp_mb() C to prevent accesses
> >>> - * after the synchronize_srcu() from being executed before the
> >>> - * grace period ends.
> >>> + * This smp_mb() also pairs with smp_mb() C (or, in the case of X,
> >>> + * Z) to prevent accesses after the synchronize_srcu() from being
> >>> + * executed before the grace period ends.
> >>> */
> >>> - smp_mb(); /* A */
> >>> + if (!did_gp)
> >>> + smp_mb(); /* A */
> >>> + else
> >>> + synchronize_rcu(); /* X */
> >>>
> >>> /*
> >>> * If the locks are the same as the unlocks, then there must have
> >>> @@ -546,7 +559,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> >>> * which are unlikely to be configured with an address space fully
> >>> * populated with memory, at least not anytime soon.
> >>> */
> >>> - return srcu_readers_lock_idx(ssp, idx) == unlocks;
> >>> + return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks);
> >>> }
> >>>
> >>
>>>> I am trying to understand the (unlikely) case where synchronize_srcu() is done before any
>>>> srcu reader lock/unlock lite call is done. Can new SRCU readers fail to observe the
>>>> updates?
>>>
>>> If an SRCU reader fails to observe the index flip, then isn't it the case
>>> that the synchronize_rcu() invoked from srcu_readers_active_idx_check()
>>> must wait on it?
>>
>> Below is the sequence of operations I was thinking of, where at step 4 CPU2
>> reads old pointer
>>
>> ptr = old
>>
>> CPU1                                    CPU2
>>
>> 1. Update ptr = new
>>
>> 2. synchronize_srcu()
>>    <Does not use synchronize_rcu()
>>    as SRCU_READ_FLAVOR_LITE is not
>>    set for any sdp as srcu_read_lock_lite()
>>    hasn't been called by any CPU>
>>
>>                                         3. srcu_read_lock_lite()
>>                                            <No smp_mb() ordering>
>>
>>                                         4. Can read ptr == old ?
>
> As long as the kernel was built with CONFIG_PROVE_RCU=y and given a fix
> to the wrong-CPU issue you quite rightly point out below, no it cannot.
> The CPU's first call to srcu_read_lock_lite() will use cmpxchg() to update
> ->srcu_reader_flavor, which will place full ordering between that update
> and the later read from "ptr".
>
> So if the synchronize_srcu() is too early to see the SRCU_READ_FLAVOR_LITE
> bit, then the reader must see the new value of "ptr". Similarly,
> if the reader can see the old value of "ptr", then synchronize_srcu()
> must see the reader's setting of the SRCU_READ_FLAVOR_LITE bit.
>
> But both the CONFIG_PROVE_RCU=n and the wrong-CPU issue must be fixed
> for this to work. Please see the upcoming patches to be posted as a
> reply to this email.
>
Got it. It will be good to document how the full ordering provided by cmpxchg()
helps here.
>>>>> + if (mask & SRCU_READ_FLAVOR_LITE && !gp)
>>>>> + return false;
>>>>
>>>> So, srcu_readers_active_idx_check() can potentially return false for a very long
>>>> time, until the CPU executing srcu_readers_active_idx_check() does
>>>> at least one read lock/unlock lite call?
>>>
>>> That is correct. The theory is that until after an srcu_read_lock_lite()
>>> has executed, there is no need to wait on it. Does the practice match the
>>> theory in this case, or is there some sequence of events that I missed?
>>
>> Below sequence
>>
>> CPU1                                    CPU2
>>
>>                                         1. srcu_read_lock_lite()
>>
>>                                         2. srcu_read_unlock_lite()
>>
>> 3. synchronize_srcu()
>>
>> 3.1 srcu_readers_lock_idx() is
>>     called with gp = false as
>>     srcu_read_lock_lite() was never
>>     called on this CPU for this
>>     srcu_struct. So
>>     ssp->sda->srcu_reader_flavor is not
>>     set for CPU1's sda.
>
> Good eyes! Yes, the scan that sums the ->srcu_unlock_count[] counters
> must also OR together the ->srcu_reader_flavor fields.
>
Agree
>> 3.2 Inside srcu_readers_lock_idx()
>>     "mask" contains SRCU_READ_FLAVOR_LITE
>>     as CPU2's sdp->srcu_reader_flavor has it.
>>
>> 3.3 CPU1 keeps returning false from
>>     below check until CPU1 does at least
>>     one srcu_read_lock_lite() call or
>>     the thread migrates.
>>
>>     if (mask & SRCU_READ_FLAVOR_LITE && !gp)
>>             return false;
>
> This is also fixed by the OR of the ->srcu_reader_flavor fields, correct?
>
Yes, agree.
> I guess I could claim that this bug prevents the wrong-CPU bug above
> from resulting in a too-short SRCU grace period, but it is of course
> better to just fix the bugs. ;-)
>
>>>>> + return sum == unlocks;
>>>>> }
>>>>>
>>>>> /*
>>>>> @@ -473,6 +482,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
>>>>> */
>>>>> static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
>>>>> {
>>>>> + bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
>>>>
>>>> sda->srcu_reader_flavor is only set when CONFIG_PROVE_RCU is enabled. But we
>>>> need the reader flavor information for the SRCU lite variant to work. So, the
>>>> lite variant does not work when CONFIG_PROVE_RCU is disabled. Am I missing something
>>>> obvious here?
>>>
>>> At first glance, it appears that I am the one who missed something obvious.
>>> Including in testing, which failed to uncover this issue.
>>>
>>> Thank you for the careful reviews!
>>
>> Sure thing, no problem!
>
> And again, thank you!
>
Again, no problem!
- Neeraj
The srcu_read_unlock_lite() function invokes __srcu_read_unlock() instead
of __srcu_read_unlock_lite(), which means that it is doing an unnecessary
smp_mb(). This is harmless other than the performance degradation.
This commit therefore switches to __srcu_read_unlock_lite().
Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
---
include/linux/srcu.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 512a8c54ba5ba..fff6265818d4f 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -373,7 +373,7 @@ static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
WARN_ON_ONCE(idx & ~0x1);
srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
srcu_lock_release(&ssp->dep_map);
- __srcu_read_unlock(ssp, idx);
+ __srcu_read_unlock_lite(ssp, idx);
}
/**
--
2.40.1
On 11/12/2024 7:01 AM, Paul E. McKenney wrote:
> The srcu_read_unlock_lite() function invokes __srcu_read_unlock() instead
> of __srcu_read_unlock_lite(), which means that it is doing an unnecessary
> smp_mb(). This is harmless other than the performance degradation.
>
> This commit therefore switches to __srcu_read_unlock_lite().
>
> Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
> Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
> Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> Cc: Frederic Weisbecker <frederic@kernel.org>
> ---
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
- Neeraj
If srcu_read_lock_lite() is used on a given srcu_struct structure, then
the grace-period processing must to synchronize_rcu() instead of smp_mb()
between the scans of the ->srcu_unlock_count[] and ->srcu_lock_count[]
counters. Currently, it does that by testing the SRCU_READ_FLAVOR_LITE
bit of the ->srcu_reader_flavor mask, which works well. But only if
the CPU running that srcu_struct structure's grace period has previously
executed srcu_read_lock_lite(), which might not be the case, especially
just after that srcu_struct structure has been created and initialized.
This commit therefore updates the srcu_readers_unlock_idx() function
to OR together the ->srcu_reader_flavor masks from all CPUs, and
then makes the srcu_readers_active_idx_check() function test the
SRCU_READ_FLAVOR_LITE bit in the resulting mask.
Note that the srcu_readers_unlock_idx() function is already scanning all
the CPUs to sum up the ->srcu_unlock_count[] fields and that this is on
the grace-period slow path, hence no concerns about the small amount of
extra work.
Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
---
kernel/rcu/srcutree.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 70979f294768c..5991381b44383 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -458,7 +458,7 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns
* Returns approximate total of the readers' ->srcu_unlock_count[] values
* for the rank of per-CPU counters specified by idx.
*/
-static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
+static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, unsigned long *rdm)
{
int cpu;
unsigned long mask = 0;
@@ -468,11 +468,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
sum += atomic_long_read(&sdp->srcu_unlock_count[idx]);
- if (IS_ENABLED(CONFIG_PROVE_RCU))
- mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
+ mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
}
WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
"Mixed reader flavors for srcu_struct at %ps.\n", ssp);
+ *rdm = mask;
return sum;
}
@@ -482,10 +482,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
*/
static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
{
- bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
+ unsigned long rdm;
unsigned long unlocks;
- unlocks = srcu_readers_unlock_idx(ssp, idx);
+ unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm);
+ bool did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE);
/*
* Make sure that a lock is always counted if the corresponding
--
2.40.1
On 11/12/2024 7:01 AM, Paul E. McKenney wrote:
> If srcu_read_lock_lite() is used on a given srcu_struct structure, then
> the grace-period processing must to synchronize_rcu() instead of smp_mb()
s/to/do/
> between the scans of the ->srcu_unlock_count[] and ->srcu_lock_count[]
> counters. Currently, it does that by testing the SRCU_READ_FLAVOR_LITE
> bit of the ->srcu_reader_flavor mask, which works well. But only if
> the CPU running that srcu_struct structure's grace period has previously
> executed srcu_read_lock_lite(), which might not be the case, especially
> just after that srcu_struct structure has been created and initialized.
>
> This commit therefore updates the srcu_readers_unlock_idx() function
> to OR together the ->srcu_reader_flavor masks from all CPUs, and
> then makes the srcu_readers_active_idx_check() function test the
> SRCU_READ_FLAVOR_LITE bit in the resulting mask.
>
> Note that the srcu_readers_unlock_idx() function is already scanning all
> the CPUs to sum up the ->srcu_unlock_count[] fields and that this is on
> the grace-period slow path, hence no concerns about the small amount of
> extra work.
>
> Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
> Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
> Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> Cc: Frederic Weisbecker <frederic@kernel.org>
> ---
> kernel/rcu/srcutree.c | 11 ++++++-----
> 1 file changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 70979f294768c..5991381b44383 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -458,7 +458,7 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns
> * Returns approximate total of the readers' ->srcu_unlock_count[] values
> * for the rank of per-CPU counters specified by idx.
> */
> -static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> +static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, unsigned long *rdm)
> {
> int cpu;
> unsigned long mask = 0;
> @@ -468,11 +468,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
>
> sum += atomic_long_read(&sdp->srcu_unlock_count[idx]);
> - if (IS_ENABLED(CONFIG_PROVE_RCU))
> - mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
> + mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
> }
> WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
> "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
> + *rdm = mask;
> return sum;
> }
>
> @@ -482,10 +482,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> */
> static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> {
> - bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
> + unsigned long rdm;
> unsigned long unlocks;
>
> - unlocks = srcu_readers_unlock_idx(ssp, idx);
> + unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm);
> + bool did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE);
Move "did_gp" declaration up?
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
-Neeraj
On Tue, Nov 12, 2024 at 08:58:08AM +0530, Neeraj Upadhyay wrote:
> On 11/12/2024 7:01 AM, Paul E. McKenney wrote:
> > If srcu_read_lock_lite() is used on a given srcu_struct structure, then
> > the grace-period processing must to synchronize_rcu() instead of smp_mb()
>
> s/to/do/
Good eyes, fixed!
> > between the scans of the ->srcu_unlock_count[] and ->srcu_lock_count[]
> > counters. Currently, it does that by testing the SRCU_READ_FLAVOR_LITE
> > bit of the ->srcu_reader_flavor mask, which works well. But only if
> > the CPU running that srcu_struct structure's grace period has previously
> > executed srcu_read_lock_lite(), which might not be the case, especially
> > just after that srcu_struct structure has been created and initialized.
> >
> > This commit therefore updates the srcu_readers_unlock_idx() function
> > to OR together the ->srcu_reader_flavor masks from all CPUs, and
> > then makes the srcu_readers_active_idx_check() function test the
> > SRCU_READ_FLAVOR_LITE bit in the resulting mask.
> >
> > Note that the srcu_readers_unlock_idx() function is already scanning all
> > the CPUs to sum up the ->srcu_unlock_count[] fields and that this is on
> > the grace-period slow path, hence no concerns about the small amount of
> > extra work.
> >
> > Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
> > Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
> > Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
> > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > Cc: Frederic Weisbecker <frederic@kernel.org>
> > ---
> > kernel/rcu/srcutree.c | 11 ++++++-----
> > 1 file changed, 6 insertions(+), 5 deletions(-)
> >
> > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > index 70979f294768c..5991381b44383 100644
> > --- a/kernel/rcu/srcutree.c
> > +++ b/kernel/rcu/srcutree.c
> > @@ -458,7 +458,7 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns
> > * Returns approximate total of the readers' ->srcu_unlock_count[] values
> > * for the rank of per-CPU counters specified by idx.
> > */
> > -static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> > +static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, unsigned long *rdm)
> > {
> > int cpu;
> > unsigned long mask = 0;
> > @@ -468,11 +468,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> > struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> >
> > sum += atomic_long_read(&sdp->srcu_unlock_count[idx]);
> > - if (IS_ENABLED(CONFIG_PROVE_RCU))
> > - mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
> > + mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
> > }
> > WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
> > "Mixed reader flavors for srcu_struct at %ps.\n", ssp);
> > + *rdm = mask;
> > return sum;
> > }
> >
> > @@ -482,10 +482,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
> > */
> > static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
> > {
> > - bool did_gp = !!(raw_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE);
> > + unsigned long rdm;
> > unsigned long unlocks;
> >
> > - unlocks = srcu_readers_unlock_idx(ssp, idx);
> > + unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm);
> > + bool did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE);
>
> Move "did_gp" declaration up?
C now allows this? ;-)
Fixed!
> Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
And applied all three, again, thank you!
Thanx, Paul
Currently, srcu_read_lock_lite() uses the SRCU_READ_FLAVOR_LITE bit in
->srcu_reader_flavor to communicate to the grace-period processing in
srcu_readers_active_idx_check() that the smp_mb() must be replaced by a
synchronize_rcu(). Unfortunately, ->srcu_reader_flavor is not updated
unless the kernel is built with CONFIG_PROVE_RCU=y. Therefore in all
kernels built with CONFIG_PROVE_RCU=n, srcu_readers_active_idx_check()
incorrectly uses smp_mb() instead of synchronize_rcu() for srcu_struct
structures whose readers use srcu_read_lock_lite().
This commit therefore causes Tree SRCU srcu_read_lock_lite()
to unconditionally update ->srcu_reader_flavor so that
srcu_readers_active_idx_check() can make the correct choice.
Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
---
include/linux/srcu.h | 8 +-------
include/linux/srcutiny.h | 3 +++
include/linux/srcutree.h | 21 +++++++++++++++++++++
kernel/rcu/srcutree.c | 6 ++----
4 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index fff6265818d4f..abaddd7e6ddf7 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -183,12 +183,6 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
-void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
-#else
-#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
-#endif
-
/**
* srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
@@ -278,7 +272,7 @@ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
- srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+ srcu_check_read_flavor_lite(ssp);
retval = __srcu_read_lock_lite(ssp);
rcu_try_lock_acquire(&ssp->dep_map);
return retval;
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 4d96bbdb45f08..1321da803274d 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -81,6 +81,9 @@ static inline void srcu_barrier(struct srcu_struct *ssp)
synchronize_srcu(ssp);
}
+#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
+#define srcu_check_read_flavor_lite(ssp) do { } while (0)
+
/* Defined here to avoid size increase for non-torture kernels. */
static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
char *tt, char *tf)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 778eb61542e18..490aeecc6bb47 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -248,4 +248,25 @@ static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
}
+void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
+
+// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels.
+static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp)
+{
+ struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+ if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE))
+ return;
+
+ // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered.
+ __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+}
+
+// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels.
+static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
+{
+ if (IS_ENABLED(CONFIG_PROVE_RCU))
+ __srcu_check_read_flavor(ssp, read_flavor);
+}
+
#endif
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 5991381b44383..ee3cbb8308664 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -711,11 +711,10 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-#ifdef CONFIG_PROVE_RCU
/*
* Check for consistent reader flavor.
*/
-void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
+void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
{
int old_read_flavor;
struct srcu_data *sdp;
@@ -733,8 +732,7 @@ void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
}
WARN_ONCE(old_read_flavor != read_flavor, "CPU %d old state %d new state %d\n", sdp->cpu, old_read_flavor, read_flavor);
}
-EXPORT_SYMBOL_GPL(srcu_check_read_flavor);
-#endif /* CONFIG_PROVE_RCU */
+EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
/*
* Counts the new reader in the appropriate per-CPU element of the
--
2.40.1
...
> +void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
> +
> +// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels.
> +static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp)
> +{
> + struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
> +
> + if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE))
> + return;
> +
> + // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered.
Nit: s/srcu_check_read_flavor/__srcu_check_read_flavor/
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
- Neeraj
> + __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
> +}
> +