Hello!
This series contains SRCU updates, perhaps most notably the addition of
srcu_read_lock_fast() and srcu_read_unlock_fast(), which on my laptop are
about 20% faster than rcu_read_lock_trace() and rcu_read_unlock_trace().
There are of course drawbacks:
o Lack of CPU stall warnings.
o SRCU-fast readers permitted only where rcu_is_watching().
o A pointer-sized return value from srcu_read_lock_fast() must
be passed to the corresponding srcu_read_unlock_fast().
o Even in the absence of readers, a synchronize_srcu() invoked on an
srcu_struct having _fast() readers will incur the latency of at
least two normal RCU grace periods.
o RCU Tasks Trace priority boosting could be easily added.
Boosting SRCU readers is more difficult.
Whether or not this can replace RCU Tasks Trace, it should replace the
SRCU-lite API, where the only drawback is a pointer-sized return value
from srcu_read_lock_fast() compared to the int-sized return value from
srcu_read_lock_lite(). And yes, I should have thought harder before
doing that SRCU-lite...
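For concreteness, here is a minimal reader sketch using the new API
(my_srcu and my_reader are hypothetical names; note that the cookie
returned by srcu_read_lock_fast() is a pointer, not an int):
	DEFINE_SRCU(my_srcu);			/* Hypothetical srcu_struct. */
	static void my_reader(void)		/* Sketch only. */
	{
		struct srcu_ctr __percpu *scp;
		/* Legal only where rcu_is_watching(). */
		scp = srcu_read_lock_fast(&my_srcu);
		/* Read-side critical section: no smp_mb() on this path. */
		srcu_read_unlock_fast(&my_srcu, scp);	/* Pass the cookie back. */
	}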
This series is as follows:
1. Make Tiny SRCU able to operate in preemptible kernels.
2. Define SRCU_READ_FLAVOR_ALL in terms of symbols.
3. Use ->srcu_gp_seq for rcutorture reader batch.
4. Pull ->srcu_{un,}lock_count into a new srcu_ctr structure.
5. Make SRCU readers use ->srcu_ctrs for counter selection.
6. Make Tree SRCU updates independent of ->srcu_idx.
7. Force synchronization for srcu_get_delay().
8. Rename srcu_check_read_flavor_lite() to
srcu_check_read_flavor_force().
9. Add SRCU_READ_FLAVOR_SLOWGP to flag need for synchronize_rcu().
10. Pull pointer-to-integer conversion into __srcu_ptr_to_ctr().
11. Pull integer-to-pointer conversion into __srcu_ctr_to_ptr().
12. Move SRCU Tree/Tiny definitions from srcu.h.
13. Add SRCU-fast readers.
14. Add ability to test srcu_read_{,un}lock_fast().
15. Add srcu_read_lock_fast() support using "srcu-fast".
16. Make scenario SRCU-P use srcu_read_lock_fast().
17. Fix srcu_read_unlock_{lite,nmisafe}() kernel-doc.
18. Document that srcu_{read_lock,down_read}() can share srcu_struct.
19. Add srcu_down_read_fast() and srcu_up_read_fast().
20. Make SRCU-fast also be NMI-safe.
Changes since v1:
o Explicitly document that srcu_{read_lock,down_read}() can share
an srcu_struct.
o Add srcu_down_read_fast() and srcu_up_read_fast() based on
feedback from Andrii Nakryiko.
o Make SRCU-fast readers NMI-safe based on feedback from Alexei
Starovoitov.
Thanx, Paul
------------------------------------------------------------------------
b/include/linux/srcu.h | 3
b/include/linux/srcutiny.h | 2
b/include/linux/srcutree.h | 13
b/kernel/rcu/rcu.h | 9
b/kernel/rcu/rcutorture.c | 2
b/kernel/rcu/refscale.c | 32 +
b/kernel/rcu/srcutiny.c | 6
b/kernel/rcu/srcutree.c | 2
b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot | 1
include/linux/srcu.h | 103 ++++
include/linux/srcutiny.h | 25 +
include/linux/srcutree.h | 111 ++++-
kernel/rcu/rcutorture.c | 9
kernel/rcu/srcutree.c | 233 +++++------
14 files changed, 392 insertions(+), 159 deletions(-)
Currently, srcu_get_delay() can be called concurrently, for example,
by a CPU that is the first to request a new grace period and the CPU
processing the current grace period. Although concurrent access is
harmless, it unnecessarily expands the state space. Additionally,
all calls to srcu_get_delay() are from slow paths.
This commit therefore protects all calls to srcu_get_delay() with
ssp->srcu_sup->lock, which is already held on the invocation from the
srcu_funnel_gp_start() function. While in the area, this commit also
adds a lockdep_assert_held() to srcu_get_delay() itself.
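In miniature, the resulting calling convention looks like this (a
sketch; the real call sites appear in the diff below):
	unsigned long delay;
	spin_lock_irq_rcu_node(ssp->srcu_sup);
	delay = srcu_get_delay(ssp);	/* Now asserts that the lock is held. */
	spin_unlock_irq_rcu_node(ssp->srcu_sup);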
Reported-by: syzbot+16a19b06125a2963eaee@syzkaller.appspotmail.com
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
kernel/rcu/srcutree.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index dfc98e69acca..46e4cdaa1786 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -648,6 +648,7 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
unsigned long jbase = SRCU_INTERVAL;
struct srcu_usage *sup = ssp->srcu_sup;
+ lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
if (srcu_gp_is_expedited(ssp))
jbase = 0;
if (rcu_seq_state(READ_ONCE(sup->srcu_gp_seq))) {
@@ -675,9 +676,13 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
void cleanup_srcu_struct(struct srcu_struct *ssp)
{
int cpu;
+ unsigned long delay;
struct srcu_usage *sup = ssp->srcu_sup;
- if (WARN_ON(!srcu_get_delay(ssp)))
+ spin_lock_irq_rcu_node(ssp->srcu_sup);
+ delay = srcu_get_delay(ssp);
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ if (WARN_ON(!delay))
return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(ssp)))
return; /* Just leak it! */
@@ -1101,7 +1106,9 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
{
unsigned long curdelay;
+ spin_lock_irq_rcu_node(ssp->srcu_sup);
curdelay = !srcu_get_delay(ssp);
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
for (;;) {
if (srcu_readers_active_idx_check(ssp, idx))
@@ -1854,7 +1861,9 @@ static void process_srcu(struct work_struct *work)
ssp = sup->srcu_ssp;
srcu_advance_state(ssp);
+ spin_lock_irq_rcu_node(ssp->srcu_sup);
curdelay = srcu_get_delay(ssp);
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (curdelay) {
WRITE_ONCE(sup->reschedule_count, 0);
} else {
--
2.40.1
This commit prepares for array-index-free srcu_read_lock*() by moving the
->srcu_{un,}lock_count fields into a new srcu_ctr structure. This will
permit ->srcu_idx to be replaced by a per-CPU pointer to this structure.
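As a sketch of where the series is headed (the pointer-based form lands
only in later patches and is shown here for orientation):
	/* Now: an index selects the rank of counters. */
	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
	this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
	/* Later in this series: ->srcu_ctrp points directly at the
	 * active srcu_ctr element, eliminating the array indexing. */
	scp = READ_ONCE(ssp->srcu_ctrp);
	this_cpu_inc(scp->srcu_locks.counter);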
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcutree.h | 13 +++--
kernel/rcu/srcutree.c | 115 +++++++++++++++++++--------------------
2 files changed, 66 insertions(+), 62 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index b17814c9d1c7..c794d599db5c 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -17,14 +17,19 @@
struct srcu_node;
struct srcu_struct;
+/* One element of the srcu_data srcu_ctrs array. */
+struct srcu_ctr {
+ atomic_long_t srcu_locks; /* Locks per CPU. */
+ atomic_long_t srcu_unlocks; /* Unlocks per CPU. */
+};
+
/*
* Per-CPU structure feeding into leaf srcu_node, similar in function
* to rcu_node.
*/
struct srcu_data {
/* Read-side state. */
- atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */
- atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */
+ struct srcu_ctr srcu_ctrs[2]; /* Locks and unlocks per CPU. */
int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */
/* Values: SRCU_READ_FLAVOR_.* */
@@ -221,7 +226,7 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */
+ this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
barrier(); /* Avoid leaking the critical section. */
return idx;
}
@@ -240,7 +245,7 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
{
barrier(); /* Avoid leaking the critical section. */
- this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */
+ this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_unlocks.counter); /* Z */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index b5bb73c877de..d4e9cd917a69 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -116,8 +116,9 @@ do { \
/*
* Initialize SRCU per-CPU data. Note that statically allocated
* srcu_struct structures might already have srcu_read_lock() and
- * srcu_read_unlock() running against them. So if the is_static parameter
- * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
+ * srcu_read_unlock() running against them. So if the is_static
+ * parameter is set, don't initialize ->srcu_ctrs[].srcu_locks and
+ * ->srcu_ctrs[].srcu_unlocks.
*/
static void init_srcu_struct_data(struct srcu_struct *ssp)
{
@@ -128,8 +129,6 @@ static void init_srcu_struct_data(struct srcu_struct *ssp)
* Initialize the per-CPU srcu_data array, which feeds into the
* leaves of the srcu_node tree.
*/
- BUILD_BUG_ON(ARRAY_SIZE(sdp->srcu_lock_count) !=
- ARRAY_SIZE(sdp->srcu_unlock_count));
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(ssp->sda, cpu);
spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
@@ -429,10 +428,10 @@ static bool srcu_gp_is_expedited(struct srcu_struct *ssp)
}
/*
- * Computes approximate total of the readers' ->srcu_lock_count[] values
- * for the rank of per-CPU counters specified by idx, and returns true if
- * the caller did the proper barrier (gp), and if the count of the locks
- * matches that of the unlocks passed in.
+ * Computes approximate total of the readers' ->srcu_ctrs[].srcu_locks
+ * values for the rank of per-CPU counters specified by idx, and returns
+ * true if the caller did the proper barrier (gp), and if the count of
+ * the locks matches that of the unlocks passed in.
*/
static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
{
@@ -443,7 +442,7 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
+ sum += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_locks);
if (IS_ENABLED(CONFIG_PROVE_RCU))
mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
}
@@ -455,8 +454,8 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns
}
/*
- * Returns approximate total of the readers' ->srcu_unlock_count[] values
- * for the rank of per-CPU counters specified by idx.
+ * Returns approximate total of the readers' ->srcu_ctrs[].srcu_unlocks
+ * values for the rank of per-CPU counters specified by idx.
*/
static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, unsigned long *rdm)
{
@@ -467,7 +466,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, u
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- sum += atomic_long_read(&sdp->srcu_unlock_count[idx]);
+ sum += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_unlocks);
mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
}
WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
@@ -510,9 +509,9 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
* been no readers on this index at some point in this function.
* But there might be more readers, as a task might have read
* the current ->srcu_idx but not yet have incremented its CPU's
- * ->srcu_lock_count[idx] counter. In fact, it is possible
+ * ->srcu_ctrs[idx].srcu_locks counter. In fact, it is possible
* that most of the tasks have been preempted between fetching
- * ->srcu_idx and incrementing ->srcu_lock_count[idx]. And there
+ * ->srcu_idx and incrementing ->srcu_ctrs[idx].srcu_locks. And there
* could be almost (ULONG_MAX / sizeof(struct task_struct)) tasks
* in a system whose address space was fully populated with memory.
* Call this quantity Nt.
@@ -521,36 +520,36 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
* code for a long time. That now-preempted updater has already
* flipped ->srcu_idx (possibly during the preceding grace period),
* done an smp_mb() (again, possibly during the preceding grace
- * period), and summed up the ->srcu_unlock_count[idx] counters.
+ * period), and summed up the ->srcu_ctrs[idx].srcu_unlocks counters.
* How many times can a given one of the aforementioned Nt tasks
- * increment the old ->srcu_idx value's ->srcu_lock_count[idx]
+ * increment the old ->srcu_idx value's ->srcu_ctrs[idx].srcu_locks
* counter, in the absence of nesting?
*
* It can clearly do so once, given that it has already fetched
- * the old value of ->srcu_idx and is just about to use that value
- * to index its increment of ->srcu_lock_count[idx]. But as soon as
- * it leaves that SRCU read-side critical section, it will increment
- * ->srcu_unlock_count[idx], which must follow the updater's above
- * read from that same value. Thus, as soon the reading task does
- * an smp_mb() and a later fetch from ->srcu_idx, that task will be
- * guaranteed to get the new index. Except that the increment of
- * ->srcu_unlock_count[idx] in __srcu_read_unlock() is after the
- * smp_mb(), and the fetch from ->srcu_idx in __srcu_read_lock()
- * is before the smp_mb(). Thus, that task might not see the new
- * value of ->srcu_idx until the -second- __srcu_read_lock(),
- * which in turn means that this task might well increment
- * ->srcu_lock_count[idx] for the old value of ->srcu_idx twice,
- * not just once.
+ * the old value of ->srcu_idx and is just about to use that
+ * value to index its increment of ->srcu_ctrs[idx].srcu_locks.
+ * But as soon as it leaves that SRCU read-side critical section,
+ * it will increment ->srcu_ctrs[idx].srcu_unlocks, which must
+ * follow the updater's above read from that same value. Thus,
+ * as soon the reading task does an smp_mb() and a later fetch from
+ * ->srcu_idx, that task will be guaranteed to get the new index.
+ * Except that the increment of ->srcu_ctrs[idx].srcu_unlocks
+ * in __srcu_read_unlock() is after the smp_mb(), and the fetch
+ * from ->srcu_idx in __srcu_read_lock() is before the smp_mb().
+ * Thus, that task might not see the new value of ->srcu_idx until
+ * the -second- __srcu_read_lock(), which in turn means that this
+ * task might well increment ->srcu_ctrs[idx].srcu_locks for the
+ * old value of ->srcu_idx twice, not just once.
*
* However, it is important to note that a given smp_mb() takes
* effect not just for the task executing it, but also for any
* later task running on that same CPU.
*
- * That is, there can be almost Nt + Nc further increments of
- * ->srcu_lock_count[idx] for the old index, where Nc is the number
- * of CPUs. But this is OK because the size of the task_struct
- * structure limits the value of Nt and current systems limit Nc
- * to a few thousand.
+ * That is, there can be almost Nt + Nc further increments
+ * of ->srcu_ctrs[idx].srcu_locks for the old index, where Nc
+ * is the number of CPUs. But this is OK because the size of
+ * the task_struct structure limits the value of Nt and current
+ * systems limit Nc to a few thousand.
*
* OK, but what about nesting? This does impose a limit on
* nesting of half of the size of the task_struct structure
@@ -581,10 +580,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- sum += atomic_long_read(&sdp->srcu_lock_count[0]);
- sum += atomic_long_read(&sdp->srcu_lock_count[1]);
- sum -= atomic_long_read(&sdp->srcu_unlock_count[0]);
- sum -= atomic_long_read(&sdp->srcu_unlock_count[1]);
+ sum += atomic_long_read(&sdp->srcu_ctrs[0].srcu_locks);
+ sum += atomic_long_read(&sdp->srcu_ctrs[1].srcu_locks);
+ sum -= atomic_long_read(&sdp->srcu_ctrs[0].srcu_unlocks);
+ sum -= atomic_long_read(&sdp->srcu_ctrs[1].srcu_unlocks);
}
return sum;
}
@@ -746,7 +745,7 @@ int __srcu_read_lock(struct srcu_struct *ssp)
int idx;
idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
+ this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
return idx;
}
@@ -760,7 +759,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
{
smp_mb(); /* C */ /* Avoid leaking the critical section. */
- this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);
+ this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_unlocks.counter);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
@@ -777,7 +776,7 @@ int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- atomic_long_inc(&sdp->srcu_lock_count[idx]);
+ atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks);
smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */
return idx;
}
@@ -793,7 +792,7 @@ void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */
- atomic_long_inc(&sdp->srcu_unlock_count[idx]);
+ atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_unlocks);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
@@ -1123,17 +1122,17 @@ static void srcu_flip(struct srcu_struct *ssp)
/*
* Because the flip of ->srcu_idx is executed only if the
* preceding call to srcu_readers_active_idx_check() found that
- * the ->srcu_unlock_count[] and ->srcu_lock_count[] sums matched
- * and because that summing uses atomic_long_read(), there is
- * ordering due to a control dependency between that summing and
- * the WRITE_ONCE() in this call to srcu_flip(). This ordering
- * ensures that if this updater saw a given reader's increment from
- * __srcu_read_lock(), that reader was using a value of ->srcu_idx
- * from before the previous call to srcu_flip(), which should be
- * quite rare. This ordering thus helps forward progress because
- * the grace period could otherwise be delayed by additional
- * calls to __srcu_read_lock() using that old (soon to be new)
- * value of ->srcu_idx.
+ * the ->srcu_ctrs[].srcu_unlocks and ->srcu_ctrs[].srcu_locks sums
+ * matched and because that summing uses atomic_long_read(),
+ * there is ordering due to a control dependency between that
+ * summing and the WRITE_ONCE() in this call to srcu_flip().
+ * This ordering ensures that if this updater saw a given reader's
+ * increment from __srcu_read_lock(), that reader was using a value
+ * of ->srcu_idx from before the previous call to srcu_flip(),
+ * which should be quite rare. This ordering thus helps forward
+ * progress because the grace period could otherwise be delayed
+ * by additional calls to __srcu_read_lock() using that old (soon
+ * to be new) value of ->srcu_idx.
*
* This sum-equality check and ordering also ensures that if
* a given call to __srcu_read_lock() uses the new value of
@@ -1918,8 +1917,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
struct srcu_data *sdp;
sdp = per_cpu_ptr(ssp->sda, cpu);
- u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx]));
- u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx]));
+ u0 = data_race(atomic_long_read(&sdp->srcu_ctrs[!idx].srcu_unlocks));
+ u1 = data_race(atomic_long_read(&sdp->srcu_ctrs[idx].srcu_unlocks));
/*
* Make sure that a lock is always counted if the corresponding
@@ -1927,8 +1926,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
*/
smp_rmb();
- l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx]));
- l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx]));
+ l0 = data_race(atomic_long_read(&sdp->srcu_ctrs[!idx].srcu_locks));
+ l1 = data_race(atomic_long_read(&sdp->srcu_ctrs[idx].srcu_locks));
c0 = l0 - u0;
c1 = l1 - u1;
--
2.40.1
This commit switches from a direct test of SRCU_READ_FLAVOR_LITE to a new
SRCU_READ_FLAVOR_SLOWGP macro to check for substituting synchronize_rcu()
for smp_mb() in SRCU grace periods. Right now, SRCU_READ_FLAVOR_SLOWGP
is exactly SRCU_READ_FLAVOR_LITE, but the addition of the _fast() flavor
of SRCU will change that.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcu.h | 3 +++
kernel/rcu/srcutree.c | 6 +++---
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index ca00b9af7c23..505f5bdce444 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -49,6 +49,9 @@ int init_srcu_struct(struct srcu_struct *ssp);
#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite().
#define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \
SRCU_READ_FLAVOR_LITE) // All of the above.
+#define SRCU_READ_FLAVOR_SLOWGP SRCU_READ_FLAVOR_LITE
+ // Flavors requiring synchronize_rcu()
+ // instead of smp_mb().
#ifdef CONFIG_TINY_SRCU
#include <linux/srcutiny.h>
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 46e4cdaa1786..973e49d04f4f 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -449,7 +449,7 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns
}
WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
"Mixed reader flavors for srcu_struct at %ps.\n", ssp);
- if (mask & SRCU_READ_FLAVOR_LITE && !gp)
+ if (mask & SRCU_READ_FLAVOR_SLOWGP && !gp)
return false;
return sum == unlocks;
}
@@ -487,7 +487,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
unsigned long unlocks;
unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm);
- did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE);
+ did_gp = !!(rdm & SRCU_READ_FLAVOR_SLOWGP);
/*
* Make sure that a lock is always counted if the corresponding
@@ -1205,7 +1205,7 @@ static bool srcu_should_expedite(struct srcu_struct *ssp)
check_init_srcu_struct(ssp);
/* If _lite() readers, don't do unsolicited expediting. */
- if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)
+ if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_SLOWGP)
return false;
/* If the local srcu_data structure has callbacks, not idle. */
sdp = raw_cpu_ptr(ssp->sda);
--
2.40.1
This commit renames the srcu_check_read_flavor_lite() function to
srcu_check_read_flavor_force() and adds a read_flavor argument in order
to support an srcu_read_lock_fast() variant that avoids array indexing
in both the lock and unlock primitives.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcu.h | 2 +-
include/linux/srcutiny.h | 2 +-
include/linux/srcutree.h | 10 ++++++----
3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index f6f779b9d9ff..ca00b9af7c23 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -279,7 +279,7 @@ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
- srcu_check_read_flavor_lite(ssp);
+ srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_LITE);
retval = __srcu_read_lock_lite(ssp);
rcu_try_lock_acquire(&ssp->dep_map);
return retval;
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 31b59b4be2a7..6b1a7276aa4c 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -82,7 +82,7 @@ static inline void srcu_barrier(struct srcu_struct *ssp)
}
#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
-#define srcu_check_read_flavor_lite(ssp) do { } while (0)
+#define srcu_check_read_flavor_force(ssp, read_flavor) do { } while (0)
/* Defined here to avoid size increase for non-torture kernels. */
static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 6b7eba59f384..e29cc57eac81 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -251,16 +251,18 @@ static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
-// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels.
-static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp)
+// Record reader usage even for CONFIG_PROVE_RCU=n kernels. This is
+// needed only for flavors that require grace-period smp_mb() calls to be
+// promoted to synchronize_rcu().
+static inline void srcu_check_read_flavor_force(struct srcu_struct *ssp, int read_flavor)
{
struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
- if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE))
+ if (likely(READ_ONCE(sdp->srcu_reader_flavor) & read_flavor))
return;
// Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered.
- __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+ __srcu_check_read_flavor(ssp, read_flavor);
}
// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels.
--
2.40.1
This commit abstracts the srcu_read_lock*() pointer-to-integer conversion
into a new __srcu_ptr_to_ctr(). This will be used in rcutorture for
testing an srcu_read_lock_fast() that returns a pointer rather than
an integer.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcutree.h | 9 ++++++++-
kernel/rcu/srcutree.c | 4 ++--
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index e29cc57eac81..f41bb3a55a04 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -211,6 +211,13 @@ void synchronize_srcu_expedited(struct srcu_struct *ssp);
void srcu_barrier(struct srcu_struct *ssp);
void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
+// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that
+// element's index.
+static inline int __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
+{
+ return scpp - &ssp->sda->srcu_ctrs[0];
+}
+
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Returns an index that must be passed to the matching
@@ -228,7 +235,7 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
this_cpu_inc(scp->srcu_locks.counter); /* Y */
barrier(); /* Avoid leaking the critical section. */
- return scp - &ssp->sda->srcu_ctrs[0];
+ return __srcu_ptr_to_ctr(ssp, scp);
}
/*
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 973e49d04f4f..4643a8ed7e32 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -753,7 +753,7 @@ int __srcu_read_lock(struct srcu_struct *ssp)
this_cpu_inc(scp->srcu_locks.counter);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
- return scp - &ssp->sda->srcu_ctrs[0];
+ return __srcu_ptr_to_ctr(ssp, scp);
}
EXPORT_SYMBOL_GPL(__srcu_read_lock);
@@ -783,7 +783,7 @@ int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
atomic_long_inc(&scp->srcu_locks);
smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */
- return scpp - &ssp->sda->srcu_ctrs[0];
+ return __srcu_ptr_to_ctr(ssp, scpp);
}
EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
--
2.40.1
This commit permits rcutorture to test srcu_read_{,un}lock_fast(), which
is specified by the rcutorture.reader_flavor=0x8 kernel boot parameter.
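Assuming the bit packing visible in the diff below, plus the existing
packing for the normal and NMI-safe flavors, the reader cookie is laid
out roughly as follows:
	/*
	 * srcu_torture_read_lock() return value (sketch):
	 *	bit 0: index from srcu_read_lock()
	 *	bit 1: index from srcu_read_lock_nmisafe() (idx << 1)
	 *	bit 2: index from srcu_read_lock_lite()    (idx << 2)
	 *	bit 3: index from srcu_read_lock_fast()    (idx << 3)
	 */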
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
kernel/rcu/rcutorture.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 1d2de50fb5d6..1bd3eaa0b8e7 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -677,6 +677,7 @@ static void srcu_get_gp_data(int *flags, unsigned long *gp_seq)
static int srcu_torture_read_lock(void)
{
int idx;
+ struct srcu_ctr __percpu *scp;
int ret = 0;
if ((reader_flavor & SRCU_READ_FLAVOR_NORMAL) || !(reader_flavor & SRCU_READ_FLAVOR_ALL)) {
@@ -694,6 +695,12 @@ static int srcu_torture_read_lock(void)
WARN_ON_ONCE(idx & ~0x1);
ret += idx << 2;
}
+ if (reader_flavor & SRCU_READ_FLAVOR_FAST) {
+ scp = srcu_read_lock_fast(srcu_ctlp);
+ idx = __srcu_ptr_to_ctr(srcu_ctlp, scp);
+ WARN_ON_ONCE(idx & ~0x1);
+ ret += idx << 3;
+ }
return ret;
}
@@ -719,6 +726,8 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
static void srcu_torture_read_unlock(int idx)
{
WARN_ON_ONCE((reader_flavor && (idx & ~reader_flavor)) || (!reader_flavor && (idx & ~0x1)));
+ if (reader_flavor & SRCU_READ_FLAVOR_FAST)
+ srcu_read_unlock_fast(srcu_ctlp, __srcu_ctr_to_ptr(srcu_ctlp, (idx & 0x8) >> 3));
if (reader_flavor & SRCU_READ_FLAVOR_LITE)
srcu_read_unlock_lite(srcu_ctlp, (idx & 0x4) >> 2);
if (reader_flavor & SRCU_READ_FLAVOR_NMI)
--
2.40.1
There are a couple of definitions under "#ifdef CONFIG_TINY_SRCU"
in include/linux/srcu.h. There is no point in them being there,
so this commit moves them to include/linux/srcutiny.h and
include/linux/srcutree.h, thus eliminating that #ifdef.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcu.h | 10 +---------
include/linux/srcutiny.h | 3 +++
include/linux/srcutree.h | 1 +
3 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 505f5bdce444..2bd0e24e9b55 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -52,6 +52,7 @@ int init_srcu_struct(struct srcu_struct *ssp);
#define SRCU_READ_FLAVOR_SLOWGP SRCU_READ_FLAVOR_LITE
// Flavors requiring synchronize_rcu()
// instead of smp_mb().
+void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
#ifdef CONFIG_TINY_SRCU
#include <linux/srcutiny.h>
@@ -64,15 +65,6 @@ int init_srcu_struct(struct srcu_struct *ssp);
void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
void (*func)(struct rcu_head *head));
void cleanup_srcu_struct(struct srcu_struct *ssp);
-int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
-void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
-#ifdef CONFIG_TINY_SRCU
-#define __srcu_read_lock_lite __srcu_read_lock
-#define __srcu_read_unlock_lite __srcu_read_unlock
-#else // #ifdef CONFIG_TINY_SRCU
-int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp);
-void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp);
-#endif // #else // #ifdef CONFIG_TINY_SRCU
void synchronize_srcu(struct srcu_struct *ssp);
#define SRCU_GET_STATE_COMPLETED 0x1
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 6b1a7276aa4c..07a0c4489ea2 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -71,6 +71,9 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
return idx;
}
+#define __srcu_read_lock_lite __srcu_read_lock
+#define __srcu_read_unlock_lite __srcu_read_unlock
+
static inline void synchronize_srcu_expedited(struct srcu_struct *ssp)
{
synchronize_srcu(ssp);
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 55fa400624bb..ef3065c0cadc 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -207,6 +207,7 @@ struct srcu_struct {
#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
+int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void synchronize_srcu_expedited(struct srcu_struct *ssp);
void srcu_barrier(struct srcu_struct *ssp);
void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
--
2.40.1
This commit makes Tree SRCU updates independent of ->srcu_idx, then
drops ->srcu_idx.
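The index formerly read from ->srcu_idx is instead derived from
->srcu_ctrp by pointer arithmetic, as in this sketch:
	/* ->srcu_ctrp points at &sda->srcu_ctrs[0] or &sda->srcu_ctrs[1]. */
	int active = ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0];	/* 0 or 1. */
	int idx = !active;		/* Rank passed to try_check_zero(). */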
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcutree.h | 1 -
kernel/rcu/srcutree.c | 68 ++++++++++++++++++++--------------------
2 files changed, 34 insertions(+), 35 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 1b01ced61a45..6b7eba59f384 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -100,7 +100,6 @@ struct srcu_usage {
* Per-SRCU-domain structure, similar in function to rcu_state.
*/
struct srcu_struct {
- unsigned int srcu_idx; /* Current rdr array element. */
struct srcu_ctr __percpu *srcu_ctrp;
struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */
struct lockdep_map dep_map;
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 9af86ce2dd24..dfc98e69acca 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -246,7 +246,6 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
ssp->srcu_sup->node = NULL;
mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
mutex_init(&ssp->srcu_sup->srcu_gp_mutex);
- ssp->srcu_idx = 0;
ssp->srcu_sup->srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL;
ssp->srcu_sup->srcu_barrier_seq = 0;
mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
@@ -510,38 +509,39 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
* If the locks are the same as the unlocks, then there must have
* been no readers on this index at some point in this function.
* But there might be more readers, as a task might have read
- * the current ->srcu_idx but not yet have incremented its CPU's
+ * the current ->srcu_ctrp but not yet have incremented its CPU's
* ->srcu_ctrs[idx].srcu_locks counter. In fact, it is possible
* that most of the tasks have been preempted between fetching
- * ->srcu_idx and incrementing ->srcu_ctrs[idx].srcu_locks. And there
- * could be almost (ULONG_MAX / sizeof(struct task_struct)) tasks
- * in a system whose address space was fully populated with memory.
- * Call this quantity Nt.
+ * ->srcu_ctrp and incrementing ->srcu_ctrs[idx].srcu_locks. And
+ * there could be almost (ULONG_MAX / sizeof(struct task_struct))
+ * tasks in a system whose address space was fully populated
+ * with memory. Call this quantity Nt.
*
- * So suppose that the updater is preempted at this point in the
- * code for a long time. That now-preempted updater has already
- * flipped ->srcu_idx (possibly during the preceding grace period),
- * done an smp_mb() (again, possibly during the preceding grace
- * period), and summed up the ->srcu_ctrs[idx].srcu_unlocks counters.
- * How many times can a given one of the aforementioned Nt tasks
- * increment the old ->srcu_idx value's ->srcu_ctrs[idx].srcu_locks
- * counter, in the absence of nesting?
+ * So suppose that the updater is preempted at this
+ * point in the code for a long time. That now-preempted
+ * updater has already flipped ->srcu_ctrp (possibly during
+ * the preceding grace period), done an smp_mb() (again,
+ * possibly during the preceding grace period), and summed up
+ * the ->srcu_ctrs[idx].srcu_unlocks counters. How many times
+ * can a given one of the aforementioned Nt tasks increment the
+ * old ->srcu_ctrp value's ->srcu_ctrs[idx].srcu_locks counter,
+ * in the absence of nesting?
*
* It can clearly do so once, given that it has already fetched
- * the old value of ->srcu_idx and is just about to use that
+ * the old value of ->srcu_ctrp and is just about to use that
* value to index its increment of ->srcu_ctrs[idx].srcu_locks.
* But as soon as it leaves that SRCU read-side critical section,
* it will increment ->srcu_ctrs[idx].srcu_unlocks, which must
- * follow the updater's above read from that same value. Thus,
- * as soon the reading task does an smp_mb() and a later fetch from
- * ->srcu_idx, that task will be guaranteed to get the new index.
+ * follow the updater's above read from that same value. Thus,
+ * as soon the reading task does an smp_mb() and a later fetch from
+ * ->srcu_ctrp, that task will be guaranteed to get the new index.
* Except that the increment of ->srcu_ctrs[idx].srcu_unlocks
* in __srcu_read_unlock() is after the smp_mb(), and the fetch
- * from ->srcu_idx in __srcu_read_lock() is before the smp_mb().
- * Thus, that task might not see the new value of ->srcu_idx until
+ * from ->srcu_ctrp in __srcu_read_lock() is before the smp_mb().
+ * Thus, that task might not see the new value of ->srcu_ctrp until
* the -second- __srcu_read_lock(), which in turn means that this
* task might well increment ->srcu_ctrs[idx].srcu_locks for the
- * old value of ->srcu_idx twice, not just once.
+ * old value of ->srcu_ctrp twice, not just once.
*
* However, it is important to note that a given smp_mb() takes
* effect not just for the task executing it, but also for any
@@ -1095,7 +1095,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
/*
* Wait until all readers counted by array index idx complete, but
* loop an additional time if there is an expedited grace period pending.
- * The caller must ensure that ->srcu_idx is not changed while checking.
+ * The caller must ensure that ->srcu_ctrp is not changed while checking.
*/
static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
{
@@ -1113,14 +1113,14 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
}
/*
- * Increment the ->srcu_idx counter so that future SRCU readers will
+ * Increment the ->srcu_ctrp counter so that future SRCU readers will
* use the other rank of the ->srcu_(un)lock_count[] arrays. This allows
* us to wait for pre-existing readers in a starvation-free manner.
*/
static void srcu_flip(struct srcu_struct *ssp)
{
/*
- * Because the flip of ->srcu_idx is executed only if the
+ * Because the flip of ->srcu_ctrp is executed only if the
* preceding call to srcu_readers_active_idx_check() found that
* the ->srcu_ctrs[].srcu_unlocks and ->srcu_ctrs[].srcu_locks sums
* matched and because that summing uses atomic_long_read(),
@@ -1128,15 +1128,15 @@ static void srcu_flip(struct srcu_struct *ssp)
* summing and the WRITE_ONCE() in this call to srcu_flip().
* This ordering ensures that if this updater saw a given reader's
* increment from __srcu_read_lock(), that reader was using a value
- * of ->srcu_idx from before the previous call to srcu_flip(),
+ * of ->srcu_ctrp from before the previous call to srcu_flip(),
* which should be quite rare. This ordering thus helps forward
* progress because the grace period could otherwise be delayed
* by additional calls to __srcu_read_lock() using that old (soon
- * to be new) value of ->srcu_idx.
+ * to be new) value of ->srcu_ctrp.
*
* This sum-equality check and ordering also ensures that if
* a given call to __srcu_read_lock() uses the new value of
- * ->srcu_idx, this updater's earlier scans cannot have seen
+ * ->srcu_ctrp, this updater's earlier scans cannot have seen
* that reader's increments, which is all to the good, because
* this grace period need not wait on that reader. After all,
* if those earlier scans had seen that reader, there would have
@@ -1151,7 +1151,6 @@ static void srcu_flip(struct srcu_struct *ssp)
*/
smp_mb(); /* E */ /* Pairs with B and C. */
- WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
WRITE_ONCE(ssp->srcu_ctrp,
&ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
@@ -1470,8 +1469,9 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
*
* Wait for the count to drain to zero of both indexes. To avoid the
* possible starvation of synchronize_srcu(), it waits for the count of
- * the index=((->srcu_idx & 1) ^ 1) to drain to zero at first,
- * and then flip the srcu_idx and wait for the count of the other index.
+ * the index=!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]) to drain to zero
+ * at first, and then flip the ->srcu_ctrp and wait for the count of the
+ * other index.
*
* Can block; must be called from process context.
*
@@ -1697,7 +1697,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
/*
* Because readers might be delayed for an extended period after
- * fetching ->srcu_idx for their index, at any point in time there
+ * fetching ->srcu_ctrp for their index, at any point in time there
* might well be readers using both idx=0 and idx=1. We therefore
* need to wait for readers to clear from both index values before
* invoking a callback.
@@ -1725,7 +1725,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
}
if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
- idx = 1 ^ (ssp->srcu_idx & 1);
+ idx = !(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]);
if (!try_check_zero(ssp, idx, 1)) {
mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return; /* readers present, retry later. */
@@ -1743,7 +1743,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
* SRCU read-side critical sections are normally short,
* so check at least twice in quick succession after a flip.
*/
- idx = 1 ^ (ssp->srcu_idx & 1);
+ idx = !(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]);
if (!try_check_zero(ssp, idx, 2)) {
mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return; /* readers present, retry later. */
@@ -1901,7 +1901,7 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
int ss_state = READ_ONCE(ssp->srcu_sup->srcu_size_state);
int ss_state_idx = ss_state;
- idx = ssp->srcu_idx & 0x1;
+ idx = ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0];
if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name))
ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
pr_alert("%s%s Tree SRCU g%ld state %d (%s)",
--
2.40.1
This commit adds srcu_read_{,un}lock_fast(), which is similar
to srcu_read_{,un}lock_lite(), but avoids the array-indexing and
pointer-following overhead. On a microbenchmark featuring tight
loops around empty readers, this results in about a 20% speedup
compared to RCU Tasks Trace on my x86 laptop.
Please note that SRCU-fast has drawbacks compared to RCU Tasks
Trace, including:
o Lack of CPU stall warnings.
o SRCU-fast readers permitted only where rcu_is_watching().
o A pointer-sized return value from srcu_read_lock_fast() must
be passed to the corresponding srcu_read_unlock_fast().
o Even in the absence of readers, a synchronize_srcu() invoked on an
srcu_struct having _fast() readers will incur the latency of at
least two normal RCU grace periods.
o RCU Tasks Trace priority boosting could be easily added.
Boosting SRCU readers is more difficult.
SRCU-fast also has a drawback compared to SRCU-lite, namely that the
return value from srcu_read_lock_fast() is a 64-bit pointer and
that from srcu_read_lock_lite() is only a 32-bit int.
[ paulmck: Apply feedback from Akira Yokosawa. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcu.h | 47 ++++++++++++++++++++++++++++++++++++++--
include/linux/srcutiny.h | 22 +++++++++++++++++++
include/linux/srcutree.h | 38 ++++++++++++++++++++++++++++++++
3 files changed, 105 insertions(+), 2 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 2bd0e24e9b55..63bddc301423 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -47,9 +47,10 @@ int init_srcu_struct(struct srcu_struct *ssp);
#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock().
#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe().
#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite().
+#define SRCU_READ_FLAVOR_FAST 0x8 // srcu_read_lock_fast().
#define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \
- SRCU_READ_FLAVOR_LITE) // All of the above.
-#define SRCU_READ_FLAVOR_SLOWGP SRCU_READ_FLAVOR_LITE
+ SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) // All of the above.
+#define SRCU_READ_FLAVOR_SLOWGP (SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST)
// Flavors requiring synchronize_rcu()
// instead of smp_mb().
void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
@@ -253,6 +254,33 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
return retval;
}
+/**
+ * srcu_read_lock_fast - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but for a light-weight
+ * smp_mb()-free reader. See srcu_read_lock() for more information.
+ *
+ * If srcu_read_lock_fast() is ever used on an srcu_struct structure,
+ * then none of the other flavors may be used, whether before, during,
+ * or after. Note that grace-period auto-expediting is disabled for _fast
+ * srcu_struct structures because auto-expedited grace periods invoke
+ * synchronize_rcu_expedited(), IPIs and all.
+ *
+ * Note that srcu_read_lock_fast() can be invoked only from those contexts
+ * where RCU is watching, that is, from contexts where it would be legal
+ * to invoke rcu_read_lock(). Otherwise, lockdep will complain.
+ */
+static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *ssp) __acquires(ssp)
+{
+ struct srcu_ctr __percpu *retval;
+
+ srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST);
+ retval = __srcu_read_lock_fast(ssp);
+ rcu_try_lock_acquire(&ssp->dep_map);
+ return retval;
+}
+
/**
* srcu_read_lock_lite - register a new reader for an SRCU-protected structure.
* @ssp: srcu_struct in which to register the new reader.
@@ -356,6 +384,21 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
__srcu_read_unlock(ssp, idx);
}
+/**
+ * srcu_read_unlock_fast - unregister a old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @scp: return value from corresponding srcu_read_lock_fast().
+ *
+ * Exit a light-weight SRCU read-side critical section.
+ */
+static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
+ __releases(ssp)
+{
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST);
+ srcu_lock_release(&ssp->dep_map);
+ __srcu_read_unlock_fast(ssp, scp);
+}
+
/**
* srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 07a0c4489ea2..380260317d98 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -71,6 +71,28 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
return idx;
}
+struct srcu_ctr;
+
+static inline int __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
+{
+ return (int)(intptr_t)(struct srcu_ctr __force __kernel *)scpp;
+}
+
+static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
+{
+ return (struct srcu_ctr __percpu *)(intptr_t)idx;
+}
+
+static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp)
+{
+ return __srcu_ctr_to_ptr(ssp, __srcu_read_lock(ssp));
+}
+
+static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
+{
+ __srcu_read_unlock(ssp, __srcu_ptr_to_ctr(ssp, scp));
+}
+
#define __srcu_read_lock_lite __srcu_read_lock
#define __srcu_read_unlock_lite __srcu_read_unlock
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index ef3065c0cadc..bdc467efce3a 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -226,6 +226,44 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss
return &ssp->sda->srcu_ctrs[idx];
}
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct. Returns a pointer that must be passed to the matching
+ * srcu_read_unlock_fast().
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp)
+{
+ struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
+
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast().");
+ this_cpu_inc(scp->srcu_locks.counter); /* Y */
+ barrier(); /* Avoid leaking the critical section. */
+ return scp;
+}
+
+/*
+ * Removes the count for the old reader from the appropriate
+ * per-CPU element of the srcu_struct. Note that this may well be a
+ * different CPU than that which was incremented by the corresponding
+ * srcu_read_lock_fast(), but it must be within the same task.
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
+{
+ barrier(); /* Avoid leaking the critical section. */
+ this_cpu_inc(scp->srcu_unlocks.counter); /* Z */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast().");
+}
+
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Returns an index that must be passed to the matching
--
2.40.1
This commit creates a new srcu-fast option for the refscale.scale_type
module parameter that selects srcu_read_lock_fast() and
srcu_read_unlock_fast().
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
kernel/rcu/refscale.c | 32 +++++++++++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 1b47376acdc4..f11a7c2af778 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -216,6 +216,36 @@ static const struct ref_scale_ops srcu_ops = {
.name = "srcu"
};
+static void srcu_fast_ref_scale_read_section(const int nloops)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast(srcu_ctlp);
+ srcu_read_unlock_fast(srcu_ctlp, scp);
+ }
+}
+
+static void srcu_fast_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast(srcu_ctlp);
+ un_delay(udl, ndl);
+ srcu_read_unlock_fast(srcu_ctlp, scp);
+ }
+}
+
+static const struct ref_scale_ops srcu_fast_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = srcu_fast_ref_scale_read_section,
+ .delaysection = srcu_fast_ref_scale_delay_section,
+ .name = "srcu-fast"
+};
+
static void srcu_lite_ref_scale_read_section(const int nloops)
{
int i;
@@ -1163,7 +1193,7 @@ ref_scale_init(void)
long i;
int firsterr = 0;
static const struct ref_scale_ops *scale_ops[] = {
- &rcu_ops, &srcu_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS
+ &rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS
&refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops,
&acqrel_ops, &sched_clock_ops, &clock_ops, &jiffies_ops,
&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
--
2.40.1
This commit causes the rcutorture SRCU-P scenario to use the
srcu_read_lock_fast() and srcu_read_unlock_fast() functions. This will
cause these two functions to be regularly tested by several developers
(myself included), for example, those who use torture.sh as an RCU
acceptance test.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index 2db39f298d18..fb61703690cb 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -2,3 +2,4 @@ rcutorture.torture_type=srcud
rcupdate.rcu_self_test=1
rcutorture.fwd_progress=3
srcutree.big_cpu_lim=5
+rcutorture.reader_flavor=0x8
--
2.40.1
This commit abstracts the srcu_read_unlock*() integer-to-pointer
conversion into a new __srcu_ctr_to_ptr(). This will be used
in rcutorture for testing an srcu_read_unlock_fast() that avoids
array-indexing overhead by taking a pointer rather than an integer.
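Together with __srcu_ptr_to_ctr() from the earlier patch, this forms a
round-trip pair; a sketch of the intended invariant:
	/* For idx in {0, 1}: */
	WARN_ON_ONCE(__srcu_ptr_to_ctr(ssp, __srcu_ctr_to_ptr(ssp, idx)) != idx);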
[ paulmck: Apply kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcutree.h | 9 ++++++++-
kernel/rcu/srcutree.c | 6 ++----
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index f41bb3a55a04..55fa400624bb 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -218,6 +218,13 @@ static inline int __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __
return scpp - &ssp->sda->srcu_ctrs[0];
}
+// Converts an integer to a per-CPU pointer to the corresponding
+// ->srcu_ctrs[] array element.
+static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
+{
+ return &ssp->sda->srcu_ctrs[idx];
+}
+
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Returns an index that must be passed to the matching
@@ -252,7 +259,7 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
{
barrier(); /* Avoid leaking the critical section. */
- this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_unlocks.counter); /* Z */
+ this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter); /* Z */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 4643a8ed7e32..d2a694944553 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -765,7 +765,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
{
smp_mb(); /* C */ /* Avoid leaking the critical section. */
- this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_unlocks.counter);
+ this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
@@ -794,10 +794,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
*/
void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
{
- struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
-
smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */
- atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_unlocks);
+ atomic_long_inc(&raw_cpu_ptr(__srcu_ctr_to_ptr(ssp, idx))->srcu_unlocks);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
--
2.40.1
The kernel-doc headers for srcu_read_unlock_lite() and
srcu_read_unlock_nmisafe() both say that their idx parameters must come
from srcu_read_lock(). This would be bad because a given srcu_struct
structure may be used with only one flavor of SRCU reader. This commit
therefore updates the srcu_read_unlock_lite()
kernel-doc header to say that its idx parameter must be obtained
from srcu_read_lock_lite() and the srcu_read_unlock_nmisafe()
kernel-doc header to say that its idx parameter must be obtained from
srcu_read_lock_nmisafe().
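That is, each unlock flavor consumes the index produced by its own lock
flavor. A minimal sketch, with my_srcu a hypothetical srcu_struct:

    DEFINE_STATIC_SRCU(my_srcu);

    static void lite_reader(void)
    {
            int idx = srcu_read_lock_lite(&my_srcu);

            /* ... light-weight read-side critical section ... */
            srcu_read_unlock_lite(&my_srcu, idx);  /* Not srcu_read_unlock()! */
    }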
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
include/linux/srcu.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 63bddc301423..a0df80baaccf 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -402,7 +402,7 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct
/**
* srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
- * @idx: return value from corresponding srcu_read_lock().
+ * @idx: return value from corresponding srcu_read_lock_lite().
*
* Exit a light-weight SRCU read-side critical section.
*/
@@ -418,7 +418,7 @@ static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
/**
* srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
- * @idx: return value from corresponding srcu_read_lock().
+ * @idx: return value from corresponding srcu_read_lock_nmisafe().
*
* Exit an SRCU read-side critical section, but in an NMI-safe manner.
*/
--
2.40.1
This commit adds a sentence to the srcu_down_read() function's kernel-doc
header noting that it is permissible to use srcu_down_read() and
srcu_read_lock() on the same srcu_struct, even concurrently.
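For example, the following mix is legal on a single srcu_struct (sketch
only; my_srcu and both reader functions are hypothetical):

    DEFINE_STATIC_SRCU(my_srcu);

    static void reader_a(void)              /* Lockdep-tracked reader. */
    {
            int idx = srcu_read_lock(&my_srcu);

            /* ... read-side critical section ... */
            srcu_read_unlock(&my_srcu, idx);
    }

    static void reader_b(void)              /* Semaphore-like reader. */
    {
            int idx = srcu_down_read(&my_srcu);

            /* ... read-side critical section, possibly concurrent
             * with reader_a() on the very same srcu_struct ... */
            srcu_up_read(&my_srcu, idx);
    }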
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcu.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index a0df80baaccf..317eab82a5f0 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -359,7 +359,8 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
* srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler.
*
* Calls to srcu_down_read() may be nested, similar to the manner in
- * which calls to down_read() may be nested.
+ * which calls to down_read() may be nested. The same srcu_struct may be
+ * used concurrently by srcu_down_read() and srcu_read_lock().
*/
static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp)
{
--
2.40.1
A pair of matching srcu_read_lock_fast() and srcu_read_unlock_fast()
invocations must take place within the same context, for example, within
the same task. Otherwise, lockdep complains, which is the right thing
to do for most use cases.
However, there are use cases involving tracing (for example, uretprobes)
in which an SRCU reader needs to begin in one task and end in a
timer handler, which might interrupt some other task. This commit
therefore supplies the semaphore-like srcu_down_read_fast() and
srcu_up_read_fast() functions, which act like srcu_read_lock_fast() and
srcu_read_unlock_fast(), but permit srcu_up_read_fast() to be invoked
from a different context than the matching srcu_down_read_fast().
Neither srcu_down_read_fast() nor srcu_up_read_fast() may be invoked
from an NMI handler.
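A rough sketch of the intended uretprobe-like pattern, with the reader
starting in task context and ending in a timer handler (my_srcu, struct
fast_reader, and both functions are hypothetical):

    DEFINE_STATIC_SRCU(my_srcu);

    struct fast_reader {
            struct srcu_ctr __percpu *scp;  /* Cookie from srcu_down_read_fast(). */
            struct timer_list timer;
    };

    static void fast_reader_timer_fn(struct timer_list *t)
    {
            struct fast_reader *fr = from_timer(fr, t, timer);

            /* Ends a reader begun in some other task's context. */
            srcu_up_read_fast(&my_srcu, fr->scp);
    }

    static void fast_reader_begin(struct fast_reader *fr)
    {
            fr->scp = srcu_down_read_fast(&my_srcu);
            timer_setup(&fr->timer, fast_reader_timer_fn, 0);
            mod_timer(&fr->timer, jiffies + HZ);
    }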
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcu.h | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 317eab82a5f0..900b0d5c05f5 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -281,6 +281,24 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *
return retval;
}
+/**
+ * srcu_down_read_fast - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter a semaphore-like SRCU read-side critical section, but for
+ * a light-weight smp_mb()-free reader. See srcu_read_lock_fast() and
+ * srcu_down_read() for more information.
+ *
+ * The same srcu_struct may be used concurrently by srcu_down_read_fast()
+ * and srcu_read_lock_fast().
+ */
+static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp)
+{
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi());
+ srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST);
+ return __srcu_read_lock_fast(ssp);
+}
+
/**
* srcu_read_lock_lite - register a new reader for an SRCU-protected structure.
* @ssp: srcu_struct in which to register the new reader.
@@ -400,6 +418,22 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct
__srcu_read_unlock_fast(ssp, scp);
}
+/**
+ * srcu_up_read_fast - unregister a old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @scp: return value from corresponding srcu_down_read_fast().
+ *
+ * Exit an SRCU read-side critical section, but not necessarily from
+ * the same context as the matching srcu_down_read_fast().
+ */
+static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
+ __releases(ssp)
+{
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi());
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST);
+ __srcu_read_unlock_fast(ssp, scp);
+}
+
/**
* srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
--
2.40.1
BPF uses rcu_read_lock_trace() in NMI context, so srcu_read_lock_fast()
must be NMI-safe if it is to have any chance of addressing RCU Tasks
Trace use cases. This commit therefore causes srcu_read_lock_fast()
and srcu_read_unlock_fast() to use atomic_long_inc() instead of
this_cpu_inc() on architectures that support NMIs but do not have
NMI-safe implementations of this_cpu_inc(). Note that both x86 and
arm64 have NMI-safe implementations of this_cpu_inc(), and thus do not
pay the performance penalty inherent in atomic_long_inc().
It is tempting to use this trick to fold srcu_read_lock_nmisafe()
into srcu_read_lock(), but this would need careful thought, review,
and performance analysis, though those smp_mb() calls might well make
performance a non-issue.
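The hazard being avoided: on an architecture whose this_cpu_inc() expands
to a non-atomic load/add/store sequence, an NMI arriving mid-sequence can
lose an increment. Roughly (pseudocode for such a hypothetical arch):

    /*
     * load    tmp, scp->srcu_locks.counter
     *     <-- NMI handler runs srcu_read_lock_fast(), incrementing
     *         this same per-CPU counter
     * add     tmp, 1
     * store   tmp, scp->srcu_locks.counter  // NMI's increment overwritten
     */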
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
include/linux/srcutree.h | 34 ++++++++++++++++++++++++----------
1 file changed, 24 insertions(+), 10 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index bdc467efce3a..8bed7e6cc4c1 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -231,17 +231,24 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss
* srcu_struct. Returns a pointer that must be passed to the matching
* srcu_read_unlock_fast().
*
- * Note that this_cpu_inc() is an RCU read-side critical section either
- * because it disables interrupts, because it is a single instruction,
- * or because it is a read-modify-write atomic operation, depending on
- * the whims of the architecture.
+ * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
+ * critical sections either because they disable interrupts, because they
+ * are single instructions, or because they are read-modify-write atomic
+ * operations, depending on the whims of the architecture.
+ *
+ * This means that __srcu_read_lock_fast() is not all that fast
+ * on architectures that support NMIs but do not supply NMI-safe
+ * implementations of this_cpu_inc().
*/
static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp)
{
struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast().");
- this_cpu_inc(scp->srcu_locks.counter); /* Y */
+ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
+ this_cpu_inc(scp->srcu_locks.counter); /* Y */
+ else
+ atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */
barrier(); /* Avoid leaking the critical section. */
return scp;
}
@@ -252,15 +259,22 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct
* different CPU than that which was incremented by the corresponding
* srcu_read_lock_fast(), but it must be within the same task.
*
- * Note that this_cpu_inc() is an RCU read-side critical section either
- * because it disables interrupts, because it is a single instruction,
- * or because it is a read-modify-write atomic operation, depending on
- * the whims of the architecture.
+ * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
+ * critical sections either because they disable interrupts, because they
+ * are single instructions, or because they are read-modify-write atomic
+ * operations, depending on the whims of the architecture.
+ *
+ * This means that __srcu_read_unlock_fast() is not all that fast
+ * on architectures that support NMIs but do not supply NMI-safe
+ * implementations of this_cpu_inc().
*/
static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
{
barrier(); /* Avoid leaking the critical section. */
- this_cpu_inc(scp->srcu_unlocks.counter); /* Z */
+ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
+ this_cpu_inc(scp->srcu_unlocks.counter); /* Z */
+ else
+ atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast().");
}
--
2.40.1
This commit stops using ->srcu_idx for rcutorture's reader-batch
consistency checking, using ->srcu_gp_seq instead. This is a first
step towards a faster srcu_read_{,un}lock_lite() that avoids the array
accesses that use ->srcu_idx.
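For context, grace-period sequence numbers carry state in their low-order
bits, with the remaining bits counting grace periods, so rcu_seq_diff()
reports distance in grace periods. A simplified sketch, not the kernel's
exact implementation (which also accounts for in-progress grace periods):

    #define RCU_SEQ_CTR_SHIFT 2     /* Low bits hold grace-period state. */

    static unsigned long seq_diff_sketch(unsigned long new, unsigned long old)
    {
            return (new - old) >> RCU_SEQ_CTR_SHIFT;
    }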
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
kernel/rcu/rcutorture.c | 2 ++
kernel/rcu/srcutree.c | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d26fb1d33ed9..1d2de50fb5d6 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -791,6 +791,7 @@ static struct rcu_torture_ops srcu_ops = {
.readunlock = srcu_torture_read_unlock,
.readlock_held = torture_srcu_read_lock_held,
.get_gp_seq = srcu_torture_completed,
+ .gp_diff = rcu_seq_diff,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.exp_sync = srcu_torture_synchronize_expedited,
@@ -834,6 +835,7 @@ static struct rcu_torture_ops srcud_ops = {
.readunlock = srcu_torture_read_unlock,
.readlock_held = torture_srcu_read_lock_held,
.get_gp_seq = srcu_torture_completed,
+ .gp_diff = rcu_seq_diff,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.exp_sync = srcu_torture_synchronize_expedited,
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index f87a9fb6d6bb..b5bb73c877de 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1679,7 +1679,7 @@ EXPORT_SYMBOL_GPL(srcu_barrier);
*/
unsigned long srcu_batches_completed(struct srcu_struct *ssp)
{
- return READ_ONCE(ssp->srcu_idx);
+ return READ_ONCE(ssp->srcu_sup->srcu_gp_seq);
}
EXPORT_SYMBOL_GPL(srcu_batches_completed);
--
2.40.1
This commit defines SRCU_READ_FLAVOR_ALL in terms of the
SRCU_READ_FLAVOR_* definitions instead of a hexadecimal constant.
Suggested-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcu.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index d7ba46e74f58..f6f779b9d9ff 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -47,7 +47,8 @@ int init_srcu_struct(struct srcu_struct *ssp);
#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock().
#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe().
#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite().
-#define SRCU_READ_FLAVOR_ALL 0x7 // All of the above.
+#define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \
+ SRCU_READ_FLAVOR_LITE) // All of the above.
#ifdef CONFIG_TINY_SRCU
#include <linux/srcutiny.h>
--
2.40.1
Given that SRCU read-side critical sections are not just preemptible
but can also block outright, there is not much reason to restrict
Tiny SRCU to non-preemptible kernels. This commit therefore removes
Tiny SRCU's dependencies on non-preemptibility, primarily surrounding
its interaction with rcutorture and early boot.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Ankur Arora <ankur.a.arora@oracle.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
kernel/rcu/rcu.h | 9 ++++++---
kernel/rcu/srcutiny.c | 6 ++++++
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index f87c9d6d36fc..f6fcf87d9139 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -611,8 +611,6 @@ void srcutorture_get_gp_data(struct srcu_struct *sp, int *flags,
static inline bool rcu_watching_zero_in_eqs(int cpu, int *vp) { return false; }
static inline unsigned long rcu_get_gp_seq(void) { return 0; }
static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
-static inline unsigned long
-srcu_batches_completed(struct srcu_struct *sp) { return 0; }
static inline void rcu_force_quiescent_state(void) { }
static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) { return true; }
static inline void show_rcu_gp_kthreads(void) { }
@@ -624,7 +622,6 @@ static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
bool rcu_watching_zero_in_eqs(int cpu, int *vp);
unsigned long rcu_get_gp_seq(void);
unsigned long rcu_exp_batches_completed(void);
-unsigned long srcu_batches_completed(struct srcu_struct *sp);
bool rcu_check_boost_fail(unsigned long gp_state, int *cpup);
void show_rcu_gp_kthreads(void);
int rcu_get_gp_kthreads_prio(void);
@@ -636,6 +633,12 @@ void rcu_gp_slow_register(atomic_t *rgssp);
void rcu_gp_slow_unregister(atomic_t *rgssp);
#endif /* #else #ifdef CONFIG_TINY_RCU */
+#ifdef CONFIG_TINY_SRCU
+static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) { return 0; }
+#else // #ifdef CONFIG_TINY_SRCU
+unsigned long srcu_batches_completed(struct srcu_struct *sp);
+#endif // #else // #ifdef CONFIG_TINY_SRCU
+
#ifdef CONFIG_RCU_NOCB_CPU
void rcu_bind_current_to_nocb(void);
#else
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index f688bdad293e..6e9fe2ce1075 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -20,7 +20,11 @@
#include "rcu_segcblist.h"
#include "rcu.h"
+#ifndef CONFIG_TREE_RCU
int rcu_scheduler_active __read_mostly;
+#else // #ifndef CONFIG_TREE_RCU
+extern int rcu_scheduler_active;
+#endif // #else // #ifndef CONFIG_TREE_RCU
static LIST_HEAD(srcu_boot_list);
static bool srcu_init_done;
@@ -282,11 +286,13 @@ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
}
EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu);
+#ifndef CONFIG_TREE_RCU
/* Lockdep diagnostics. */
void __init rcu_scheduler_starting(void)
{
rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
}
+#endif // #ifndef CONFIG_TREE_RCU
/*
* Queue work for srcu_struct structures with early boot callbacks.
--
2.40.1
This commit causes SRCU readers to use ->srcu_ctrs for counter
selection instead of ->srcu_idx. This takes another step towards
array-indexing-free SRCU readers.
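The reader-side idea in miniature: snapshot the current-counter pointer
rather than loading ->srcu_idx, then recover the integer index by pointer
arithmetic only where one is still required. A sketch of the pattern the
diff below applies to __srcu_read_lock():

    static inline int reader_select_sketch(struct srcu_struct *ssp)
    {
            struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);

            this_cpu_inc(scp->srcu_locks.counter);
            return scp - &ssp->sda->srcu_ctrs[0];   /* 0 or 1. */
    }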
[ paulmck: Apply kernel test robot feedback. ]
Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: kernel test robot <oliver.sang@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
---
include/linux/srcutree.h | 9 +++++----
kernel/rcu/srcutree.c | 23 +++++++++++++----------
2 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index c794d599db5c..1b01ced61a45 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -101,6 +101,7 @@ struct srcu_usage {
*/
struct srcu_struct {
unsigned int srcu_idx; /* Current rdr array element. */
+ struct srcu_ctr __percpu *srcu_ctrp;
struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */
struct lockdep_map dep_map;
struct srcu_usage *srcu_sup; /* Update-side data. */
@@ -167,6 +168,7 @@ struct srcu_struct {
#define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name) \
{ \
.sda = &pcpu_name, \
+ .srcu_ctrp = &pcpu_name.srcu_ctrs[0], \
__SRCU_STRUCT_INIT_COMMON(name, usage_name) \
}
@@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
*/
static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
{
- int idx;
+ struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
- idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
+ this_cpu_inc(scp->srcu_locks.counter); /* Y */
barrier(); /* Avoid leaking the critical section. */
- return idx;
+ return scp - &ssp->sda->srcu_ctrs[0];
}
/*
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index d4e9cd917a69..9af86ce2dd24 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
ssp->srcu_sup->sda_is_static = is_static;
- if (!is_static)
+ if (!is_static) {
ssp->sda = alloc_percpu(struct srcu_data);
+ ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
+ }
if (!ssp->sda)
goto err_free_sup;
init_srcu_struct_data(ssp);
@@ -742,12 +744,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
*/
int __srcu_read_lock(struct srcu_struct *ssp)
{
- int idx;
+ struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
- idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
+ this_cpu_inc(scp->srcu_locks.counter);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
- return idx;
+ return scp - &ssp->sda->srcu_ctrs[0];
}
EXPORT_SYMBOL_GPL(__srcu_read_lock);
@@ -772,13 +773,12 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
*/
int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
{
- int idx;
- struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+ struct srcu_ctr __percpu *scpp = READ_ONCE(ssp->srcu_ctrp);
+ struct srcu_ctr *scp = raw_cpu_ptr(scpp);
- idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks);
+ atomic_long_inc(&scp->srcu_locks);
smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */
- return idx;
+ return scpp - &ssp->sda->srcu_ctrs[0];
}
EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
@@ -1152,6 +1152,8 @@ static void srcu_flip(struct srcu_struct *ssp)
smp_mb(); /* E */ /* Pairs with B and C. */
WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
+ WRITE_ONCE(ssp->srcu_ctrp,
+ &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
/*
* Ensure that if the updater misses an __srcu_read_unlock()
@@ -2004,6 +2006,7 @@ static int srcu_module_coming(struct module *mod)
ssp->sda = alloc_percpu(struct srcu_data);
if (WARN_ON_ONCE(!ssp->sda))
return -ENOMEM;
+ ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
}
return 0;
}
--
2.40.1