Now that the reader and writer wait loops are identical, share the
code.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
kernel/locking/rwsem.c | 117 +++++++++++++++++++------------------------------
1 file changed, 47 insertions(+), 70 deletions(-)
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -650,13 +650,11 @@ static void rwsem_mark_wake(struct rw_se
* optionally wake up waiters before it returns.
*/
static inline void
-rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
- struct wake_q_head *wake_q)
+rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
__releases(&sem->wait_lock)
{
bool first = rwsem_first_waiter(sem) == waiter;
-
- wake_q_init(wake_q);
+ DEFINE_WAKE_Q(wake_q);
/*
* If the wait_list isn't empty and the waiter to be deleted is
@@ -664,10 +662,10 @@ rwsem_del_wake_waiter(struct rw_semaphor
* be eligible to acquire or spin on the lock.
*/
if (rwsem_del_waiter(sem, waiter) && first)
- rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
- if (!wake_q_empty(wake_q))
- wake_up_q(wake_q);
+ if (!wake_q_empty(&wake_q))
+ wake_up_q(&wake_q);
}
/*
@@ -993,6 +991,46 @@ static inline void rwsem_cond_wake_waite
rwsem_mark_wake(sem, wake_type, wake_q);
}
+#define waiter_type(_waiter, _r, _w) \
+ ((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
+
+static __always_inline struct rw_semaphore *
+rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
+{
+ trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
+
+ /* wait to be given the lock */
+ for (;;) {
+ set_current_state(state);
+ if (!smp_load_acquire(&waiter->task)) {
+ /* Matches rwsem_waiter_wake()'s smp_store_release(). */
+ break;
+ }
+ if (signal_pending_state(state, current)) {
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (waiter->task)
+ goto out_nolock;
+ raw_spin_unlock_irq(&sem->wait_lock);
+ /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
+ break;
+ }
+ schedule_preempt_disabled();
+ lockevent_inc(waiter_type(waiter, rwsem_sleep_reader, rwsem_sleep_writer));
+ }
+
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(waiter_type(waiter, rwsem_rlock, rwsem_wlock));
+ trace_contention_end(sem, 0);
+ return sem;
+
+out_nolock:
+ rwsem_del_wake_waiter(sem, waiter);
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(waiter_type(waiter, rwsem_rlock_fail, rwsem_wlock_fail));
+ trace_contention_end(sem, -EINTR);
+ return ERR_PTR(-EINTR);
+}
+
/*
* Wait for the read lock to be granted
*/
@@ -1071,38 +1109,7 @@ rwsem_down_read_slowpath(struct rw_semap
if (!wake_q_empty(&wake_q))
wake_up_q(&wake_q);
- trace_contention_begin(sem, LCB_F_READ);
-
- /* wait to be given the lock */
- for (;;) {
- set_current_state(state);
- if (!smp_load_acquire(&waiter.task)) {
- /* Matches rwsem_waiter_wake()'s smp_store_release(). */
- break;
- }
- if (signal_pending_state(state, current)) {
- raw_spin_lock_irq(&sem->wait_lock);
- if (waiter.task)
- goto out_nolock;
- raw_spin_unlock_irq(&sem->wait_lock);
- /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
- break;
- }
- schedule_preempt_disabled();
- lockevent_inc(rwsem_sleep_reader);
- }
-
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_rlock);
- trace_contention_end(sem, 0);
- return sem;
-
-out_nolock:
- rwsem_del_wake_waiter(sem, &waiter, &wake_q);
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_rlock_fail);
- trace_contention_end(sem, -EINTR);
- return ERR_PTR(-EINTR);
+ return rwsem_waiter_wait(sem, &waiter, state);
}
/*
@@ -1150,37 +1157,7 @@ rwsem_down_write_slowpath(struct rw_sema
}
raw_spin_unlock_irq(&sem->wait_lock);
- /* wait until we successfully acquire the lock */
- trace_contention_begin(sem, LCB_F_WRITE);
-
- for (;;) {
- set_current_state(state);
- if (!smp_load_acquire(&waiter.task)) {
- /* Matches rwsem_waiter_wake()'s smp_store_release(). */
- break;
- }
- if (signal_pending_state(state, current)) {
- raw_spin_lock_irq(&sem->wait_lock);
- if (waiter.task)
- goto out_nolock;
- raw_spin_unlock_irq(&sem->wait_lock);
- /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
- break;
- }
- schedule_preempt_disabled();
- lockevent_inc(rwsem_sleep_writer);
- }
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_wlock);
- trace_contention_end(sem, 0);
- return sem;
-
-out_nolock:
- rwsem_del_wake_waiter(sem, &waiter, &wake_q);
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_wlock_fail);
- trace_contention_end(sem, -EINTR);
- return ERR_PTR(-EINTR);
+ return rwsem_waiter_wait(sem, &waiter, state);
}
/*
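An aside for readers less familiar with the handshake the shared wait loop relies on: the waker side publishes the lock grant and then clears waiter->task with smp_store_release() (in rwsem_waiter_wake(), per the comment in the loop), while the waiter's smp_load_acquire() of waiter->task guarantees the grant is visible once it reads NULL. Below is a minimal userspace analogue of that pattern using C11 atomics and pthreads; the names and structure are illustrative only, not the rwsem code.

/* Userspace analogue of the waiter/waker handshake -- NOT the kernel code. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

struct waiter {
	_Atomic(void *) task;	/* non-NULL while still waiting */
	int granted;		/* stands in for the lock state handed over */
};

static void *waker(void *arg)
{
	struct waiter *w = arg;

	w->granted = 1;				/* "give" the lock */
	/* matches the acquire load in the wait loop below */
	atomic_store_explicit(&w->task, NULL, memory_order_release);
	return NULL;
}

int main(void)
{
	struct waiter w = { .granted = 0 };
	pthread_t t;

	atomic_init(&w.task, (void *)&w);	/* "still waiting" marker */
	pthread_create(&t, NULL, waker, &w);

	/* wait to be given the lock; the kernel sleeps here instead of spinning */
	while (atomic_load_explicit(&w.task, memory_order_acquire))
		;

	/* the release/acquire pair guarantees the grant is visible: prints 1 */
	printf("granted = %d\n", w.granted);

	pthread_join(t, NULL);
	return 0;
}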
On 2/23/23 07:26, Peter Zijlstra wrote:
> Now that the reader and writer wait loops are identical, share the
> code.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[...]
> +out_nolock:
> +	rwsem_del_wake_waiter(sem, waiter);
> +	__set_current_state(TASK_RUNNING);

Similar to Boqun's comment, we should move __set_current_state() before
rwsem_del_wake_waiter().

Unfortunately, lockevent_inc() doesn't work with waiter_type() like that as
the compilation will fail if CONFIG_LOCK_EVENT_COUNTS is enabled. Could you
include the attached patch in your series and make the following changes?

diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index deb0d016a6ce..5b14b0d076fd 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1021,13 +1021,14 @@ static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
 	rwsem_mark_wake(sem, wake_type, wake_q);
 }
 
-#define waiter_type(_waiter, _r, _w)	\
-	((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
+#define waiter_type(_reader, _r, _w)	((_reader) ? (_r) : (_w))
 
 static __always_inline struct rw_semaphore *
 rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
 {
-	trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
+	bool reader = waiter->type == RWSEM_WAITING_FOR_READ;
+
+	trace_contention_begin(sem, waiter_type(reader, LCB_F_READ, LCB_F_WRITE));
 
 	/* wait to be given the lock */
 	for (;;) {
@@ -1045,18 +1046,18 @@ rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int sta
 			break;
 		}
 		schedule_preempt_disabled();
-		lockevent_inc(waiter_type(waiter, rwsem_sleep_reader, rwsem_sleep_writer));
+		lockevent_cond_inc2(reader, rwsem_sleep_reader, rwsem_sleep_writer);
 	}
 
 	__set_current_state(TASK_RUNNING);
-	lockevent_inc(waiter_type(waiter, rwsem_rlock, rwsem_wlock));
+	lockevent_cond_inc2(reader, rwsem_rlock, rwsem_wlock);
 	trace_contention_end(sem, 0);
 	return sem;
 
 out_nolock:
 	rwsem_del_wake_waiter(sem, waiter);
 	__set_current_state(TASK_RUNNING);
-	lockevent_inc(waiter_type(waiter, rwsem_rlock_fail, rwsem_wlock_fail));
+	lockevent_cond_inc2(reader, rwsem_rlock_fail, rwsem_wlock_fail);
 	trace_contention_end(sem, -EINTR);
 	return ERR_PTR(-EINTR);
 }

Thanks,
Longman
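Waiman's point is easy to miss without having looked at the lock-event counters (kernel/locking/lock_events.h): when CONFIG_LOCK_EVENT_COUNTS is enabled, the event name passed to lockevent_inc() is pasted into an identifier at preprocessing time, so it must be a literal event name, not a runtime expression such as the waiter_type() ternary. Here is a deliberately simplified, self-contained sketch of that failure mode; it models the idea only and is not the kernel's actual macro.

/* Simplified model of per-event counters -- not the kernel's lock_events.h. */
#include <stdio.h>

enum { LOCKEVENT_rwsem_sleep_reader, LOCKEVENT_rwsem_sleep_writer, NR_LOCKEVENTS };

static unsigned long lockevents[NR_LOCKEVENTS];

/* the argument becomes part of an identifier, so it must be a bare name */
#define lockevent_inc(ev)	(lockevents[LOCKEVENT_##ev]++)

int main(void)
{
	int reader = 1;

	lockevent_inc(rwsem_sleep_reader);	/* fine: literal event name */

	/*
	 * lockevent_inc(reader ? rwsem_sleep_reader : rwsem_sleep_writer)
	 * would paste into "LOCKEVENT_reader", an undeclared identifier,
	 * and fail to compile -- which is why the ternary only appears to
	 * work while the counters are configured out and the macro is a no-op.
	 */
	(void)reader;

	printf("reader sleeps: %lu\n", lockevents[LOCKEVENT_rwsem_sleep_reader]);
	return 0;
}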
On Thu, Feb 23, 2023 at 05:45:56PM -0500, Waiman Long wrote:
> Unfortunately, lockevent_inc() doesn't work with waiter_type() like that as
> the compilation will fail if CONFIG_LOCK_EVENT_COUNTS is enabled. Could you
> include the attached patch in your series and make the following changes?

Yeah, robot told me; fixed it like so:

--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -995,13 +995,16 @@ static inline void rwsem_cond_wake_waite
 	rwsem_mark_wake(sem, wake_type, wake_q);
 }
 
-#define waiter_type(_waiter, _r, _w)	\
-	((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
+#define lockevent_rw_inc(rd, evr, evw)	do {	\
+	lockevent_cond_inc(evr, (rd));		\
+	lockevent_cond_inc(evw, !(rd));		\
+} while (0)
 
 static __always_inline struct rw_semaphore *
-rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
+rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
+		  int state, bool reader)
 {
-	trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
+	trace_contention_begin(sem, reader ? LCB_F_READ : LCB_F_WRITE);
 
 	/* wait to be given the lock */
 	for (;;) {
@@ -1019,18 +1022,19 @@ rwsem_waiter_wait(struct rw_semaphore *s
 			break;
 		}
 		schedule_preempt_disabled();
-		lockevent_inc(waiter_type(waiter, rwsem_sleep_reader, rwsem_sleep_writer));
+		lockevent_rw_inc(reader, rwsem_sleep_reader, rwsem_sleep_writer);
 	}
 
 	__set_current_state(TASK_RUNNING);
-	lockevent_inc(waiter_type(waiter, rwsem_rlock, rwsem_wlock));
+
+	lockevent_rw_inc(reader, rwsem_rlock, rwsem_wlock);
 	trace_contention_end(sem, 0);
 	return sem;
 
 out_nolock:
 	rwsem_del_wake_waiter(sem, waiter);
 	__set_current_state(TASK_RUNNING);
-	lockevent_inc(waiter_type(waiter, rwsem_rlock_fail, rwsem_wlock_fail));
+	lockevent_rw_inc(reader, rwsem_rlock_fail, rwsem_wlock_fail);
 	trace_contention_end(sem, -EINTR);
 	return ERR_PTR(-EINTR);
 }
@@ -1112,7 +1116,7 @@ rwsem_down_read_slowpath(struct rw_semap
 	if (!wake_q_empty(&wake_q))
 		wake_up_q(&wake_q);
 
-	return rwsem_waiter_wait(sem, &waiter, state);
+	return rwsem_waiter_wait(sem, &waiter, state, true);
 }
 
 /*
@@ -1162,7 +1166,7 @@ rwsem_down_write_slowpath(struct rw_sema
 	}
 	raw_spin_unlock_irq(&sem->wait_lock);
 
-	return rwsem_waiter_wait(sem, &waiter, state);
+	return rwsem_waiter_wait(sem, &waiter, state, false);
 }
 
 /*
On Thu, Feb 23, 2023 at 01:26:47PM +0100, Peter Zijlstra wrote:
> Now that the reader and writer wait loops are identical, share the
> code.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[...]
> +#define waiter_type(_waiter, _r, _w)	\
> +	((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
> +
> +static __always_inline struct rw_semaphore *
> +rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
> +{
> +	trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
> +
> +	/* wait to be given the lock */
> +	for (;;) {
> +		set_current_state(state);
> +		if (!smp_load_acquire(&waiter->task)) {
> +			/* Matches rwsem_waiter_wake()'s smp_store_release(). */
> +			break;
> +		}
> +		if (signal_pending_state(state, current)) {
> +			raw_spin_lock_irq(&sem->wait_lock);

Move the below __set_current_state(TASK_RUNNING)s up here? I think we
need the preemption protection when changing the task state here.

> +			if (waiter->task)
> +				goto out_nolock;

I originally wanted to suggest renaming the label to "out_locked", but I
think we can just move the labeled code up here? And even open-code
rwsem_del_wake_waiter() since it only has one usage.

Regards,
Boqun

[...]
On Thu, Feb 23, 2023 at 11:31:47AM -0800, Boqun Feng wrote:
[..]
> > +#define waiter_type(_waiter, _r, _w)	\
> > +	((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
> > +
> > +static __always_inline struct rw_semaphore *
> > +rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
> > +{
> > +	trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
> > +
> > +	/* wait to be given the lock */
> > +	for (;;) {
> > +		set_current_state(state);
> > +		if (!smp_load_acquire(&waiter->task)) {
> > +			/* Matches rwsem_waiter_wake()'s smp_store_release(). */
> > +			break;
> > +		}
> > +		if (signal_pending_state(state, current)) {
> > +			raw_spin_lock_irq(&sem->wait_lock);
>
> Move the below __set_current_state(TASK_RUNNING)s up here? I think we
> need the preemption protection when changing the task state here.
>

Nevermind since we have the preemption protection for the whole
function... but merging two __set_current_state()s into one still looks
good.

Regards,
Boqun

[...]
On Thu, Feb 23, 2023 at 05:33:53PM -0800, Boqun Feng wrote:
> On Thu, Feb 23, 2023 at 11:31:47AM -0800, Boqun Feng wrote:
> [..]
[...]
> > Move the below __set_current_state(TASK_RUNNING)s up here? I think we
> > need the preemption protection when changing the task state here.
> >
>
> Nevermind since we have the preemption protection for the whole
> function... but merging two __set_current_state()s into one still looks
> good.

Even if it were not; I still don't understand the concern. Preemption
ignores task state.
On Sun, Feb 26, 2023 at 01:01:10PM +0100, Peter Zijlstra wrote:
> On Thu, Feb 23, 2023 at 05:33:53PM -0800, Boqun Feng wrote:
> > On Thu, Feb 23, 2023 at 11:31:47AM -0800, Boqun Feng wrote:
> > [..]
[...]
> > Nevermind since we have the preemption protection for the whole
> > function... but merging two __set_current_state()s into one still looks
> > good.
>
> Even if it were not; I still don't understand the concern. Preemption
> ignores task state.

Because I missed the exact thing you just mentioned... ;-) I was worried
about the following case:

	ttwu();
	set_current_state(TASK_UNINTERRUPTIBLE);
	....
	<preemption enable>
	<preempted>
	  preempt_schedule_irq():
	    __schedule(...):
	      deactivate_task(); // Wakeup missed.

However this is not true, since __schedule() in preempt_schedule_irq()
is a SM_PREEMPT one. Sorry for the noise then. But good for me to
revisit these stuffs ;-)

Regards,
Boqun
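To make the point above concrete: only a voluntary __schedule() (one not entered via preemption) dequeues a task that has set a non-running state, whereas a preemption-driven __schedule(), as called from preempt_schedule_irq() with SM_PREEMPT, leaves the task on the runqueue regardless of its state, so the wakeup cannot be lost. The following is a toy model of that distinction with hypothetical names; it is not the scheduler's real code.

/* Toy model of the SM_PREEMPT distinction -- hypothetical, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

enum task_state { TASK_RUNNING, TASK_UNINTERRUPTIBLE };
#define SM_MASK_PREEMPT 1u

struct task { enum task_state state; bool on_rq; };

static void schedule_model(struct task *prev, unsigned int sched_mode)
{
	/*
	 * Only a voluntary schedule (no SM_PREEMPT) dequeues a task that set
	 * a non-running state; a preemption keeps it runnable, so a wakeup
	 * racing with set_current_state() cannot be missed.
	 */
	if (!(sched_mode & SM_MASK_PREEMPT) && prev->state != TASK_RUNNING)
		prev->on_rq = false;		/* the "deactivate_task()" step */
}

int main(void)
{
	struct task t = { .state = TASK_UNINTERRUPTIBLE, .on_rq = true };

	schedule_model(&t, SM_MASK_PREEMPT);	/* the case worried about above */
	printf("on runqueue after preemption: %d\n", t.on_rq);	/* prints 1 */

	schedule_model(&t, 0);			/* a voluntary schedule() */
	printf("on runqueue after schedule():  %d\n", t.on_rq);	/* prints 0 */
	return 0;
}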