include/asm-generic/barrier.h | 22 ++++++++++++++++++++++ include/linux/atomic.h | 4 ++++ kernel/locking/qspinlock.c | 3 ++- 3 files changed, 28 insertions(+), 1 deletion(-)
From: Guo Ren <guoren@linux.alibaba.com>
The current cond_load primitive contains two parts (condition expression
and load value), but a user of cond_load may only care about a sub-part
of the loaded value in the condition expression. That means hardware could utilize
a mask argument to optimize the wait condition. If the mask argument
size is less than the hardware minimum wait size, the hardware uses its
minimum size.
The patch contains a qspinlock example: When it is at the head of the
waitqueue, it waits for the owner & pending to go away. The forward
progress condition only cares about the locked_pending part, but it needs to load
the 32-bit lock value as a return.
That also means a WFE-like instruction would need a mask argument for the
load reservation set.
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Will Deacon <will@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
---
include/asm-generic/barrier.h | 22 ++++++++++++++++++++++
include/linux/atomic.h | 4 ++++
kernel/locking/qspinlock.c | 3 ++-
3 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 961f4d88f9ef..fec61629f769 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -275,6 +275,28 @@ do { \
})
#endif
+/**
+ * smp_cond_load_mask_relaxed() - (Spin) wait for cond with no ordering guarantees
+ * @ptr: pointer to the variable to wait on
+ * @cond: boolean expression to wait for
+ * @mask: mask *ptr to wait for (effect of 0 is the same with -1)
+ */
+#ifndef smp_cond_load_mask_relaxed
+#define smp_cond_load_mask_relaxed(ptr, cond_expr, mask) \
+ smp_cond_load_relaxed(ptr, cond_expr)
+#endif
+
+/**
+ * smp_cond_load_mask_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * @ptr: pointer to the variable to wait on
+ * @cond: boolean expression to wait for
+ * @mask: mask *ptr to wait for (effect of 0 is the same with -1)
+ */
+#ifndef smp_cond_load_mask_acquire
+#define smp_cond_load_mask_acquire(ptr, cond_expr, mask) \
+ smp_cond_load_acquire(ptr, cond_expr)
+#endif
+
/*
* pmem_wmb() ensures that all stores for which the modification
* are written to persistent storage by preceding instructions have
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 8dd57c3a99e9..dc7351945f27 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -27,9 +27,13 @@
#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
+#define atomic_cond_read_mask_acquire(v, c, m) smp_cond_load_mask_acquire(&(v)->counter, (c), (m))
+#define atomic_cond_read_mask_relaxed(v, c, m) smp_cond_load_mask_relaxed(&(v)->counter, (c), (m))
#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
+#define atomic64_cond_read_mask_acquire(v, c, m) smp_cond_load_mask_acquire(&(v)->counter, (c), (m))
+#define atomic64_cond_read_mask_relaxed(v, c, m) smp_cond_load_mask_relaxed(&(v)->counter, (c), (m))
/*
* The idea here is to build acquire/release variants by adding explicit
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ebe6b8ec7cb3..14fdd2ee752c 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -511,7 +511,8 @@ void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
if ((val = pv_wait_head_or_lock(lock, node)))
goto locked;
- val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));
+ val = atomic_cond_read_mask_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK),
+ _Q_LOCKED_PENDING_MASK);
locked:
/*
--
2.36.1
On 12/25/22 06:55, guoren@kernel.org wrote: > From: Guo Ren <guoren@linux.alibaba.com> > > The current cond_load primitive contains two parts (condition expression > and load value), but the usage of cond_load may require the sub-size > condition expression of the load size. That means hardware could utilize > a mask argument to optimize the wait condition. If the mask argument > size is less than the hardware minimum wait size, the hardware uses its > minimum size. > > The patch contains a qspinlock example: When it is at the head of the > waitqueue, it waits for the owner & pending to go away. The forward > progress condition only cares locked_pending part, but it needs to load > the 32-bit lock value as a return. > > That also means WFE-liked instruction would need a mask argument of the > load reservation set. > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > Signed-off-by: Guo Ren <guoren@kernel.org> > Cc: Waiman Long <longman@redhat.com> > Cc: Peter Zijlstra <peterz@infradead.org> > Cc: Boqun Feng <boqun.feng@gmail.com> > Cc: Will Deacon <will@kernel.org> > Cc: Ingo Molnar <mingo@redhat.com> > --- > include/asm-generic/barrier.h | 22 ++++++++++++++++++++++ > include/linux/atomic.h | 4 ++++ > kernel/locking/qspinlock.c | 3 ++- > 3 files changed, 28 insertions(+), 1 deletion(-) > > diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h > index 961f4d88f9ef..fec61629f769 100644 > --- a/include/asm-generic/barrier.h > +++ b/include/asm-generic/barrier.h > @@ -275,6 +275,28 @@ do { \ > }) > #endif > > +/** > + * smp_cond_load_mask_relaxed() - (Spin) wait for cond with ACQUIRE ordering > + * @ptr: pointer to the variable to wait on > + * @cond: boolean expression to wait for > + * @mask: mask *ptr to wait for (effect of 0 is the same with -1) > + */ > +#ifndef smp_cond_load_mask_relaxed > +#define smp_cond_load_mask_relaxed(ptr, cond_expr, mask) \ > + smp_cond_load_relaxed(ptr, cond_expr) > +#endif > + > +/** > + * 
smp_cond_load_mask_acquire() - (Spin) wait for cond with ACQUIRE ordering > + * @ptr: pointer to the variable to wait on > + * @cond: boolean expression to wait for > + * @mask: mask *ptr to wait for (effect of 0 is the same with -1) > + */ > +#ifndef smp_cond_load_mask_acquire > +#define smp_cond_load_mask_acquire(ptr, cond_expr, mask) \ > + smp_cond_load_acquire(ptr, cond_expr) > +#endif > + > /* > * pmem_wmb() ensures that all stores for which the modification > * are written to persistent storage by preceding instructions have > diff --git a/include/linux/atomic.h b/include/linux/atomic.h > index 8dd57c3a99e9..dc7351945f27 100644 > --- a/include/linux/atomic.h > +++ b/include/linux/atomic.h > @@ -27,9 +27,13 @@ > > #define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) > #define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) > +#define atomic_cond_read_mask_acquire(v, c, m) smp_cond_load_mask_acquire(&(v)->counter, (c), (m)) > +#define atomic_cond_read_mask_relaxed(v, c, m) smp_cond_load_mask_relaxed(&(v)->counter, (c), (m)) > > #define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) > #define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) > +#define atomic64_cond_read_mask_acquire(v, c, m) smp_cond_load_mask_acquire(&(v)->counter, (c), (m)) > +#define atomic64_cond_read_mask_relaxed(v, c, m) smp_cond_load_mask_relaxed(&(v)->counter, (c), (m)) > > /* > * The idea here is to build acquire/release variants by adding explicit > diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c > index ebe6b8ec7cb3..14fdd2ee752c 100644 > --- a/kernel/locking/qspinlock.c > +++ b/kernel/locking/qspinlock.c > @@ -511,7 +511,8 @@ void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) > if ((val = pv_wait_head_or_lock(lock, node))) > goto locked; > > - val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK)); > + val = 
atomic_cond_read_mask_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK), > + _Q_LOCKED_PENDING_MASK); > > locked: > /* This patch is essentially a no-op. You will have to have at least one arch that has its own version of smp_cond_load_mask*() and get some benefit out of it. Otherwise, it is not likely to be merged. Cheers, Longman
© 2016 - 2025 Red Hat, Inc.