When osq_lock() returns false or osq_unlock() returns static
analysis shows that node->next should always be NULL.
This means that it isn't necessary to explicitly set it to NULL
prior to atomic_xchg(&lock->tail, curr) on extry to osq_lock().
Just in case there a non-obvious race condition that can leave it
non-NULL check with WARN_ON_ONCE() and NULL if set.
Note that without this check the fast path (adding at the list head)
doesn't need to to access the per-cpu osq_node at all.
Signed-off-by: David Laight <david.laight@aculab.com>
---
kernel/locking/osq_lock.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 27324b509f68..35bb99e96697 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -87,12 +87,17 @@ osq_wait_next(struct optimistic_spin_queue *lock,
bool osq_lock(struct optimistic_spin_queue *lock)
{
- struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
- struct optimistic_spin_node *prev, *next;
+ struct optimistic_spin_node *node, *prev, *next;
int curr = encode_cpu(smp_processor_id());
int prev_cpu;
- node->next = NULL;
+ /*
+ * node->next should be NULL on entry.
+ * Check just in case there is a race somewhere.
+ * Note that this is probably an unnecessary cache miss in the fast path.
+ */
+ if (WARN_ON_ONCE(raw_cpu_read(osq_node.next) != NULL))
+ raw_cpu_write(osq_node.next, NULL);
/*
* We need both ACQUIRE (pairs with corresponding RELEASE in
@@ -104,8 +109,9 @@ bool osq_lock(struct optimistic_spin_queue *lock)
if (prev_cpu == OSQ_UNLOCKED_VAL)
return true;
- node->prev_cpu = prev_cpu;
+ node = this_cpu_ptr(&osq_node);
prev = decode_cpu(prev_cpu);
+ node->prev_cpu = prev_cpu;
node->locked = 0;
/*
--
2.17.1
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
* David Laight <David.Laight@ACULAB.COM> wrote:
> When osq_lock() returns false or osq_unlock() returns static
> analysis shows that node->next should always be NULL.
> This means that it isn't necessary to explicitly set it to NULL
> prior to atomic_xchg(&lock->tail, curr) on extry to osq_lock().
>
> Just in case there a non-obvious race condition that can leave it
> non-NULL check with WARN_ON_ONCE() and NULL if set.
> Note that without this check the fast path (adding at the list head)
> doesn't need to to access the per-cpu osq_node at all.
>
> Signed-off-by: David Laight <david.laight@aculab.com>
> ---
> kernel/locking/osq_lock.c | 14 ++++++++++----
> 1 file changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
> index 27324b509f68..35bb99e96697 100644
> --- a/kernel/locking/osq_lock.c
> +++ b/kernel/locking/osq_lock.c
> @@ -87,12 +87,17 @@ osq_wait_next(struct optimistic_spin_queue *lock,
>
> bool osq_lock(struct optimistic_spin_queue *lock)
> {
> - struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
> - struct optimistic_spin_node *prev, *next;
> + struct optimistic_spin_node *node, *prev, *next;
> int curr = encode_cpu(smp_processor_id());
> int prev_cpu;
>
> - node->next = NULL;
> + /*
> + * node->next should be NULL on entry.
> + * Check just in case there is a race somewhere.
> + * Note that this is probably an unnecessary cache miss in the fast path.
> + */
> + if (WARN_ON_ONCE(raw_cpu_read(osq_node.next) != NULL))
> + raw_cpu_write(osq_node.next, NULL);
The fix-uppery and explanation about something that shouldn't happen is
excessive: please just put a plain WARN_ON_ONCE() here - which we can
remove in a release or so.
Thanks,
Ingo
On 12/31/23 16:54, David Laight wrote:
> When osq_lock() returns false or osq_unlock() returns static
> analysis shows that node->next should always be NULL.
> This means that it isn't necessary to explicitly set it to NULL
> prior to atomic_xchg(&lock->tail, curr) on extry to osq_lock().
>
> Just in case there a non-obvious race condition that can leave it
> non-NULL check with WARN_ON_ONCE() and NULL if set.
> Note that without this check the fast path (adding at the list head)
> doesn't need to to access the per-cpu osq_node at all.
>
> Signed-off-by: David Laight <david.laight@aculab.com>
> ---
> kernel/locking/osq_lock.c | 14 ++++++++++----
> 1 file changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
> index 27324b509f68..35bb99e96697 100644
> --- a/kernel/locking/osq_lock.c
> +++ b/kernel/locking/osq_lock.c
> @@ -87,12 +87,17 @@ osq_wait_next(struct optimistic_spin_queue *lock,
>
> bool osq_lock(struct optimistic_spin_queue *lock)
> {
> - struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
> - struct optimistic_spin_node *prev, *next;
> + struct optimistic_spin_node *node, *prev, *next;
> int curr = encode_cpu(smp_processor_id());
> int prev_cpu;
>
> - node->next = NULL;
> + /*
> + * node->next should be NULL on entry.
> + * Check just in case there is a race somewhere.
> + * Note that this is probably an unnecessary cache miss in the fast path.
> + */
> + if (WARN_ON_ONCE(raw_cpu_read(osq_node.next) != NULL))
> + raw_cpu_write(osq_node.next, NULL);
>
> /*
> * We need both ACQUIRE (pairs with corresponding RELEASE in
> @@ -104,8 +109,9 @@ bool osq_lock(struct optimistic_spin_queue *lock)
> if (prev_cpu == OSQ_UNLOCKED_VAL)
> return true;
>
> - node->prev_cpu = prev_cpu;
> + node = this_cpu_ptr(&osq_node);
> prev = decode_cpu(prev_cpu);
> + node->prev_cpu = prev_cpu;
> node->locked = 0;
>
> /*
Reviewed-by: Waiman Long <longman@redhat.com>
© 2016 - 2025 Red Hat, Inc.