With the planned schema of resize of hb, a reference count will be
obtained during futex_hash() and will be dropped after the hb is
unlocked. Once the reference is dropped, the hb must not be used because
it will disappear after a resize.
To prepare the integration, rename
- futex_hb_unlock() to futex_hb_unlock_put()
- futex_queue() to futex_queue_put()
- futex_q_unlock() to futex_q_unlock_put()
- double_unlock_hb() to double_unlock_hb_put()
which additionally includes futex_hash_put(), an empty stub.
Introduce futex_hb_unlock_put() which is the unlock plus the reference
drop. Move futex_hb_waiters_dec() before the reference drop, if needed
before the unlock.
Update comments referring to the functions accordingly.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
io_uring/futex.c | 2 +-
kernel/futex/core.c | 12 ++++++++----
kernel/futex/futex.h | 31 ++++++++++++++++++++-----------
kernel/futex/pi.c | 19 ++++++++++---------
kernel/futex/requeue.c | 15 ++++++++-------
kernel/futex/waitwake.c | 23 ++++++++++++-----------
6 files changed, 59 insertions(+), 43 deletions(-)
diff --git a/io_uring/futex.c b/io_uring/futex.c
index e29662f039e1a..67246438da228 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -349,7 +349,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
hlist_add_head(&req->hash_node, &ctx->futex_list);
io_ring_submit_unlock(ctx, issue_flags);
- futex_queue(&ifd->q, hb);
+ futex_queue_put(&ifd->q, hb);
return IOU_ISSUE_SKIP_COMPLETE;
}
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 907b76590df16..3cfdd4c02f261 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -152,6 +152,9 @@ struct futex_hash_bucket *futex_hash(union futex_key *key)
return &futex_queues[hash & (futex_hashsize - 1)];
}
+void futex_hash_put(struct futex_hash_bucket *hb)
+{
+}
/**
* futex_setup_timer - set up the sleeping hrtimer.
@@ -543,8 +546,8 @@ struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
* Increment the counter before taking the lock so that
* a potential waker won't miss a to-be-slept task that is
* waiting for the spinlock. This is safe as all futex_q_lock()
- * users end up calling futex_queue(). Similarly, for housekeeping,
- * decrement the counter at futex_q_unlock() when some error has
+ * users end up calling futex_queue_put(). Similarly, for housekeeping,
+ * decrement the counter at futex_q_unlock_put() when some error has
* occurred and we don't end up adding the task to the list.
*/
futex_hb_waiters_inc(hb); /* implies smp_mb(); (A) */
@@ -555,11 +558,12 @@ struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
return hb;
}
-void futex_q_unlock(struct futex_hash_bucket *hb)
+void futex_q_unlock_put(struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
spin_unlock(&hb->lock);
futex_hb_waiters_dec(hb);
+ futex_hash_put(hb);
}
void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
@@ -586,7 +590,7 @@ void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
* @q: The futex_q to unqueue
*
* The q->lock_ptr must not be held by the caller. A call to futex_unqueue() must
- * be paired with exactly one earlier call to futex_queue().
+ * be paired with exactly one earlier call to futex_queue_put().
*
* Return:
* - 1 - if the futex_q was still queued (and we removed unqueued it);
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 618ce1fe870e9..5793546a48ebf 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -202,6 +202,7 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
int flags, u64 range_ns);
extern struct futex_hash_bucket *futex_hash(union futex_key *key);
+extern void futex_hash_put(struct futex_hash_bucket *hb);
/**
* futex_match - Check whether two futex keys are equal
@@ -288,23 +289,29 @@ extern void __futex_unqueue(struct futex_q *q);
extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
extern int futex_unqueue(struct futex_q *q);
+static inline void futex_hb_unlock_put(struct futex_hash_bucket *hb)
+{
+ spin_unlock(&hb->lock);
+ futex_hash_put(hb);
+}
+
/**
- * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
+ * futex_queue_put() - Enqueue the futex_q on the futex_hash_bucket
* @q: The futex_q to enqueue
* @hb: The destination hash bucket
*
- * The hb->lock must be held by the caller, and is released here. A call to
- * futex_queue() is typically paired with exactly one call to futex_unqueue(). The
- * exceptions involve the PI related operations, which may use futex_unqueue_pi()
- * or nothing if the unqueue is done as part of the wake process and the unqueue
- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
- * an example).
+ * The hb->lock must be held by the caller, and is released here and the reference
+ * on the hb is dropped. A call to futex_queue_put() is typically paired with
+ * exactly one call to futex_unqueue(). The exceptions involve the PI related
+ * operations, which may use futex_unqueue_pi() or nothing if the unqueue is
+ * done as part of the wake process and the unqueue state is implicit in the
+ * state of woken task (see futex_wait_requeue_pi() for an example).
*/
-static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
+static inline void futex_queue_put(struct futex_q *q, struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
__futex_queue(q, hb);
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
}
extern void futex_unqueue_pi(struct futex_q *q);
@@ -350,7 +357,7 @@ static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
}
extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q);
-extern void futex_q_unlock(struct futex_hash_bucket *hb);
+extern void futex_q_unlock_put(struct futex_hash_bucket *hb);
extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
@@ -380,11 +387,13 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
}
static inline void
-double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
+double_unlock_hb_put(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
spin_unlock(&hb1->lock);
if (hb1 != hb2)
spin_unlock(&hb2->lock);
+ futex_hash_put(hb1);
+ futex_hash_put(hb2);
}
/* syscalls */
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index d62cca5ed8f4c..8561f94f21ed9 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -217,9 +217,9 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
/*
* We get here with hb->lock held, and having found a
* futex_top_waiter(). This means that futex_lock_pi() of said futex_q
- * has dropped the hb->lock in between futex_queue() and futex_unqueue_pi(),
- * which in turn means that futex_lock_pi() still has a reference on
- * our pi_state.
+ * has dropped the hb->lock in between futex_queue_put() and
+ * futex_unqueue_pi(), which in turn means that futex_lock_pi() still
+ * has a reference on our pi_state.
*
* The waiter holding a reference on @pi_state also protects against
* the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
@@ -963,7 +963,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
* exit to complete.
* - EAGAIN: The user space value changed.
*/
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
@@ -1086,7 +1086,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
goto out;
out_unlock_put_key:
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
out:
if (to) {
@@ -1096,7 +1096,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
return ret != -EINTR ? ret : -ERESTARTNOINTR;
uaddr_faulted:
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
ret = fault_in_user_writeable(uaddr);
if (ret)
@@ -1196,7 +1196,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
}
get_pi_state(pi_state);
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
/* drops pi_state->pi_mutex.wait_lock */
ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter);
@@ -1235,7 +1235,8 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* owner.
*/
if ((ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, 0))) {
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
+
switch (ret) {
case -EFAULT:
goto pi_faulted;
@@ -1255,7 +1256,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
ret = (curval == uval) ? 0 : -EAGAIN;
out_unlock:
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
return ret;
pi_retry:
diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
index b47bb764b3520..80e99a498de28 100644
--- a/kernel/futex/requeue.c
+++ b/kernel/futex/requeue.c
@@ -58,7 +58,7 @@ enum {
};
const struct futex_q futex_q_init = {
- /* list gets initialized in futex_queue()*/
+ /* list gets initialized in futex_queue_put()*/
.wake = futex_wake_mark,
.key = FUTEX_KEY_INIT,
.bitset = FUTEX_BITSET_MATCH_ANY,
@@ -456,8 +456,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
ret = futex_get_value_locked(&curval, uaddr1);
if (unlikely(ret)) {
- double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
+ double_unlock_hb_put(hb1, hb2);
ret = get_user(curval, uaddr1);
if (ret)
@@ -542,8 +542,9 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
* waiter::requeue_state is correct.
*/
case -EFAULT:
- double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
+ double_unlock_hb_put(hb1, hb2);
+
ret = fault_in_user_writeable(uaddr2);
if (!ret)
goto retry;
@@ -556,8 +557,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
* exit to complete.
* - EAGAIN: The user space value changed.
*/
- double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
+ double_unlock_hb_put(hb1, hb2);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
@@ -674,9 +675,9 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
put_pi_state(pi_state);
out_unlock:
- double_unlock_hb(hb1, hb2);
- wake_up_q(&wake_q);
futex_hb_waiters_dec(hb2);
+ double_unlock_hb_put(hb1, hb2);
+ wake_up_q(&wake_q);
return ret ? ret : task_count;
}
@@ -814,7 +815,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* shared futexes. We need to compare the keys:
*/
if (futex_match(&q.key, &key2)) {
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
ret = -EINVAL;
goto out;
}
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index 3a10375d95218..fdb9fcaaf9fba 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -195,7 +195,7 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
}
}
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
wake_up_q(&wake_q);
return ret;
}
@@ -274,7 +274,7 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb_put(hb1, hb2);
if (!IS_ENABLED(CONFIG_MMU) ||
unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
@@ -327,7 +327,7 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
}
out_unlock:
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb_put(hb1, hb2);
wake_up_q(&wake_q);
return ret;
}
@@ -335,7 +335,7 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
static long futex_wait_restart(struct restart_block *restart);
/**
- * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal
+ * futex_wait_queue() - futex_queue_put() and wait for wakeup, timeout, or signal
* @hb: the futex hash bucket, must be locked by the caller
* @q: the futex_q to queue up on
* @timeout: the prepared hrtimer_sleeper, or null for no timeout
@@ -346,11 +346,11 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
/*
* The task state is guaranteed to be set before another task can
* wake it. set_current_state() is implemented using smp_store_mb() and
- * futex_queue() calls spin_unlock() upon completion, both serializing
+ * futex_queue_put() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
- futex_queue(q, hb);
+ futex_queue_put(q, hb);
/* Arm the timer */
if (timeout)
@@ -461,11 +461,12 @@ int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
* next futex. Queue each futex at this moment so hb can
* be unlocked.
*/
- futex_queue(q, hb);
+ futex_queue_put(q, hb);
continue;
}
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
+
__set_current_state(TASK_RUNNING);
/*
@@ -624,7 +625,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
ret = futex_get_value_locked(&uval, uaddr);
if (ret) {
- futex_q_unlock(*hb);
+ futex_q_unlock_put(*hb);
ret = get_user(uval, uaddr);
if (ret)
@@ -637,7 +638,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
}
if (uval != val) {
- futex_q_unlock(*hb);
+ futex_q_unlock_put(*hb);
ret = -EWOULDBLOCK;
}
@@ -665,7 +666,7 @@ int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
if (ret)
return ret;
- /* futex_queue and wait for wakeup, timeout, or a signal. */
+ /* futex_queue_put and wait for wakeup, timeout, or a signal. */
futex_wait_queue(hb, &q, to);
/* If we were woken (and unqueued), we succeeded, whatever. */
--
2.45.2
On Mon, Dec 16 2024 at 00:00, Sebastian Andrzej Siewior wrote:
> With the planned schema of resize of hb, a reference count will be
> obtained during futex_hash() and will be dropped after the hb is
> unlocked. Once the reference is dropped, the hb must not be used because
> it will disappear after a resize.
> To prepare the integration, rename
> - futex_hb_unlock() to futex_hb_unlock_put()
> - futex_queue() to futex_queue_put()
> - futex_q_unlock() to futex_q_unlock_put()
> - double_unlock_hb() to double_unlock_hb_put()
>
> which is additionally includes futex_hb_unlock_put(), an empty stub.
> Introduce futex_hb_unlock_put() which is the unlock plus the reference
> drop. Move futex_hb_waiters_dec() before the reference drop, if needed
> before the unlock.
> Update comments referring to the functions accordingly.
If I didn't know what you are talking about, it would have taken me a
while to decode the word salad above. It starts with the usage of 'hb',
an acronym which might be understandable for people familiar with the
futex code, but otherwise it's an arbitrary reference to nothing.
Assuming that 'hb' stands for hashbucket, the usage here is wrong:
With the planned schema of resize of hb...
This is about resizing the hash, not the hashbucket, right?
So something like this might be more to the point:
futex: Prepare for reference counting of the process private hash
To support runtime resizing of the process private hash, it's
required to add a reference count to the hash structure. The
reference count ensures that the hash cannot be resized or freed
while a task is operating on it.
The reference count will be obtained within futex_hash() and dropped
once the hash bucket is unlocked and not longer required for the
particular operation (queue, unqueue, wakeup etc.).
This is achieved by:
- appending _put() to existing functions so it's clear that they
also put the hash reference and fixing up the usage sites
- providing new helpers, which combine common operations (unlock,
put), and using them at the appropriate places
- providing new helper for standalone reference counting
functionality and using them at places, where the unlock operation
needs to be separate.
Hmm?
> -void futex_q_unlock(struct futex_hash_bucket *hb)
> +void futex_q_unlock_put(struct futex_hash_bucket *hb)
> __releases(&hb->lock)
> {
> spin_unlock(&hb->lock);
> futex_hb_waiters_dec(hb);
> + futex_hash_put(hb);
You missed a place to move the waiter_dec() before the unlock. Actually
this move should be in a separate preparatory patch, which does only
that. It also needs a proper change log which explains why this done,
why it is equivalent and not introducing a change in terms of ordering
rules. This:
> Move futex_hb_waiters_dec() before the reference drop, if needed
> before the unlock.
does not really give any clue at all.
> if (unlikely(ret)) {
> - double_unlock_hb(hb1, hb2);
> futex_hb_waiters_dec(hb2);
> + double_unlock_hb_put(hb1, hb2);
And having this move before the _put() change makes the latter a purely
mechanical change which lets the reader/reviewer focus on the reference
count rules and not be distracted by the waiter count changes.
> - /* futex_queue and wait for wakeup, timeout, or a signal. */
> + /* futex_queue_put and wait for wakeup, timeout, or a signal. */
When you fix up these comments, can you please use the fn() notation?
> futex_wait_queue(hb, &q, to);
>
> /* If we were woken (and unqueued), we succeeded, whatever. */
Thanks,
tglx
On 2024-12-16 19:48:07 [+0100], Thomas Gleixner wrote:
> So something like this might be more to the point:
>
> futex: Prepare for reference counting of the process private hash
>
> To support runtime resizing of the process private hash, it's
> required to add a reference count to the hash structure. The
> reference count ensures that the hash cannot be resized or freed
> while a task is operating on it.
>
> The reference count will be obtained within futex_hash() and dropped
> once the hash bucket is unlocked and not longer required for the
> particular operation (queue, unqueue, wakeup etc.).
>
> This is achieved by:
>
> - appending _put() to existing functions so it's clear that they
> also put the hash reference and fixing up the usage sites
>
> - providing new helpers, which combine common operations (unlock,
> put), and using them at the appropriate places
>
> - providing new helper for standalone reference counting
> functionality and using them at places, where the unlock operation
> needs to be separate.
>
> Hmm?
much better.
> > -void futex_q_unlock(struct futex_hash_bucket *hb)
> > +void futex_q_unlock_put(struct futex_hash_bucket *hb)
> > __releases(&hb->lock)
> > {
> > spin_unlock(&hb->lock);
> > futex_hb_waiters_dec(hb);
> > + futex_hash_put(hb);
>
> You missed a place to move the waiter_dec() before the unlock. Actually
This is fine because futex_hb_waiters_dec() happens before the reference
drop. However it is better to keep it symmetrical so I am going to move it.
> this move should be in a separate preparatory patch, which does only
> that. It also needs a proper change log which explains why this done,
> why it is equivalent and not introducing a change in terms of ordering
> rules. This:
>
> > Move futex_hb_waiters_dec() before the reference drop, if needed
> > before the unlock.
>
> does not really give any clue at all.
>
> > if (unlikely(ret)) {
> > - double_unlock_hb(hb1, hb2);
> > futex_hb_waiters_dec(hb2);
> > + double_unlock_hb_put(hb1, hb2);
>
> And having this move before the _put() change makes the latter a purely
> mechanical change which lets the reader/reviewer focus on the reference
> count rules and not be distracted by the waiter count changes.
Okay, moving this into its own patch.
> > - /* futex_queue and wait for wakeup, timeout, or a signal. */
> > + /* futex_queue_put and wait for wakeup, timeout, or a signal. */
>
> When you fix up these comments, can you please use the fn() notation?
sure
> Thanks,
>
> tglx
Sebastian
© 2016 - 2025 Red Hat, Inc.