[PATCH 2/4] thread-pool: Fix thread race

Marc Morcos posted 4 patches 1 month, 3 weeks ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, "Michael S. Tsirkin" <mst@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Markus Armbruster <armbru@redhat.com>, "Dr. David Alan Gilbert" <dave@treblig.org>
[PATCH 2/4] thread-pool: Fix thread race
Posted by Marc Morcos 1 month, 3 weeks ago
Fix a data race between `worker_thread()` writing and
`thread_pool_completion_bh()` reading shared data in `util/thread-pool.c`.

Signed-off-by: Marc Morcos <marcmorcos@google.com>
---
 util/thread-pool.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/util/thread-pool.c b/util/thread-pool.c
index d2ead6b728..1ced3bd371 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -18,6 +18,7 @@
 #include "qemu/defer-call.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
+#include "qemu/atomic.h"
 #include "qemu/coroutine.h"
 #include "trace.h"
 #include "block/thread-pool.h"
@@ -39,9 +40,13 @@ struct ThreadPoolElementAio {
     ThreadPoolFunc *func;
     void *arg;
 
-    /* Moving state out of THREAD_QUEUED is protected by lock.  After
-     * that, only the worker thread can write to it.  Reads and writes
-     * of state and ret are ordered with memory barriers.
+    /*
+     * Accessed with atomics.  Moving state out of THREAD_QUEUED is
+     * protected by pool->lock and only the worker thread can move
+     * the state from THREAD_ACTIVE to THREAD_DONE.
+     *
+     * When state is THREAD_DONE, ret must have been written already.
+     * Use acquire/release ordering when reading/writing ret as well.
      */
     enum ThreadState state;
     int ret;
@@ -105,15 +110,14 @@ static void *worker_thread(void *opaque)
 
         req = QTAILQ_FIRST(&pool->request_list);
         QTAILQ_REMOVE(&pool->request_list, req, reqs);
-        req->state = THREAD_ACTIVE;
+        qatomic_set(&req->state, THREAD_ACTIVE);
         qemu_mutex_unlock(&pool->lock);
 
         ret = req->func(req->arg);
 
         req->ret = ret;
-        /* Write ret before state.  */
-        smp_wmb();
-        req->state = THREAD_DONE;
+        /* _release to write ret before state.  */
+        qatomic_store_release(&req->state, THREAD_DONE);
 
         qemu_bh_schedule(pool->completion_bh);
         qemu_mutex_lock(&pool->lock);
@@ -180,7 +184,8 @@ static void thread_pool_completion_bh(void *opaque)
 
 restart:
     QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
-        if (elem->state != THREAD_DONE) {
+        /* _acquire to read state before ret.  */
+        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
             continue;
         }
 
@@ -189,9 +194,6 @@ restart:
         QLIST_REMOVE(elem, all);
 
         if (elem->common.cb) {
-            /* Read state before ret.  */
-            smp_rmb();
-
             /* Schedule ourselves in case elem->common.cb() calls aio_poll() to
              * wait for another request that completed at the same time.
              */
@@ -223,12 +225,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
     trace_thread_pool_cancel_aio(elem, elem->common.opaque);
 
     QEMU_LOCK_GUARD(&pool->lock);
-    if (elem->state == THREAD_QUEUED) {
+    if (qatomic_read(&elem->state) == THREAD_QUEUED) {
         QTAILQ_REMOVE(&pool->request_list, elem, reqs);
         qemu_bh_schedule(pool->completion_bh);
 
-        elem->state = THREAD_DONE;
-        elem->ret = -ECANCELED;
+        qatomic_set(&elem->ret, -ECANCELED);
+        qatomic_store_release(&elem->state, THREAD_DONE);
     }
 
 }
-- 
2.52.0.239.gd5f0c6e74e-goog
Re: [PATCH 2/4] thread-pool: Fix thread race
Posted by Paolo Bonzini 1 month, 3 weeks ago
On 12/13/25 01:14, Marc Morcos wrote:
>   
>           req->ret = ret;

Better use qatomic_set here---will fix it myself, thanks!

Paolo

> -        /* Write ret before state.  */
> -        smp_wmb();
> -        req->state = THREAD_DONE;
> +        /* _release to write ret before state.  */
> +        qatomic_store_release(&req->state, THREAD_DONE);
>   
>           qemu_bh_schedule(pool->completion_bh);
>           qemu_mutex_lock(&pool->lock);
> @@ -180,7 +184,8 @@ static void thread_pool_completion_bh(void *opaque)
>   
>   restart:
>       QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
> -        if (elem->state != THREAD_DONE) {
> +        /* _acquire to read state before ret.  */
> +        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
>               continue;
>           }
>   
> @@ -189,9 +194,6 @@ restart:
>           QLIST_REMOVE(elem, all);
>   
>           if (elem->common.cb) {
> -            /* Read state before ret.  */
> -            smp_rmb();
> -
>               /* Schedule ourselves in case elem->common.cb() calls aio_poll() to
>                * wait for another request that completed at the same time.
>                */
> @@ -223,12 +225,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
>       trace_thread_pool_cancel_aio(elem, elem->common.opaque);
>   
>       QEMU_LOCK_GUARD(&pool->lock);
> -    if (elem->state == THREAD_QUEUED) {
> +    if (qatomic_read(&elem->state) == THREAD_QUEUED) {
>           QTAILQ_REMOVE(&pool->request_list, elem, reqs);
>           qemu_bh_schedule(pool->completion_bh);
>   
> -        elem->state = THREAD_DONE;
> -        elem->ret = -ECANCELED;
> +        qatomic_set(&elem->ret, -ECANCELED);
> +        qatomic_store_release(&elem->state, THREAD_DONE);
>       }
>   
>   }
Re: [PATCH 2/4] thread-pool: Fix thread race
Posted by Marc Morcos 1 month, 3 weeks ago
Sounds good, thanks!

On Mon, Dec 15, 2025, 6:43 AM Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 12/13/25 01:14, Marc Morcos wrote:
> >
> >           req->ret = ret;
>
> Better use qatomic_set here---will fix it myself, thanks!
>
> Paolo
>
> > -        /* Write ret before state.  */
> > -        smp_wmb();
> > -        req->state = THREAD_DONE;
> > +        /* _release to write ret before state.  */
> > +        qatomic_store_release(&req->state, THREAD_DONE);
> >
> >           qemu_bh_schedule(pool->completion_bh);
> >           qemu_mutex_lock(&pool->lock);
> > @@ -180,7 +184,8 @@ static void thread_pool_completion_bh(void *opaque)
> >
> >   restart:
> >       QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
> > -        if (elem->state != THREAD_DONE) {
> > +        /* _acquire to read state before ret.  */
> > +        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
> >               continue;
> >           }
> >
> > @@ -189,9 +194,6 @@ restart:
> >           QLIST_REMOVE(elem, all);
> >
> >           if (elem->common.cb) {
> > -            /* Read state before ret.  */
> > -            smp_rmb();
> > -
> > >               /* Schedule ourselves in case elem->common.cb() calls aio_poll() to
> > >                * wait for another request that completed at the same time.
> > >                */
> > @@ -223,12 +225,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
> >       trace_thread_pool_cancel_aio(elem, elem->common.opaque);
> >
> >       QEMU_LOCK_GUARD(&pool->lock);
> > -    if (elem->state == THREAD_QUEUED) {
> > +    if (qatomic_read(&elem->state) == THREAD_QUEUED) {
> >           QTAILQ_REMOVE(&pool->request_list, elem, reqs);
> >           qemu_bh_schedule(pool->completion_bh);
> >
> > -        elem->state = THREAD_DONE;
> > -        elem->ret = -ECANCELED;
> > +        qatomic_set(&elem->ret, -ECANCELED);
> > +        qatomic_store_release(&elem->state, THREAD_DONE);
> >       }
> >
> >   }
>
>