[Qemu-devel] [QEMU-devel][PATCH v3] aio-posix: Fix concurrent aio_poll/set_fd_handler.
Posted by remy.noel@blade-group.com 5 years, 4 months ago
From: Remy Noel <remy.noel@blade-group.com>

It is possible for an io_poll callback to execute concurrently with
aio_set_fd_handler(). This can cause all sorts of problems, like a
NULL callback or a bad opaque pointer.

This changes aio_set_fd_handler() so that it no longer modifies
existing handler entries; instead, it always inserts a new, fully
initialised entry and retires the old one.

Also, aio_epoll_update() is no longer called for deleted handlers,
as it has no effect on them.

Signed-off-by: Remy Noel <remy.noel@blade-group.com>
---
 util/aio-posix.c | 86 ++++++++++++++++++++++++++++--------------------
 util/aio-win32.c | 67 ++++++++++++++++---------------------
 2 files changed, 79 insertions(+), 74 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index 51c41ed3c9..d658cf3007 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -200,6 +200,31 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)
     return NULL;
 }
 
+static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
+{
+    /* If the GSource is in the process of being destroyed then
+     * g_source_remove_poll() causes an assertion failure.  Skip
+     * removal in that case, because glib cleans up its state during
+     * destruction anyway.
+     */
+    if (!g_source_is_destroyed(&ctx->source)) {
+        g_source_remove_poll(&ctx->source, &node->pfd);
+    }
+
+    /* If a read is in progress, just mark the node as deleted */
+    if (qemu_lockcnt_count(&ctx->list_lock)) {
+        node->deleted = 1;
+        node->pfd.revents = 0;
+        return false;
+    }
+    /* Otherwise, delete it for real.  We can't just mark it as
+     * deleted because deleted nodes are only cleaned up while
+     * no one is walking the handlers list.
+     */
+    QLIST_REMOVE(node, node);
+    return true;
+}
+
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
                         bool is_external,
@@ -209,6 +234,7 @@ void aio_set_fd_handler(AioContext *ctx,
                         void *opaque)
 {
     AioHandler *node;
+    AioHandler *new_node = NULL;
     bool is_new = false;
     bool deleted = false;
     int poll_disable_change;
@@ -223,50 +249,36 @@ void aio_set_fd_handler(AioContext *ctx,
             qemu_lockcnt_unlock(&ctx->list_lock);
             return;
         }
-
-        /* If the GSource is in the process of being destroyed then
-         * g_source_remove_poll() causes an assertion failure.  Skip
-         * removal in that case, because glib cleans up its state during
-         * destruction anyway.
-         */
-        if (!g_source_is_destroyed(&ctx->source)) {
-            g_source_remove_poll(&ctx->source, &node->pfd);
-        }
-
-        /* If a read is in progress, just mark the node as deleted */
-        if (qemu_lockcnt_count(&ctx->list_lock)) {
-            node->deleted = 1;
-            node->pfd.revents = 0;
-        } else {
-            /* Otherwise, delete it for real.  We can't just mark it as
-             * deleted because deleted nodes are only cleaned up while
-             * no one is walking the handlers list.
-             */
-            QLIST_REMOVE(node, node);
-            deleted = true;
-        }
         poll_disable_change = -!node->io_poll;
     } else {
         poll_disable_change = !io_poll - (node && !node->io_poll);
         if (node == NULL) {
-            /* Alloc and insert if it's not already there */
-            node = g_new0(AioHandler, 1);
-            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
-
-            g_source_add_poll(&ctx->source, &node->pfd);
             is_new = true;
         }
+        /* Alloc and insert if it's not already there */
+        new_node = g_new0(AioHandler, 1);
 
         /* Update handler with latest information */
-        node->io_read = io_read;
-        node->io_write = io_write;
-        node->io_poll = io_poll;
-        node->opaque = opaque;
-        node->is_external = is_external;
+        new_node->io_read = io_read;
+        new_node->io_write = io_write;
+        new_node->io_poll = io_poll;
+        new_node->opaque = opaque;
+        new_node->is_external = is_external;
+
+        if (is_new) {
+            new_node->pfd.fd = fd;
+        } else {
+            new_node->pfd = node->pfd;
+        }
+        g_source_add_poll(&ctx->source, &new_node->pfd);
+
+        new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
+        new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
 
-        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
-        node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
+        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
+    }
+    if (node) {
+        deleted = aio_remove_fd_handler(ctx, node);
     }
 
     /* No need to order poll_disable_cnt writes against other updates;
@@ -278,7 +290,9 @@ void aio_set_fd_handler(AioContext *ctx,
     atomic_set(&ctx->poll_disable_cnt,
                atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);
 
-    aio_epoll_update(ctx, node, is_new);
+    if (new_node) {
+        aio_epoll_update(ctx, new_node, is_new);
+    }
     qemu_lockcnt_unlock(&ctx->list_lock);
     aio_notify(ctx);
 
diff --git a/util/aio-win32.c b/util/aio-win32.c
index c58957cc4b..a23b9c364d 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -35,6 +35,22 @@ struct AioHandler {
     QLIST_ENTRY(AioHandler) node;
 };
 
+static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
+{
+    /* If aio_poll is in progress, just mark the node as deleted */
+    if (qemu_lockcnt_count(&ctx->list_lock)) {
+        node->deleted = 1;
+        node->pfd.revents = 0;
+    } else {
+        /* Otherwise, delete it for real.  We can't just mark it as
+         * deleted because deleted nodes are only cleaned up after
+         * releasing the list_lock.
+         */
+        QLIST_REMOVE(node, node);
+        g_free(node);
+    }
+}
+
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
                         bool is_external,
@@ -44,41 +60,23 @@ void aio_set_fd_handler(AioContext *ctx,
                         void *opaque)
 {
     /* fd is a SOCKET in our case */
-    AioHandler *node;
+    AioHandler *old_node;
+    AioHandler *node = NULL;
 
     qemu_lockcnt_lock(&ctx->list_lock);
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (node->pfd.fd == fd && !node->deleted) {
+    QLIST_FOREACH(old_node, &ctx->aio_handlers, node) {
+        if (old_node->pfd.fd == fd && !old_node->deleted) {
             break;
         }
     }
 
-    /* Are we deleting the fd handler? */
-    if (!io_read && !io_write) {
-        if (node) {
-            /* If aio_poll is in progress, just mark the node as deleted */
-            if (qemu_lockcnt_count(&ctx->list_lock)) {
-                node->deleted = 1;
-                node->pfd.revents = 0;
-            } else {
-                /* Otherwise, delete it for real.  We can't just mark it as
-                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the list_lock.
-                 */
-                QLIST_REMOVE(node, node);
-                g_free(node);
-            }
-        }
-    } else {
+    if (io_read || io_write) {
         HANDLE event;
         long bitmask = 0;
 
-        if (node == NULL) {
-            /* Alloc and insert if it's not already there */
-            node = g_new0(AioHandler, 1);
-            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
-        }
+        /* Alloc and insert if it's not already there */
+        node = g_new0(AioHandler, 1);
+        node->pfd.fd = fd;
 
         node->pfd.events = 0;
         if (node->io_read) {
@@ -104,9 +102,13 @@ void aio_set_fd_handler(AioContext *ctx,
             bitmask |= FD_WRITE | FD_CONNECT;
         }
 
+        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
         event = event_notifier_get_handle(&ctx->notifier);
         WSAEventSelect(node->pfd.fd, event, bitmask);
     }
+    if (old_node) {
+        aio_remove_fd_handler(ctx, old_node);
+    }
 
     qemu_lockcnt_unlock(&ctx->list_lock);
     aio_notify(ctx);
@@ -139,18 +141,7 @@ void aio_set_event_notifier(AioContext *ctx,
         if (node) {
             g_source_remove_poll(&ctx->source, &node->pfd);
 
-            /* aio_poll is in progress, just mark the node as deleted */
-            if (qemu_lockcnt_count(&ctx->list_lock)) {
-                node->deleted = 1;
-                node->pfd.revents = 0;
-            } else {
-                /* Otherwise, delete it for real.  We can't just mark it as
-                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the list_lock.
-                 */
-                QLIST_REMOVE(node, node);
-                g_free(node);
-            }
+            aio_remove_fd_handler(ctx, node);
         }
     } else {
         if (node == NULL) {
-- 
2.19.2

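To make the failure mode concrete, here is a minimal C sketch of the
torn update the commit message describes and of the insert-then-retire
scheme the patch adopts. The names below (AioHandlerSketch,
set_handler_unsafe, set_handler_safe) are illustrative stand-ins, not
QEMU code.

#include <stdatomic.h>
#include <stdlib.h>

typedef void IOHandler(void *opaque);

typedef struct AioHandlerSketch {
    IOHandler *io_read;
    void *opaque;
} AioHandlerSketch;

/* Published pointer; a poller loads it once and uses that snapshot. */
static _Atomic(AioHandlerSketch *) current_handler;

/* Before the patch (conceptually): the live node's fields were
 * overwritten one by one, so a concurrent poller could observe the
 * new io_read paired with the old opaque, or a NULL callback.
 */
static void set_handler_unsafe(AioHandlerSketch *node,
                               IOHandler *io_read, void *opaque)
{
    node->io_read = io_read;   /* a poller may run between these */
    node->opaque = opaque;     /* two stores: torn update */
}

/* After the patch (conceptually): build a fully initialised
 * replacement, publish it atomically, and let the caller retire the
 * old node only once no reader can still be using it.
 */
static AioHandlerSketch *set_handler_safe(IOHandler *io_read, void *opaque)
{
    AioHandlerSketch *new_node = malloc(sizeof(*new_node));
    if (!new_node) {
        abort();
    }
    new_node->io_read = io_read;
    new_node->opaque = opaque;
    /* acq_rel: a reader that sees new_node also sees its fields. */
    return atomic_exchange_explicit(&current_handler, new_node,
                                    memory_order_acq_rel);
}

In the patch itself, QLIST_INSERT_HEAD_RCU() plays the role of the
atomic publish, and the deleted flag defers the retirement.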

Re: [Qemu-devel] [QEMU-devel][PATCH v3] aio-posix: Fix concurrent aio_poll/set_fd_handler.
Posted by Stefan Hajnoczi 5 years, 4 months ago
On Mon, Dec 17, 2018 at 05:48:47PM +0100, remy.noel@blade-group.com wrote:
> From: Remy Noel <remy.noel@blade-group.com>
> 
> It is possible for an io_poll callback to execute concurrently with
> aio_set_fd_handler(). This can cause all sorts of problems, like a
> NULL callback or a bad opaque pointer.
> 
> This changes aio_set_fd_handler() so that it no longer modifies
> existing handler entries; instead, it always inserts a new, fully
> initialised entry and retires the old one.
> 
> Also, aio_epoll_update() is no longer called for deleted handlers,
> as it has no effect on them.
> 
> Signed-off-by: Remy Noel <remy.noel@blade-group.com>
> ---

Please include a changelog in future patches.  For example:

v3:
 * Don't drop revents when a handler is modified [Stefan]

That way reviewers know what to look for and which issues you have
addressed.

>  util/aio-posix.c | 86 ++++++++++++++++++++++++++++--------------------
>  util/aio-win32.c | 67 ++++++++++++++++---------------------
>  2 files changed, 79 insertions(+), 74 deletions(-)
> 
> diff --git a/util/aio-posix.c b/util/aio-posix.c
> index 51c41ed3c9..d658cf3007 100644
> --- a/util/aio-posix.c
> +++ b/util/aio-posix.c

Thanks!  The worst case I can now imagine is if an fd is handled twice
due to a concurrent aio_set_fd_handler() call, but spurious
->io_read()/->io_write() should not cause problems.
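
For example, a typical handler on a non-blocking fd treats a spurious
dispatch as a no-op (an illustrative sketch, not code from this
patch):

#include <errno.h>
#include <unistd.h>

static void io_read_sketch(void *opaque)
{
    int fd = *(int *)opaque;
    char buf[4096];
    ssize_t n;

    /* Drain whatever is pending; a spurious call finds nothing. */
    while ((n = read(fd, buf, sizeof(buf))) > 0) {
        /* process buf[0..n) */
    }
    if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        return;  /* spurious wakeup or fully drained: harmless */
    }
    /* n == 0 (EOF) or a real error would be handled here. */
}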

I will wait for Paolo to review this because he is most familiar with
the lockcnt abstraction.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Re: [Qemu-devel] [QEMU-devel][PATCH v3] aio-posix: Fix concurrent aio_poll/set_fd_handler.
Posted by Remy NOEL 5 years, 4 months ago
On 12/18/18 3:14 PM, Stefan Hajnoczi wrote:

> Please include a changelog in future patches.  For example:
>
> v3:
>   * Don't drop revents when a handler is modified [Stefan]
>
> That way reviewers know what to look for and which issues you have
> addressed.
Sorry, I wasn't sure this was needed for single-commit patches without
a cover letter.
> Thanks!  The worst case I can now imagine is if an fd is handled twice
> due to a concurrent aio_set_fd_handler() call, but spurious
> ->io_read()/->io_write() should not cause problems.

Concurrent aio_set_fd_handler() calls should not be possible (not on
the same AioContext, at least) since we take ctx->list_lock.
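
Roughly, the locking pattern looks like this (a simplified stand-in
for QemuLockCnt, modelled on its documented behaviour rather than
copied from util/lockcnt.c):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

typedef struct LockCntSketch {
    pthread_mutex_t mutex;  /* serialises writers (set_fd_handler) */
    atomic_int count;       /* in-flight readers (aio_poll walkers) */
} LockCntSketch;

static void lockcnt_init(LockCntSketch *lc)
{
    pthread_mutex_init(&lc->mutex, NULL);
    atomic_init(&lc->count, 0);
}

/* Reader side: aio_poll() only bumps the counter while walking the
 * handler list, so pollers never block each other. */
static void lockcnt_inc(LockCntSketch *lc) { atomic_fetch_add(&lc->count, 1); }
static void lockcnt_dec(LockCntSketch *lc) { atomic_fetch_sub(&lc->count, 1); }

/* Writer side: aio_set_fd_handler() takes the mutex, so two writers
 * on the same AioContext serialise. */
static void lockcnt_lock(LockCntSketch *lc)   { pthread_mutex_lock(&lc->mutex); }
static void lockcnt_unlock(LockCntSketch *lc) { pthread_mutex_unlock(&lc->mutex); }

/* Called with lc->mutex held: nonzero means a poller may still be
 * walking the list, so a removed node can only be marked deleted,
 * not freed (the qemu_lockcnt_count() check in the patch). */
static bool readers_present(LockCntSketch *lc)
{
    return atomic_load(&lc->count) > 0;
}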

Remy


Re: [Qemu-devel] [QEMU-devel][PATCH v3] aio-posix: Fix concurrent aio_poll/set_fd_handler.
Posted by Stefan Hajnoczi 5 years, 4 months ago
On Mon, Dec 17, 2018 at 05:48:47PM +0100, remy.noel@blade-group.com wrote:
> From: Remy Noel <remy.noel@blade-group.com>
> 
> It is possible for an io_poll callback to execute concurrently with
> aio_set_fd_handler(). This can cause all sorts of problems, like a
> NULL callback or a bad opaque pointer.
> 
> This changes aio_set_fd_handler() so that it no longer modifies
> existing handler entries; instead, it always inserts a new, fully
> initialised entry and retires the old one.
> 
> Also, aio_epoll_update() is no longer called for deleted handlers,
> as it has no effect on them.
> 
> Signed-off-by: Remy Noel <remy.noel@blade-group.com>

Tested-by: Stefan Hajnoczi <stefanha@redhat.com>
Re: [Qemu-devel] [QEMU-devel][PATCH v3] aio-posix: Fix concurrent aio_poll/set_fd_handler.
Posted by Paolo Bonzini 5 years, 4 months ago
On 17/12/18 17:48, remy.noel@blade-group.com wrote:
> Also, aio_epoll_update() is no longer called for deleted handlers,
> as it has no effect on them.

Why?  epoll is used in level-triggered mode, so you do have to remove
the file descriptor...

>                 atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);
>  
> -    aio_epoll_update(ctx, node, is_new);
> +    if (new_node) {
> +        aio_epoll_update(ctx, new_node, is_new);
> +    }
>      qemu_lockcnt_unlock(&ctx->list_lock);
>      aio_notify(ctx);

... so I think this should be "if (node || new_node)"?

Paolo

Re: [Qemu-devel] [QEMU-devel][PATCH v3] aio-posix: Fix concurrent aio_poll/set_fd_handler.
Posted by Remy NOEL 5 years, 4 months ago
On 12/18/18 6:39 PM, Paolo Bonzini wrote:

> On 17/12/18 17:48, remy.noel@blade-group.com wrote:
>> Also, aio_epoll_update() is no longer called for deleted handlers,
>> as it has no effect on them.
> Why?  epoll is used in level-triggered mode, so you do have to remove
> the file descriptor...
>
>>                  atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);
>>   
>> -    aio_epoll_update(ctx, node, is_new);
>> +    if (new_node) {
>> +        aio_epoll_update(ctx, new_node, is_new);
>> +    }
>>       qemu_lockcnt_unlock(&ctx->list_lock);
>>       aio_notify(ctx);
> ... so I think this should be "if (node || new_node)"?

Well, currently, when an AioHandler is removed, we do not change
node->pfd.events (only revents).

Therefore a call to aio_epoll_update() on that node would only result
in a call to epoll_ctl() with EPOLL_CTL_MOD and the same events, which
seems rather pointless.

We could set node->pfd.events to 0 to unregister the file descriptor,
but if I'm not mistaken this would change the behavior compared to the
current handling of node deletion.
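
Schematically (a simplified sketch of the distinction, not the actual
aio_epoll_update() code from util/aio-posix.c):

#include <stdbool.h>
#include <stdint.h>
#include <sys/epoll.h>

static void epoll_update_sketch(int epollfd, int fd, uint32_t events,
                                bool deleted)
{
    struct epoll_event ev = { .events = events, .data.fd = fd };

    if (deleted) {
        /* Level-triggered epoll keeps reporting readiness until the
         * fd is removed, so deletion must reach the kernel. */
        epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
    } else {
        /* Same events as before: MOD changes nothing observable. */
        epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev);
    }
}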

Remy


Re: [Qemu-devel] [QEMU-devel][PATCH v3] aio-posix: Fix concurrent aio_poll/set_fd_handler.
Posted by Paolo Bonzini 5 years, 4 months ago
On 19/12/18 17:29, Remy NOEL wrote:
>>
>>
>>>                  atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);
>>>
>>> -    aio_epoll_update(ctx, node, is_new);
>>> +    if (new_node) {
>>> +        aio_epoll_update(ctx, new_node, is_new);
>>> +    }
>>>      qemu_lockcnt_unlock(&ctx->list_lock);
>>>      aio_notify(ctx);
>> ... so I think this should be "if (node || new_node)"?
> 
> Well, currently, when an AioHandler is removed, we do not change
> node->pfd.events (only revents).
> 
> Therefore a call to aio_epoll_update() on that node would only result
> in a call to epoll_ctl() with EPOLL_CTL_MOD and the same events,
> which seems rather pointless.
> 
> We could set node->pfd.events to 0 to unregister the file descriptor,
> but if I'm not mistaken this would change the behavior compared to
> the current handling of node deletion.

You found another bug then. :)

Paolo

Re: [Qemu-devel] [QEMU-devel][PATCH v3] aio-posix: Fix concurrent aio_poll/set_fd_handler.
Posted by Remy NOEL 5 years, 4 months ago
On 12/19/18 8:32 PM, Paolo Bonzini wrote:

> You found another bug then. :)
OK, will fix.