The following changes since commit 887cba855bb6ff4775256f7968409281350b568c:

  configure: Fix cross-building for RISCV host (v5) (2023-07-11 17:56:09 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 75dcb4d790bbe5327169fd72b185960ca58e2fa6:

  virtio-blk: fix host notifier issues during dataplane start/stop (2023-07-12 15:20:32 -0400)

----------------------------------------------------------------
Pull request
----------------------------------------------------------------

Stefan Hajnoczi (1):
  virtio-blk: fix host notifier issues during dataplane start/stop

 hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 29 deletions(-)

--
2.40.1

The main loop thread can consume 100% CPU when using --device
virtio-blk-pci,iothread=<iothread>. ppoll() constantly returns but
reading virtqueue host notifiers fails with EAGAIN. The file descriptors
are stale and remain registered with the AioContext because of bugs in
the virtio-blk dataplane start/stop code.

The problem is that the dataplane start/stop code involves drain
operations, which call virtio_blk_drained_begin() and
virtio_blk_drained_end() at points where the host notifier is not
operational:
- In virtio_blk_data_plane_start(), blk_set_aio_context() drains after
  vblk->dataplane_started has been set to true but the host notifier has
  not been attached yet.
- In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context()
  drain after the host notifier has already been detached but with
  vblk->dataplane_started still set to true.

I would like to simplify ->ioeventfd_start/stop() to avoid interactions
with drain entirely, but couldn't find a way to do that. Instead, this
patch accepts the fragile nature of the code and reorders it so that
vblk->dataplane_started is false during drain operations. This way the
virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't
touch the host notifier. The result is that
virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have
complete control over the host notifier and stale file descriptors are
no longer left in the AioContext.

This patch fixes the 100% CPU consumption in the main loop thread and
correctly moves host notifier processing to the IOThread.

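As an illustration of the ordering this relies on, here is a minimal,
self-contained sketch. It uses C11 atomics in place of QEMU's smp_wmb()
and the barrier inside aio_notify_accept(), and all names below are
hypothetical, not QEMU code:

    #include <stdatomic.h>
    #include <stdbool.h>

    static _Atomic bool dataplane_started;

    /* Start side: publish state before signalling the IOThread. The
     * release store plays the role of smp_wmb() in the patch. */
    static void start_side(void)
    {
        /* ... attach host notifiers, set up virtqueue state ... */
        atomic_store_explicit(&dataplane_started, true,
                              memory_order_release);
        /* ... kick the event notifier so the IOThread wakes up ... */
    }

    /* IOThread side: the acquire load pairs with the release store
     * above, mirroring aio_notify_accept() on the read side. */
    static void handle_notifier(void)
    {
        if (!atomic_load_explicit(&dataplane_started,
                                  memory_order_acquire)) {
            return; /* not started yet: treat as a spurious wakeup */
        }
        /* ... process the virtqueue ... */
    }
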
Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()")
Reported-by: Lukáš Doktor <ldoktor@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Lukas Doktor <ldoktor@redhat.com>
Message-id: 20230704151527.193586-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)

     memory_region_transaction_commit();

-    /*
-     * These fields are visible to the IOThread so we rely on implicit barriers
-     * in aio_context_acquire() on the write side and aio_notify_accept() on
-     * the read side.
-     */
-    s->starting = false;
-    vblk->dataplane_started = true;
     trace_virtio_blk_data_plane_start(s);

     old_context = blk_get_aio_context(s->conf->conf.blk);
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
         event_notifier_set(virtio_queue_get_host_notifier(vq));
     }

+    /*
+     * These fields must be visible to the IOThread when it processes the
+     * virtqueue, otherwise it will think dataplane has not started yet.
+     *
+     * Make sure ->dataplane_started is false when blk_set_aio_context() is
+     * called above so that draining does not cause the host notifier to be
+     * detached/attached prematurely.
+     */
+    s->starting = false;
+    vblk->dataplane_started = true;
+    smp_wmb(); /* paired with aio_notify_accept() on the read side */
+
     /* Get this show started by hooking up our callbacks */
     if (!blk_in_drain(s->conf->conf.blk)) {
         aio_context_acquire(s->ctx);
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
  fail_guest_notifiers:
     vblk->dataplane_disabled = true;
     s->starting = false;
-    vblk->dataplane_started = true;
     return -ENOSYS;
 }

@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
         aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
     }

+    /*
+     * Batch all the host notifiers in a single transaction to avoid
+     * quadratic time complexity in address_space_update_ioeventfds().
+     */
+    memory_region_transaction_begin();
+
+    for (i = 0; i < nvqs; i++) {
+        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
+    }
+
+    /*
+     * The transaction expects the ioeventfds to be open when it
+     * commits. Do it now, before the cleanup loop.
+     */
+    memory_region_transaction_commit();
+
+    for (i = 0; i < nvqs; i++) {
+        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
+    }
+
+    /*
+     * Set ->dataplane_started to false before draining so that host notifiers
+     * are not detached/attached anymore.
+     */
+    vblk->dataplane_started = false;
+
     aio_context_acquire(s->ctx);

     /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)

     aio_context_release(s->ctx);

-    /*
-     * Batch all the host notifiers in a single transaction to avoid
-     * quadratic time complexity in address_space_update_ioeventfds().
-     */
-    memory_region_transaction_begin();
-
-    for (i = 0; i < nvqs; i++) {
-        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
-    }
-
-    /*
-     * The transaction expects the ioeventfds to be open when it
-     * commits. Do it now, before the cleanup loop.
-     */
-    memory_region_transaction_commit();
-
-    for (i = 0; i < nvqs; i++) {
-        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
-    }
-
     qemu_bh_cancel(s->bh);
     notify_guest_bh(s); /* final chance to notify guest */

     /* Clean up guest notifier (irq) */
     k->set_guest_notifiers(qbus->parent, nvqs, false);

-    vblk->dataplane_started = false;
     s->stopping = false;
 }
--
2.40.1

From: Stefano Garzarella <sgarzare@redhat.com>

Changes in preparation for the next patches, where we add a new
parameter not related to the poll mechanism.

Let's add two new generic functions (iothread_set_param and
iothread_get_param) that we use to set and get IOThread
parameters.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20210721094211.69853-2-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 iothread.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/iothread.c b/iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/iothread.c
+++ b/iothread.c
@@ -XXX,XX +XXX,XX @@ static PollParamInfo poll_shrink_info = {
     "poll-shrink", offsetof(IOThread, poll_shrink),
 };

-static void iothread_get_poll_param(Object *obj, Visitor *v,
+static void iothread_get_param(Object *obj, Visitor *v,
         const char *name, void *opaque, Error **errp)
 {
     IOThread *iothread = IOTHREAD(obj);
@@ -XXX,XX +XXX,XX @@ static void iothread_get_poll_param(Object *obj, Visitor *v,
     visit_type_int64(v, name, field, errp);
 }

-static void iothread_set_poll_param(Object *obj, Visitor *v,
+static bool iothread_set_param(Object *obj, Visitor *v,
         const char *name, void *opaque, Error **errp)
 {
     IOThread *iothread = IOTHREAD(obj);
@@ -XXX,XX +XXX,XX @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
     int64_t value;

     if (!visit_type_int64(v, name, &value, errp)) {
-        return;
+        return false;
     }

     if (value < 0) {
         error_setg(errp, "%s value must be in range [0, %" PRId64 "]",
                    info->name, INT64_MAX);
-        return;
+        return false;
     }

     *field = value;

+    return true;
+}
+
+static void iothread_get_poll_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+
+    iothread_get_param(obj, v, name, opaque, errp);
+}
+
+static void iothread_set_poll_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+    IOThread *iothread = IOTHREAD(obj);
+
+    if (!iothread_set_param(obj, v, name, opaque, errp)) {
+        return;
+    }
+
     if (iothread->ctx) {
         aio_context_set_poll_params(iothread->ctx,
                                     iothread->poll_max_ns,
--
2.31.1

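The iothread_set_param()/iothread_get_param() split above can be
sketched in isolation as follows. This is a minimal stand-in with
hypothetical names, not the QEMU code (which goes through the Visitor
and Error APIs):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Generic step: validate and store a value, reporting success so
     * the caller knows whether to apply side effects. */
    static bool set_param(int64_t *field, int64_t value)
    {
        if (value < 0) {
            fprintf(stderr, "value must be non-negative\n");
            return false;
        }
        *field = value;
        return true;
    }

    /* Poll-specific setter: reuse the generic step, then apply the
     * poll-related side effect only if the store succeeded. */
    static void set_poll_param(int64_t *poll_max_ns, int64_t value,
                               bool ctx_active)
    {
        if (!set_param(poll_max_ns, value)) {
            return;
        }
        if (ctx_active) {
            /* ... push the new value into the running event loop ... */
        }
    }
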
Deleted patch

From: Stefano Garzarella <sgarzare@redhat.com>

The `aio-max-batch` parameter will be propagated to AIO engines
and it will be used to control the maximum number of queued requests.

When the number of queued requests reaches `aio-max-batch`, the engine
invokes the system call that forwards the requests to the kernel.

This parameter allows us to control the maximum batch size and so
reduce the latency that requests can accumulate while queued in the
AIO engine.

If `aio-max-batch` is equal to 0 (the default value), the AIO engine
will use its default maximum batch size value.

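As a usage sketch (the values here are arbitrary examples, not tuning
advice), the parameter is set when the IOThread is created and can be
changed at run time with qom-set, mirroring the documented usage of the
polling parameters:

    qemu-system-x86_64 ... -object iothread,id=iothread1,aio-max-batch=16

    (qemu) qom-set /objects/iothread1 aio-max-batch 32
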
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20210721094211.69853-3-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/misc.json            |  6 ++++-
 qapi/qom.json             |  7 ++++-
 include/block/aio.h       | 12 +++++++++
 include/sysemu/iothread.h |  3 +++
 iothread.c                | 55 +++++++++++++++++++++++++++++++++++----
 monitor/hmp-cmds.c        |  2 ++
 util/aio-posix.c          | 12 +++++++
 util/aio-win32.c          |  5 ++++
 util/async.c              |  2 ++
 qemu-options.hx           |  8 ++++--
 10 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/qapi/misc.json b/qapi/misc.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -XXX,XX +XXX,XX @@
 # @poll-shrink: how many ns will be removed from polling time, 0 means that
 #               it's not configured (since 2.9)
 #
+# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
+#                 0 means that the engine will use its default (since 6.1)
+#
 # Since: 2.0
 ##
 { 'struct': 'IOThreadInfo',
@@ -XXX,XX +XXX,XX @@
            'thread-id': 'int',
            'poll-max-ns': 'int',
            'poll-grow': 'int',
-           'poll-shrink': 'int' } }
+           'poll-shrink': 'int',
+           'aio-max-batch': 'int' } }

 ##
 # @query-iothreads:
diff --git a/qapi/qom.json b/qapi/qom.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -XXX,XX +XXX,XX @@
 #                algorithm detects it is spending too long polling without
 #                encountering events. 0 selects a default behaviour (default: 0)
 #
+# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
+#                 0 means that the engine will use its default
+#                 (default:0, since 6.1)
+#
 # Since: 2.0
 ##
 { 'struct': 'IothreadProperties',
   'data': { '*poll-max-ns': 'int',
             '*poll-grow': 'int',
-            '*poll-shrink': 'int' } }
+            '*poll-shrink': 'int',
+            '*aio-max-batch': 'int' } }

 ##
 # @MemoryBackendProperties:
diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ struct AioContext {
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */

+    /* AIO engine parameters */
+    int64_t aio_max_batch;  /* maximum number of requests in a batch */
+
     /*
      * List of handlers participating in userspace polling. Protected by
      * ctx->list_lock. Iterated and modified mostly by the event loop thread
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                  int64_t grow, int64_t shrink,
                                  Error **errp);

+/**
+ * aio_context_set_aio_params:
+ * @ctx: the aio context
+ * @max_batch: maximum number of requests in a batch, 0 means that the
+ *             engine will use its default
+ */
+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                Error **errp);
+
 #endif
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -XXX,XX +XXX,XX @@ struct IOThread {
     int64_t poll_max_ns;
     int64_t poll_grow;
     int64_t poll_shrink;
+
+    /* AioContext AIO engine parameters */
+    int64_t aio_max_batch;
 };
 typedef struct IOThread IOThread;

diff --git a/iothread.c b/iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/iothread.c
+++ b/iothread.c
@@ -XXX,XX +XXX,XX @@ static void iothread_init_gcontext(IOThread *iothread)
     iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
 }

+static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
+{
+    ERRP_GUARD();
+
+    aio_context_set_poll_params(iothread->ctx,
+                                iothread->poll_max_ns,
+                                iothread->poll_grow,
+                                iothread->poll_shrink,
+                                errp);
+    if (*errp) {
+        return;
+    }
+
+    aio_context_set_aio_params(iothread->ctx,
+                               iothread->aio_max_batch,
+                               errp);
+}
+
 static void iothread_complete(UserCreatable *obj, Error **errp)
 {
     Error *local_error = NULL;
@@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp)
      */
     iothread_init_gcontext(iothread);

-    aio_context_set_poll_params(iothread->ctx,
-                                iothread->poll_max_ns,
-                                iothread->poll_grow,
-                                iothread->poll_shrink,
-                                &local_error);
+    iothread_set_aio_context_params(iothread, &local_error);
     if (local_error) {
         error_propagate(errp, local_error);
         aio_context_unref(iothread->ctx);
@@ -XXX,XX +XXX,XX @@ static PollParamInfo poll_grow_info = {
 static PollParamInfo poll_shrink_info = {
     "poll-shrink", offsetof(IOThread, poll_shrink),
 };
+static PollParamInfo aio_max_batch_info = {
+    "aio-max-batch", offsetof(IOThread, aio_max_batch),
+};

 static void iothread_get_param(Object *obj, Visitor *v,
         const char *name, void *opaque, Error **errp)
@@ -XXX,XX +XXX,XX @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
     }
 }

+static void iothread_get_aio_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+
+    iothread_get_param(obj, v, name, opaque, errp);
+}
+
+static void iothread_set_aio_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+    IOThread *iothread = IOTHREAD(obj);
+
+    if (!iothread_set_param(obj, v, name, opaque, errp)) {
+        return;
+    }
+
+    if (iothread->ctx) {
+        aio_context_set_aio_params(iothread->ctx,
+                                   iothread->aio_max_batch,
+                                   errp);
+    }
+}
+
 static void iothread_class_init(ObjectClass *klass, void *class_data)
 {
     UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
@@ -XXX,XX +XXX,XX @@ static void iothread_class_init(ObjectClass *klass, void *class_data)
                               iothread_get_poll_param,
                               iothread_set_poll_param,
                               NULL, &poll_shrink_info);
+    object_class_property_add(klass, "aio-max-batch", "int",
+                              iothread_get_aio_param,
+                              iothread_set_aio_param,
+                              NULL, &aio_max_batch_info);
 }

 static const TypeInfo iothread_info = {
@@ -XXX,XX +XXX,XX @@ static int query_one_iothread(Object *object, void *opaque)
     info->poll_max_ns = iothread->poll_max_ns;
     info->poll_grow = iothread->poll_grow;
     info->poll_shrink = iothread->poll_shrink;
+    info->aio_max_batch = iothread->aio_max_batch;

     QAPI_LIST_APPEND(*tail, info);
     return 0;
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index XXXXXXX..XXXXXXX 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -XXX,XX +XXX,XX @@ void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
         monitor_printf(mon, "  poll-max-ns=%" PRId64 "\n", value->poll_max_ns);
         monitor_printf(mon, "  poll-grow=%" PRId64 "\n", value->poll_grow);
         monitor_printf(mon, "  poll-shrink=%" PRId64 "\n", value->poll_shrink);
+        monitor_printf(mon, "  aio-max-batch=%" PRId64 "\n",
+                       value->aio_max_batch);
     }

     qapi_free_IOThreadInfoList(info_list);
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,

     aio_notify(ctx);
 }
+
+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                Error **errp)
+{
+    /*
+     * No thread synchronization here, it doesn't matter if an incorrect value
+     * is used once.
+     */
+    ctx->aio_max_batch = max_batch;
+
+    aio_notify(ctx);
+}
diff --git a/util/aio-win32.c b/util/aio-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
         error_setg(errp, "AioContext polling is not implemented on Windows");
     }
 }
+
+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                Error **errp)
+{
+}
diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
     ctx->poll_grow = 0;
     ctx->poll_shrink = 0;

+    ctx->aio_max_batch = 0;
+
     return ctx;
 fail:
     g_source_destroy(&ctx->source);
diff --git a/qemu-options.hx b/qemu-options.hx
index XXXXXXX..XXXXXXX 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -XXX,XX +XXX,XX @@ SRST

             CN=laptop.example.com,O=Example Home,L=London,ST=London,C=GB

-    ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink``
+    ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink,aio-max-batch=aio-max-batch``
         Creates a dedicated event loop thread that devices can be
         assigned to. This is known as an IOThread. By default device
         emulation happens in vCPU threads or the main event loop thread.
@@ -XXX,XX +XXX,XX @@ SRST
         the polling time when the algorithm detects it is spending too
         long polling without encountering events.

-        The polling parameters can be modified at run-time using the
+        The ``aio-max-batch`` parameter is the maximum number of requests
+        in a batch for the AIO engine, 0 means that the engine will use
+        its default.
+
+        The IOThread parameters can be modified at run-time using the
         ``qom-set`` command (where ``iothread1`` is the IOThread's
         ``id``):

--
2.31.1

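After this patch the new value is also visible from the monitor; an
abridged `info iothreads` session could look like the following (the
values are illustrative, and lines not added by this patch are elided):

    (qemu) info iothreads
    iothread1:
      ...
      poll-max-ns=32768
      poll-grow=0
      poll-shrink=0
      aio-max-batch=0
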
Deleted patch

From: Stefano Garzarella <sgarzare@redhat.com>

When there are multiple queues attached to the same AIO context, some
requests may experience high latency, since in the worst case the AIO
engine queue is only flushed when it is full (MAX_EVENTS) or there are
no more queues plugged.

Commit 2558cb8dd4 ("linux-aio: increasing MAX_EVENTS to a larger
hardcoded value") changed MAX_EVENTS from 128 to 1024 to increase the
number of in-flight requests. But this change also increased the
potential maximum batch to 1024 elements.

When there is a single queue attached to the AIO context, the issue is
mitigated by laio_io_unplug(), which flushes the queue every time it is
invoked, since no other queue can be plugged.

Let's use the new `aio-max-batch` IOThread parameter to mitigate this
issue, limiting the number of requests in a batch.

We also define a default value (32): this value was obtained by running
some benchmarks and represents a good tradeoff between the latency a
queued request can accumulate and the cost of the io_submit(2) system
call.

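A self-contained sketch of the resulting submission policy (simplified;
MIN_NON_ZERO mirrors QEMU's helper of the same name, and
effective_batch() is a hypothetical stand-in for the logic added to
laio_do_submit() below):

    #include <stdint.h>

    #define MAX_EVENTS        1024 /* kernel AIO ring size */
    #define DEFAULT_MAX_BATCH 32   /* default batch limit */

    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    /* Like QEMU's MIN_NON_ZERO(): treat 0 as "no limit" on either side. */
    #define MIN_NON_ZERO(a, b) \
        ((a) == 0 ? (b) : ((b) == 0 ? (a) : MIN((a), (b))))

    /*
     * Effective batch size for the next submission: the configured
     * aio-max-batch (0 selects the default), further limited by how
     * many events are still free in the kernel ring.
     */
    static int64_t effective_batch(int64_t aio_max_batch, unsigned in_flight)
    {
        int64_t max_batch = aio_max_batch ? aio_max_batch
                                          : DEFAULT_MAX_BATCH;
        return MIN_NON_ZERO((int64_t)(MAX_EVENTS - in_flight), max_batch);
    }
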
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20210721094211.69853-4-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/linux-aio.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -XXX,XX +XXX,XX @@
  */
 #define MAX_EVENTS 1024

+/* Maximum number of requests in a batch. (default value) */
+#define DEFAULT_MAX_BATCH 32
+
 struct qemu_laiocb {
     Coroutine *co;
     LinuxAioState *ctx;
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
     LinuxAioState *s = laiocb->ctx;
     struct iocb *iocbs = &laiocb->iocb;
     QEMUIOVector *qiov = laiocb->qiov;
+    int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;
+
+    /* limit the batch with the number of available events */
+    max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);

     switch (type) {
     case QEMU_AIO_WRITE:
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
     s->io_q.in_queue++;
     if (!s->io_q.blocked &&
         (!s->io_q.plugged ||
-         s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
+         s->io_q.in_queue >= max_batch)) {
         ioq_submit(s);
     }

--
2.31.1