fdmon_ops->wait() is called with notify_me enabled. This makes it an
expensive place to call qemu_bh_schedule() because aio_notify() invokes
write(2) on the EventNotifier.
Moving qemu_bh_schedule() after notify_me is reset improves performance
from 270k to 300k IOPS with --blockdev file,aio=io_uring.
I considered alternatives:
1. Introducing a variant of qemu_bh_schedule() that skips aio_notify().
This only makes sense within the AioContext and fdmon implementation
itself and is therefore a specialized internal API. I don't like
that.
2. Changing fdmon_ops->wait() so implementors can reset notify_me
themselves. This makes things complex and the other fdmon
implementations don't need it, so it doesn't seem like a good
solution.
So in the end I moved the qemu_bh_schedule() call from fdmon-io_uring.c
to aio-posix.c. It's ugly but straightforward.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
util/aio-posix.c | 11 +++++++++++
util/fdmon-io_uring.c | 11 ++++++++++-
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/util/aio-posix.c b/util/aio-posix.c
index df945312b3..20b11e5650 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -700,6 +700,17 @@ bool aio_poll(AioContext *ctx, bool blocking)
qatomic_read(&ctx->notify_me) - 2);
}
+#ifdef CONFIG_LINUX_IO_URING
+ /*
+ * This is part of fdmon-io_uring.c but it's more efficient to do it here
+ * after notify_me has been reset. That way qemu_bh_schedule() ->
+ * aio_notify() does not write the EventNotifier.
+ */
+ if (!QSIMPLEQ_EMPTY(&ctx->cqe_handler_ready_list)) {
+ qemu_bh_schedule(ctx->cqe_handler_bh);
+ }
+#endif
+
aio_notify_accept(ctx);
/* Calculate blocked time for adaptive polling */
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index 7f7c50907d..9ed02597d6 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -334,8 +334,12 @@ static bool process_cqe(AioContext *ctx,
}
cqe_handler->cqe = *cqe;
+
+ /*
+ * aio_poll() and fdmon_io_uring_gsource_dispatch() schedule cqe_handler_bh
+ * when the list is non-empty.
+ */
QSIMPLEQ_INSERT_TAIL(&ctx->cqe_handler_ready_list, cqe_handler, next);
- qemu_bh_schedule(ctx->cqe_handler_bh);
return false;
}
@@ -386,6 +390,11 @@ static void fdmon_io_uring_gsource_dispatch(AioContext *ctx,
AioHandlerList *ready_list)
{
process_cq_ring(ctx, ready_list);
+
+ /* Ensure CqeHandlers enqueued by process_cq_ring() will run */
+ if (!QSIMPLEQ_EMPTY(&ctx->cqe_handler_ready_list)) {
+ qemu_bh_schedule(ctx->cqe_handler_bh);
+ }
}
static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
--
2.51.0