Polling only monitors the ctx->notified field and does not need the
ctx->notifier EventNotifier to be signalled. Keep ctx->notify_me
disabled while polling to avoid unnecessary EventNotifier syscalls.
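To illustrate the idea, here is a simplified sketch of the polling fast
path (illustrative only, not the exact aio-posix.c code; the helper name
is made up, but the fields and clock helpers follow the QEMU sources):

/*
 * Hypothetical sketch: while busy-polling we watch ctx->notified
 * directly, so aio_notify() never has to signal the EventNotifier.
 * Only the blocking fallback needs ctx->notify_me.
 */
static bool poll_notified_sketch(AioContext *ctx, int64_t max_ns)
{
    int64_t end_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns;

    do {
        if (atomic_read(&ctx->notified)) {
            return true;    /* progress: aio_notify() was called */
        }
        /* ...invoke the registered io_poll() handlers here... */
    } while (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_ns);

    return false;           /* fall back to a blocking wait, which is
                             * the only case that needs notify_me */
}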
This optimization improves virtio-blk 4KB random read performance by
18%. The following results are with an IOThread and the null-co block
driver:
Test      IOPS          Error
Before    244518.62     ± 1.20%
After     290706.11     ± 0.44%
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
util/aio-posix.c | 56 ++++++++++++++++++++++++------------------------
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/util/aio-posix.c b/util/aio-posix.c
index 1b2a3af65b..078ec15890 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -464,9 +464,6 @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
*
* Polls for a given time.
*
- * Note that ctx->notify_me must be non-zero so this function can detect
- * aio_notify().
- *
* Note that the caller must have incremented ctx->list_lock.
*
* Returns: true if progress was made, false otherwise
@@ -476,7 +473,6 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
bool progress;
int64_t start_time, elapsed_time;
- assert(ctx->notify_me);
assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
@@ -520,8 +516,6 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
* @timeout: timeout for blocking wait, computed by the caller and updated if
* polling succeeds.
*
- * ctx->notify_me must be non-zero so this function can detect aio_notify().
- *
* Note that the caller must have incremented ctx->list_lock.
*
* Returns: true if progress was made, false otherwise
@@ -566,23 +560,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
*/
assert(in_aio_context_home_thread(ctx));
- /* aio_notify can avoid the expensive event_notifier_set if
- * everything (file descriptors, bottom halves, timers) will
- * be re-evaluated before the next blocking poll(). This is
- * already true when aio_poll is called with blocking == false;
- * if blocking == true, it is only true after poll() returns,
- * so disable the optimization now.
- */
- if (blocking) {
- atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
- /*
- * Write ctx->notify_me before computing the timeout
- * (reading bottom half flags, etc.). Pairs with
- * smp_mb in aio_notify().
- */
- smp_mb();
- }
-
qemu_lockcnt_inc(&ctx->list_lock);
if (ctx->poll_max_ns) {
@@ -597,15 +574,38 @@ bool aio_poll(AioContext *ctx, bool blocking)
* system call---a single round of run_poll_handlers_once suffices.
*/
if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
+ /*
+ * aio_notify can avoid the expensive event_notifier_set if
+ * everything (file descriptors, bottom halves, timers) will
+ * be re-evaluated before the next blocking poll(). This is
+ * already true when aio_poll is called with blocking == false;
+ * if blocking == true, it is only true after poll() returns,
+ * so disable the optimization now.
+ */
+ if (timeout) {
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+ /*
+ * Write ctx->notify_me before computing the timeout
+ * (reading bottom half flags, etc.). Pairs with
+ * smp_mb in aio_notify().
+ */
+ smp_mb();
+
+ /* Check again in case a shorter timer was added */
+ timeout = qemu_soonest_timeout(timeout, aio_compute_timeout(ctx));
+ }
+
ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
- }
- if (blocking) {
- /* Finish the poll before clearing the flag. */
- atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
- aio_notify_accept(ctx);
+ if (timeout) {
+ /* Finish the poll before clearing the flag. */
+ atomic_store_release(&ctx->notify_me,
+ atomic_read(&ctx->notify_me) - 2);
+ }
}
+ aio_notify_accept(ctx);
+
/* Adjust polling time */
if (ctx->poll_max_ns) {
int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
--
2.26.2
On Tue, Aug 04, 2020 at 06:28:04AM +0100, Stefan Hajnoczi wrote:
> @@ -597,15 +574,38 @@ bool aio_poll(AioContext *ctx, bool blocking)
> * system call---a single round of run_poll_handlers_once suffices.
> */
> if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
> + /*
> + * aio_notify can avoid the expensive event_notifier_set if
> + * everything (file descriptors, bottom halves, timers) will
> + * be re-evaluated before the next blocking poll(). This is
> + * already true when aio_poll is called with blocking == false;
> + * if blocking == true, it is only true after poll() returns,
> + * so disable the optimization now.
> + */
> + if (timeout) {
> + atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
> + /*
> + * Write ctx->notify_me before computing the timeout
> + * (reading bottom half flags, etc.). Pairs with
> + * smp_mb in aio_notify().
> + */
> + smp_mb();
> +
> + /* Check again in case a shorter timer was added */
> + timeout = qemu_soonest_timeout(timeout, aio_compute_timeout(ctx));
> + }
> +
> ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
> - }
>
> - if (blocking) {
> - /* Finish the poll before clearing the flag. */
> - atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
> - aio_notify_accept(ctx);
> + if (timeout) {
> + /* Finish the poll before clearing the flag. */
> + atomic_store_release(&ctx->notify_me,
> + atomic_read(&ctx->notify_me) - 2);
> + }
> }
Hi Paolo,
We can avoid calling aio_compute_timeout() like this, what do you think?
bool use_notify_me = timeout != 0;

if (use_notify_me) {
    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
    /*
     * Write ctx->notify_me before computing the timeout
     * (reading bottom half flags, etc.). Pairs with
     * smp_mb in aio_notify().
     */
    smp_mb();

    /* Don't block if aio_notify() was called */
    if (atomic_read(&ctx->notified)) {
        timeout = 0;
    }
}

ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);

if (use_notify_me) {
    /* Finish the poll before clearing the flag. */
    atomic_store_release(&ctx->notify_me,
                         atomic_read(&ctx->notify_me) - 2);
}
On 04/08/20 12:29, Stefan Hajnoczi wrote:
> On Tue, Aug 04, 2020 at 06:28:04AM +0100, Stefan Hajnoczi wrote:
>> @@ -597,15 +574,38 @@ bool aio_poll(AioContext *ctx, bool blocking)
>> * system call---a single round of run_poll_handlers_once suffices.
>> */
>> if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
>> + /*
>> + * aio_notify can avoid the expensive event_notifier_set if
>> + * everything (file descriptors, bottom halves, timers) will
>> + * be re-evaluated before the next blocking poll(). This is
>> + * already true when aio_poll is called with blocking == false;
>> + * if blocking == true, it is only true after poll() returns,
>> + * so disable the optimization now.
>> + */
>> + if (timeout) {
>> + atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
>> + /*
>> + * Write ctx->notify_me before computing the timeout
>> + * (reading bottom half flags, etc.). Pairs with
>> + * smp_mb in aio_notify().
>> + */
>> + smp_mb();
>> +
>> + /* Check again in case a shorter timer was added */
>> + timeout = qemu_soonest_timeout(timeout, aio_compute_timeout(ctx));
>> + }
>> +
>> ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
>> - }
>>
>> - if (blocking) {
>> - /* Finish the poll before clearing the flag. */
>> - atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
>> - aio_notify_accept(ctx);
>> + if (timeout) {
>> + /* Finish the poll before clearing the flag. */
>> + atomic_store_release(&ctx->notify_me,
>> + atomic_read(&ctx->notify_me) - 2);
>> + }
>> }
>
> Hi Paolo,
> We can avoid calling aio_compute_timeout() like this, what do you think?
I don't understand :) except I guess you mean we can avoid the second
call. Can you post either a complete patch with this squashed, or a 4th
patch (whatever you think is best)?
Paolo
> bool use_notify_me = timeout != 0;
>
> if (use_notify_me) {
> atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
> /*
> * Write ctx->notify_me before computing the timeout
> * (reading bottom half flags, etc.). Pairs with
> * smp_mb in aio_notify().
> */
> smp_mb();
>
> /* Don't block if aio_notify() was called */
> if (atomic_read(&ctx->notified)) {
> timeout = 0;
> }
> }
>
> ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
>
> if (use_notify_me) {
> /* Finish the poll before clearing the flag. */
> atomic_store_release(&ctx->notify_me,
> atomic_read(&ctx->notify_me) - 2);
> }
>
On Tue, Aug 04, 2020 at 06:53:09PM +0200, Paolo Bonzini wrote:
> On 04/08/20 12:29, Stefan Hajnoczi wrote:
> > On Tue, Aug 04, 2020 at 06:28:04AM +0100, Stefan Hajnoczi wrote:
> >> @@ -597,15 +574,38 @@ bool aio_poll(AioContext *ctx, bool blocking)
> >> * system call---a single round of run_poll_handlers_once suffices.
> >> */
> >> if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
> >> + /*
> >> + * aio_notify can avoid the expensive event_notifier_set if
> >> + * everything (file descriptors, bottom halves, timers) will
> >> + * be re-evaluated before the next blocking poll(). This is
> >> + * already true when aio_poll is called with blocking == false;
> >> + * if blocking == true, it is only true after poll() returns,
> >> + * so disable the optimization now.
> >> + */
> >> + if (timeout) {
> >> + atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
> >> + /*
> >> + * Write ctx->notify_me before computing the timeout
> >> + * (reading bottom half flags, etc.). Pairs with
> >> + * smp_mb in aio_notify().
> >> + */
> >> + smp_mb();
> >> +
> >> + /* Check again in case a shorter timer was added */
> >> + timeout = qemu_soonest_timeout(timeout, aio_compute_timeout(ctx));
> >> + }
> >> +
> >> ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
> >> - }
> >>
> >> - if (blocking) {
> >> - /* Finish the poll before clearing the flag. */
> >> - atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
> >> - aio_notify_accept(ctx);
> >> + if (timeout) {
> >> + /* Finish the poll before clearing the flag. */
> >> + atomic_store_release(&ctx->notify_me,
> >> + atomic_read(&ctx->notify_me) - 2);
> >> + }
> >> }
> >
> > Hi Paolo,
> > We can avoid calling aio_compute_timeout() like this, what do you think?
>
> I don't understand :) except I guess you mean we can avoid the second
> call. Can you post either a complete patch with this squashed, or a 4th
> patch (whatever you think is best)?
Sure, I'll post a new revision of this series.
Stefan