1
The following changes since commit d0ed6a69d399ae193959225cdeaa9382746c91cc:
1
The following changes since commit 3521ade3510eb5cefb2e27a101667f25dad89935:
2
2
3
Update version for v5.1.0 release (2020-08-11 17:07:03 +0100)
3
Merge remote-tracking branch 'remotes/thuth-gitlab/tags/pull-request-2021-07-29' into staging (2021-07-29 13:17:20 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/stefanha/qemu.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 44277bf914471962c9e88e09c859aae65ae109c4:
9
for you to fetch changes up to cc8eecd7f105a1dff5876adeb238a14696061a4a:
10
10
11
aio-posix: keep aio_notify_me disabled during polling (2020-08-13 13:34:14 =
11
MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver (2021-07-29 17:17:34 +0100)
12
+0100)
13
12
14
----------------------------------------------------------------
13
----------------------------------------------------------------
15
Pull request
14
Pull request
16
15
16
The main fix here is for io_uring. Spurious -EAGAIN errors can happen and the
17
request needs to be resubmitted.
18
19
The MAINTAINERS changes carry no risk and we might as well include them in QEMU
20
6.1.
21
17
----------------------------------------------------------------
22
----------------------------------------------------------------
18
23
19
Stefan Hajnoczi (3):
24
Fabian Ebner (1):
20
async: rename event_notifier_dummy_cb/poll()
25
block/io_uring: resubmit when result is -EAGAIN
21
async: always set ctx->notified in aio_notify()
22
aio-posix: keep aio_notify_me disabled during polling
23
26
24
util/aio-posix.c | 47 +++++++++++++++++++++++++----------------------
27
Philippe Mathieu-Daudé (1):
25
util/async.c | 36 +++++++++++++++++++++++-------------
28
MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver
26
2 files changed, 48 insertions(+), 35 deletions(-)
27
29
28
--=20
30
Stefano Garzarella (1):
29
2.26.2
31
MAINTAINERS: add Stefano Garzarella as io_uring reviewer
30
32
33
MAINTAINERS | 2 ++
34
block/io_uring.c | 16 +++++++++++++++-
35
2 files changed, 17 insertions(+), 1 deletion(-)
36
37
--
38
2.31.1
39
diff view generated by jsdifflib
1
Polling only monitors the ctx->notified field and does not need the
1
From: Stefano Garzarella <sgarzare@redhat.com>
2
ctx->notifier EventNotifier to be signalled. Keep ctx->aio_notify_me
3
disabled while polling to avoid unnecessary EventNotifier syscalls.
4
2
5
This optimization improves virtio-blk 4KB random read performance by
3
I've been working with io_uring for a while so I'd like to help
6
18%. The following results are with an IOThread and the null-co block
4
with reviews.
7
driver:
8
5
9
Test IOPS Error
6
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
10
Before 244518.62 ± 1.20%
7
Message-Id: <20210728131515.131045-1-sgarzare@redhat.com>
11
After 290706.11 ± 0.44%
12
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
16
Message-id: 20200806131802.569478-4-stefanha@redhat.com
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
---
9
---
19
util/aio-posix.c | 47 +++++++++++++++++++++++++----------------------
10
MAINTAINERS | 1 +
20
1 file changed, 25 insertions(+), 22 deletions(-)
11
1 file changed, 1 insertion(+)
21
12
22
diff --git a/util/aio-posix.c b/util/aio-posix.c
13
diff --git a/MAINTAINERS b/MAINTAINERS
23
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
24
--- a/util/aio-posix.c
15
--- a/MAINTAINERS
25
+++ b/util/aio-posix.c
16
+++ b/MAINTAINERS
26
@@ -XXX,XX +XXX,XX @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
17
@@ -XXX,XX +XXX,XX @@ Linux io_uring
27
*
18
M: Aarushi Mehta <mehta.aaru20@gmail.com>
28
* Polls for a given time.
19
M: Julia Suvorova <jusual@redhat.com>
29
*
20
M: Stefan Hajnoczi <stefanha@redhat.com>
30
- * Note that ctx->notify_me must be non-zero so this function can detect
21
+R: Stefano Garzarella <sgarzare@redhat.com>
31
- * aio_notify().
22
L: qemu-block@nongnu.org
32
- *
23
S: Maintained
33
* Note that the caller must have incremented ctx->list_lock.
24
F: block/io_uring.c
34
*
35
* Returns: true if progress was made, false otherwise
36
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
37
bool progress;
38
int64_t start_time, elapsed_time;
39
40
- assert(ctx->notify_me);
41
assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
42
43
trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
44
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
45
* @timeout: timeout for blocking wait, computed by the caller and updated if
46
* polling succeeds.
47
*
48
- * ctx->notify_me must be non-zero so this function can detect aio_notify().
49
- *
50
* Note that the caller must have incremented ctx->list_lock.
51
*
52
* Returns: true if progress was made, false otherwise
53
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
54
AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
55
int ret = 0;
56
bool progress;
57
+ bool use_notify_me;
58
int64_t timeout;
59
int64_t start = 0;
60
61
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
62
*/
63
assert(in_aio_context_home_thread(ctx));
64
65
- /* aio_notify can avoid the expensive event_notifier_set if
66
+ qemu_lockcnt_inc(&ctx->list_lock);
67
+
68
+ if (ctx->poll_max_ns) {
69
+ start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
70
+ }
71
+
72
+ timeout = blocking ? aio_compute_timeout(ctx) : 0;
73
+ progress = try_poll_mode(ctx, &timeout);
74
+ assert(!(timeout && progress));
75
+
76
+ /*
77
+ * aio_notify can avoid the expensive event_notifier_set if
78
* everything (file descriptors, bottom halves, timers) will
79
* be re-evaluated before the next blocking poll(). This is
80
* already true when aio_poll is called with blocking == false;
81
* if blocking == true, it is only true after poll() returns,
82
* so disable the optimization now.
83
*/
84
- if (blocking) {
85
+ use_notify_me = timeout != 0;
86
+ if (use_notify_me) {
87
atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
88
/*
89
- * Write ctx->notify_me before computing the timeout
90
- * (reading bottom half flags, etc.). Pairs with
91
+ * Write ctx->notify_me before reading ctx->notified. Pairs with
92
* smp_mb in aio_notify().
93
*/
94
smp_mb();
95
- }
96
-
97
- qemu_lockcnt_inc(&ctx->list_lock);
98
99
- if (ctx->poll_max_ns) {
100
- start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
101
+ /* Don't block if aio_notify() was called */
102
+ if (atomic_read(&ctx->notified)) {
103
+ timeout = 0;
104
+ }
105
}
106
107
- timeout = blocking ? aio_compute_timeout(ctx) : 0;
108
- progress = try_poll_mode(ctx, &timeout);
109
- assert(!(timeout && progress));
110
-
111
/* If polling is allowed, non-blocking aio_poll does not need the
112
* system call---a single round of run_poll_handlers_once suffices.
113
*/
114
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
115
ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
116
}
117
118
- if (blocking) {
119
+ if (use_notify_me) {
120
/* Finish the poll before clearing the flag. */
121
- atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
122
- aio_notify_accept(ctx);
123
+ atomic_store_release(&ctx->notify_me,
124
+ atomic_read(&ctx->notify_me) - 2);
125
}
126
127
+ aio_notify_accept(ctx);
128
+
129
/* Adjust polling time */
130
if (ctx->poll_max_ns) {
131
int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
132
--
25
--
133
2.26.2
26
2.31.1
134
27
diff view generated by jsdifflib
1
aio_notify() does not set ctx->notified when called with
1
From: Fabian Ebner <f.ebner@proxmox.com>
2
ctx->aio_notify_me disabled. Therefore aio_notify_me needs to be enabled
3
during polling.
4
2
5
This is suboptimal since expensive event_notifier_set(&ctx->notifier)
3
Linux SCSI can throw spurious -EAGAIN in some corner cases in its
6
and event_notifier_test_and_clear(&ctx->notifier) calls are required
4
completion path, which will end up being the result in the completed
7
when ctx->aio_notify_me is enabled.
5
io_uring request.
8
6
9
Change aio_notify() so that aio->notified is always set, regardless of
7
Resubmitting such requests should allow block jobs to complete, even
10
ctx->aio_notify_me. This will make polling cheaper since
8
if such spurious errors are encountered.
11
ctx->aio_notify_me can remain disabled. Move the
12
event_notifier_test_and_clear() to the fd handler function (which is now
13
no longer an empty function so "dummy" has been dropped from its name).
14
9
15
The next patch takes advantage of this by optimizing polling in
10
Co-authored-by: Stefan Hajnoczi <stefanha@gmail.com>
16
util/aio-posix.c.
11
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
17
12
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Message-id: 20210729091029.65369-1-f.ebner@proxmox.com
19
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
20
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
21
Message-id: 20200806131802.569478-3-stefanha@redhat.com
22
23
[Paolo Bonzini pointed out that the smp_wmb() in aio_notify_accept()
24
should be smp_wb() but the comment should be smp_wmb() instead of
25
smp_wb(). Fixed.
26
--Stefan]
27
28
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
29
---
15
---
30
util/async.c | 32 +++++++++++++++++++++-----------
16
block/io_uring.c | 16 +++++++++++++++-
31
1 file changed, 21 insertions(+), 11 deletions(-)
17
1 file changed, 15 insertions(+), 1 deletion(-)
32
18
33
diff --git a/util/async.c b/util/async.c
19
diff --git a/block/io_uring.c b/block/io_uring.c
34
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
35
--- a/util/async.c
21
--- a/block/io_uring.c
36
+++ b/util/async.c
22
+++ b/block/io_uring.c
37
@@ -XXX,XX +XXX,XX @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
23
@@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s)
38
24
total_bytes = ret + luringcb->total_read;
39
void aio_notify(AioContext *ctx)
25
40
{
26
if (ret < 0) {
41
- /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
27
- if (ret == -EINTR) {
42
+ /*
28
+ /*
43
+ * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in
29
+ * Only writev/readv/fsync requests on regular files or host block
44
+ * aio_notify_accept.
30
+ * devices are submitted. Therefore -EAGAIN is not expected but it's
45
+ */
31
+ * known to happen sometimes with Linux SCSI. Submit again and hope
46
+ smp_wmb();
32
+ * the request completes successfully.
47
+ atomic_set(&ctx->notified, true);
33
+ *
48
+
34
+ * For more information, see:
49
+ /*
35
+ * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u
50
+ * Write ctx->notified before reading ctx->notify_me. Pairs
36
+ *
51
* with smp_mb in aio_ctx_prepare or aio_poll.
37
+ * If the code is changed to submit other types of requests in the
52
*/
38
+ * future, then this workaround may need to be extended to deal with
53
smp_mb();
39
+ * genuine -EAGAIN results that should not be resubmitted
54
if (atomic_read(&ctx->notify_me)) {
40
+ * immediately.
55
event_notifier_set(&ctx->notifier);
41
+ */
56
- atomic_mb_set(&ctx->notified, true);
42
+ if (ret == -EINTR || ret == -EAGAIN) {
57
}
43
luring_resubmit(s, luringcb);
58
}
44
continue;
59
45
}
60
void aio_notify_accept(AioContext *ctx)
61
{
62
- if (atomic_xchg(&ctx->notified, false)
63
-#ifdef WIN32
64
- || true
65
-#endif
66
- ) {
67
- event_notifier_test_and_clear(&ctx->notifier);
68
- }
69
+ atomic_set(&ctx->notified, false);
70
+
71
+ /*
72
+ * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb
73
+ * in aio_notify.
74
+ */
75
+ smp_mb();
76
}
77
78
static void aio_timerlist_notify(void *opaque, QEMUClockType type)
79
@@ -XXX,XX +XXX,XX @@ static void aio_timerlist_notify(void *opaque, QEMUClockType type)
80
aio_notify(opaque);
81
}
82
83
-static void aio_context_notifier_dummy_cb(EventNotifier *e)
84
+static void aio_context_notifier_cb(EventNotifier *e)
85
{
86
+ AioContext *ctx = container_of(e, AioContext, notifier);
87
+
88
+ event_notifier_test_and_clear(&ctx->notifier);
89
}
90
91
/* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
92
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
93
94
aio_set_event_notifier(ctx, &ctx->notifier,
95
false,
96
- aio_context_notifier_dummy_cb,
97
+ aio_context_notifier_cb,
98
aio_context_notifier_poll);
99
#ifdef CONFIG_LINUX_AIO
100
ctx->linux_aio = NULL;
101
--
46
--
102
2.26.2
47
2.31.1
103
48
diff view generated by jsdifflib
1
The event_notifier_*() prefix can be confused with the EventNotifier
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
APIs that are also called event_notifier_*().
3
2
4
Rename the functions to aio_context_notifier_*() to make it clear that
3
I'm interested in following the activity around the NVMe bdrv.
5
they relate to the AioContext::notifier field.
6
4
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Message-id: 20210728183340.2018313-1-philmd@redhat.com
9
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
10
Message-id: 20200806131802.569478-2-stefanha@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
8
---
13
util/async.c | 8 ++++----
9
MAINTAINERS | 1 +
14
1 file changed, 4 insertions(+), 4 deletions(-)
10
1 file changed, 1 insertion(+)
15
11
16
diff --git a/util/async.c b/util/async.c
12
diff --git a/MAINTAINERS b/MAINTAINERS
17
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
18
--- a/util/async.c
14
--- a/MAINTAINERS
19
+++ b/util/async.c
15
+++ b/MAINTAINERS
20
@@ -XXX,XX +XXX,XX @@ static void aio_timerlist_notify(void *opaque, QEMUClockType type)
16
@@ -XXX,XX +XXX,XX @@ F: block/null.c
21
aio_notify(opaque);
17
NVMe Block Driver
22
}
18
M: Stefan Hajnoczi <stefanha@redhat.com>
23
19
R: Fam Zheng <fam@euphon.net>
24
-static void event_notifier_dummy_cb(EventNotifier *e)
20
+R: Philippe Mathieu-Daudé <philmd@redhat.com>
25
+static void aio_context_notifier_dummy_cb(EventNotifier *e)
21
L: qemu-block@nongnu.org
26
{
22
S: Supported
27
}
23
F: block/nvme*
28
29
/* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
30
-static bool event_notifier_poll(void *opaque)
31
+static bool aio_context_notifier_poll(void *opaque)
32
{
33
EventNotifier *e = opaque;
34
AioContext *ctx = container_of(e, AioContext, notifier);
35
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
36
37
aio_set_event_notifier(ctx, &ctx->notifier,
38
false,
39
- event_notifier_dummy_cb,
40
- event_notifier_poll);
41
+ aio_context_notifier_dummy_cb,
42
+ aio_context_notifier_poll);
43
#ifdef CONFIG_LINUX_AIO
44
ctx->linux_aio = NULL;
45
#endif
46
--
24
--
47
2.26.2
25
2.31.1
48
26
diff view generated by jsdifflib