--- old cover letter (2020-04-09 pull request)
+++ new cover letter (2023-07-12 pull request)
-The following changes since commit 8bac3ba57eecc466b7e73dabf7d19328a59f684e:
+The following changes since commit 887cba855bb6ff4775256f7968409281350b568c:
 
-  Merge remote-tracking branch 'remotes/rth/tags/pull-rx-20200408' into staging (2020-04-09 13:23:30 +0100)
+  configure: Fix cross-building for RISCV host (v5) (2023-07-11 17:56:09 +0100)
 
 are available in the Git repository at:
 
-  https://github.com/stefanha/qemu.git tags/block-pull-request
+  https://gitlab.com/stefanha/qemu.git tags/block-pull-request
 
-for you to fetch changes up to 5710a3e09f9b85801e5ce70797a4a511e5fc9e2c:
+for you to fetch changes up to 75dcb4d790bbe5327169fd72b185960ca58e2fa6:
 
-  async: use explicit memory barriers (2020-04-09 16:17:14 +0100)
+  virtio-blk: fix host notifier issues during dataplane start/stop (2023-07-12 15:20:32 -0400)
 
 ----------------------------------------------------------------
 Pull request
 
-Fixes for QEMU on aarch64 ARM hosts and fdmon-io_uring.
-
 ----------------------------------------------------------------
 
-Paolo Bonzini (2):
-      aio-wait: delegate polling of main AioContext if BQL not held
-      async: use explicit memory barriers
-
-Stefan Hajnoczi (1):
-      aio-posix: signal-proof fdmon-io_uring
-
- include/block/aio-wait.h | 22 ++++++++++++++++++++++
- include/block/aio.h      | 29 ++++++++++-------------------
- util/aio-posix.c         | 16 ++++++++++++++--
- util/aio-win32.c         | 17 ++++++++++++++---
- util/async.c             | 16 ++++++++++++----
- util/fdmon-io_uring.c    | 10 ++++++++--
- 6 files changed, 80 insertions(+), 30 deletions(-)
+Stefan Hajnoczi (1):
+      virtio-blk: fix host notifier issues during dataplane start/stop
+
+ hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
+ 1 file changed, 38 insertions(+), 29 deletions(-)
 
 --
-2.25.1
+2.40.1
Deleted patch

The io_uring_enter(2) syscall returns with errno=EINTR when interrupted
by a signal. Retry the syscall in this case.

It's essential to do this in the io_uring_submit_and_wait() case. My
interpretation of the Linux v5.5 io_uring_enter(2) code is that it
shouldn't affect the io_uring_submit() case, but there is no guarantee
this will always be the case. Let's check for -EINTR around both APIs.

Note that the liburing APIs have -errno return values.
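For readers outside the QEMU tree, the same retry idiom can be seen in a
minimal standalone program. This is only an illustrative sketch, not QEMU
code; it assumes stock liburing, and the ring size and nop request are
arbitrary:

    /* eintr_retry.c - toy demonstration of retrying liburing calls on -EINTR.
     * Build with: gcc eintr_retry.c -o eintr_retry -luring
     */
    #include <errno.h>
    #include <liburing.h>
    #include <stdio.h>

    int main(void)
    {
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        int ret;

        if (io_uring_queue_init(8, &ring, 0) < 0) {
            return 1;
        }

        sqe = io_uring_get_sqe(&ring); /* fresh ring, so an sqe is free */
        io_uring_prep_nop(sqe);

        /* liburing returns -errno rather than setting the errno variable,
         * so the interrupted-by-signal case shows up as ret == -EINTR. */
        do {
            ret = io_uring_submit_and_wait(&ring, 1);
        } while (ret == -EINTR);

        if (ret >= 0 && io_uring_peek_cqe(&ring, &cqe) == 0) {
            printf("nop completed: res=%d\n", cqe->res);
            io_uring_cqe_seen(&ring, cqe);
        }

        io_uring_queue_exit(&ring);
        return 0;
    }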
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20200408091139.273851-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/fdmon-io_uring.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -XXX,XX +XXX,XX @@ static struct io_uring_sqe *get_sqe(AioContext *ctx)
     }
 
     /* No free sqes left, submit pending sqes first */
-    ret = io_uring_submit(ring);
+    do {
+        ret = io_uring_submit(ring);
+    } while (ret == -EINTR);
+
     assert(ret > 1);
     sqe = io_uring_get_sqe(ring);
     assert(sqe);
@@ -XXX,XX +XXX,XX @@ static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
 
     fill_sq_ring(ctx);
 
-    ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
+    do {
+        ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
+    } while (ret == -EINTR);
+
     assert(ret >= 0);
 
     return process_cq_ring(ctx, ready_list);
--
2.25.1
Deleted patch

From: Paolo Bonzini <pbonzini@redhat.com>

Any thread that is not an iothread returns NULL for
qemu_get_current_aio_context(). As a result, it would also return true
for in_aio_context_home_thread(qemu_get_aio_context()), causing
AIO_WAIT_WHILE to invoke aio_poll() directly. This is incorrect if the
BQL is not held, because aio_poll() does not expect to run concurrently
from multiple threads, and it can actually happen when savevm writes to
the vmstate file from the migration thread.

Therefore, restrict in_aio_context_home_thread to return true for the
main AioContext only if the BQL is held.

The function is moved to aio-wait.h because it is mostly used there and
to avoid a circular reference between main-loop.h and block/aio.h.
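The rule being enforced can be pictured outside QEMU with a toy pthreads
sketch (all names here are hypothetical, only the pthread APIs are real):
a context counts as "ours" if we are its home thread, or if it is the main
context and we currently hold the global lock; any other thread must wait
for the home thread instead of calling the poll function itself.

    /* Build with: gcc home_thread.c -o home_thread -pthread */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        pthread_t home_thread; /* the thread that normally runs this context */
        bool is_main;          /* true for the main-loop context */
    } ToyContext;

    /* Toy stand-in for the BQL; real code tracks ownership per thread. */
    static _Thread_local bool big_lock_held;

    /* Mirrors the patched logic: the main context counts as "home" only
     * while the big lock is held, so an unlocked thread must delegate
     * rather than poll concurrently. */
    static bool in_context_home_thread(ToyContext *ctx)
    {
        if (pthread_equal(ctx->home_thread, pthread_self())) {
            return true;
        }
        return ctx->is_main && big_lock_held;
    }

    static void *migration_thread(void *opaque)
    {
        ToyContext *ctx = opaque;
        printf("other thread, no lock:   %d\n", in_context_home_thread(ctx)); /* 0 */
        big_lock_held = true; /* pretend we acquired the big lock */
        printf("other thread, with lock: %d\n", in_context_home_thread(ctx)); /* 1 */
        return NULL;
    }

    int main(void)
    {
        ToyContext main_ctx = { .home_thread = pthread_self(), .is_main = true };
        pthread_t t;

        pthread_create(&t, NULL, migration_thread, &main_ctx);
        pthread_join(t, NULL);
        return 0;
    }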
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200407140746.8041-5-pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/aio-wait.h | 22 ++++++++++++++++++++++
 include/block/aio.h      | 29 ++++++++++-------------------
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -XXX,XX +XXX,XX @@
 #define QEMU_AIO_WAIT_H
 
 #include "block/aio.h"
+#include "qemu/main-loop.h"
 
 /**
  * AioWait:
@@ -XXX,XX +XXX,XX @@ void aio_wait_kick(void);
  */
 void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
 
+/**
+ * in_aio_context_home_thread:
+ * @ctx: the aio context
+ *
+ * Return whether we are running in the thread that normally runs @ctx. Note
+ * that acquiring/releasing ctx does not affect the outcome, each AioContext
+ * still only has one home thread that is responsible for running it.
+ */
+static inline bool in_aio_context_home_thread(AioContext *ctx)
+{
+    if (ctx == qemu_get_current_aio_context()) {
+        return true;
+    }
+
+    if (ctx == qemu_get_aio_context()) {
+        return qemu_mutex_iothread_locked();
+    } else {
+        return false;
+    }
+}
+
 #endif /* QEMU_AIO_WAIT_H */
diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ struct AioContext {
     AioHandlerList deleted_aio_handlers;
 
     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
-     * accessed with atomic primitives. If this field is 0, everything
-     * (file descriptors, bottom halves, timers) will be re-evaluated
-     * before the next blocking poll(), thus the event_notifier_set call
-     * can be skipped. If it is non-zero, you may need to wake up a
-     * concurrent aio_poll or the glib main event loop, making
-     * event_notifier_set necessary.
+     * only written from the AioContext home thread, or under the BQL in
+     * the case of the main AioContext. However, it is read from any
+     * thread so it is still accessed with atomic primitives.
+     *
+     * If this field is 0, everything (file descriptors, bottom halves,
+     * timers) will be re-evaluated before the next blocking poll() or
+     * io_uring wait; therefore, the event_notifier_set call can be
+     * skipped. If it is non-zero, you may need to wake up a concurrent
+     * aio_poll or the glib main event loop, making event_notifier_set
+     * necessary.
      *
      * Bit 0 is reserved for GSource usage of the AioContext, and is 1
      * between a call to aio_ctx_prepare and the next call to aio_ctx_check.
@@ -XXX,XX +XXX,XX @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
  */
 AioContext *qemu_get_current_aio_context(void);
 
-/**
- * in_aio_context_home_thread:
- * @ctx: the aio context
- *
- * Return whether we are running in the thread that normally runs @ctx. Note
- * that acquiring/releasing ctx does not affect the outcome, each AioContext
- * still only has one home thread that is responsible for running it.
- */
-static inline bool in_aio_context_home_thread(AioContext *ctx)
-{
-    return ctx == qemu_get_current_aio_context();
-}
-
 /**
  * aio_context_setup:
  * @ctx: the aio context
--
2.25.1
Deleted patch

From: Paolo Bonzini <pbonzini@redhat.com>

When using C11 atomics, non-seqcst reads and writes do not participate
in the total order of seqcst operations. In util/async.c and
util/aio-posix.c, in particular, the pattern that we use

          write ctx->notify_me                 write bh->scheduled
          read bh->scheduled                   read ctx->notify_me
          if !bh->scheduled, sleep             if ctx->notify_me, notify

needs to use seqcst operations for both the write and the read. In
general this is something that we do not want, because there can be
many sources that are polled in addition to bottom halves. The
alternative is to place a seqcst memory barrier between the write
and the read. This also comes with a disadvantage, in that the
memory barrier is implicit on strongly-ordered architectures and
it wastes a few dozen clock cycles.

Fortunately, ctx->notify_me is never written concurrently by two
threads, so we can assert that and relax the writes to ctx->notify_me.
The resulting solution works and performs well on both aarch64 and x86.

Note that the atomic_set/atomic_read combination is not an atomic
read-modify-write, and therefore it is even weaker than C11
ATOMIC_RELAXED; on x86, ATOMIC_RELAXED compiles to a locked operation.
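The lost-wakeup hazard in the pattern above can be reproduced in a
standalone program. The following toy (not QEMU code; plain C11 atomics
and pthreads, all names hypothetical) mirrors the barrier placement of
this patch: each thread makes a relaxed write, executes a full fence,
then reads the other thread's flag. With both fences in place at least
one thread is guaranteed to observe the other's write, so the wakeup
cannot be lost; drop either fence and both loads may return 0.

    /* Build with: gcc dekker.c -o dekker -pthread */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int notify_me; /* poller's flag: "I am about to sleep" */
    static atomic_int scheduled; /* notifier's flag: "work is pending" */

    static void *poller(void *arg)
    {
        (void)arg;
        /* Relaxed is enough for the store: only this thread writes notify_me. */
        atomic_store_explicit(&notify_me, 1, memory_order_relaxed);
        /* Full barrier: publish notify_me before reading scheduled.
         * Pairs with the fence in notifier(). */
        atomic_thread_fence(memory_order_seq_cst);
        if (!atomic_load_explicit(&scheduled, memory_order_relaxed)) {
            puts("poller: no work seen, would block in poll()");
        }
        return NULL;
    }

    static void *notifier(void *arg)
    {
        (void)arg;
        atomic_store_explicit(&scheduled, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        if (atomic_load_explicit(&notify_me, memory_order_relaxed)) {
            puts("notifier: poller may be sleeping, would wake it");
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t a, b;

        pthread_create(&a, NULL, poller, NULL);
        pthread_create(&b, NULL, notifier, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
    }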
Analyzed-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Ying Fang <fangying1@huawei.com>
Message-Id: <20200407140746.8041-6-pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/aio-posix.c | 16 ++++++++++++++--
 util/aio-win32.c | 17 ++++++++++++++---
 util/async.c     | 16 ++++++++++++----
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     int64_t timeout;
     int64_t start = 0;
 
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
     assert(in_aio_context_home_thread(ctx));
 
     /* aio_notify can avoid the expensive event_notifier_set if
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.). Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
    }
 
    qemu_lockcnt_inc(&ctx->list_lock);
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
    }
 
    if (blocking) {
-        atomic_sub(&ctx->notify_me, 2);
+        /* Finish the poll before clearing the flag. */
+        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
        aio_notify_accept(ctx);
    }
 
diff --git a/util/aio-win32.c b/util/aio-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
    int count;
    int timeout;
 
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
+    assert(in_aio_context_home_thread(ctx));
    progress = false;
 
    /* aio_notify can avoid the expensive event_notifier_set if
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     * so disable the optimization now.
     */
    if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.). Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
    }
 
    qemu_lockcnt_inc(&ctx->list_lock);
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
        if (blocking) {
            assert(first);
-            assert(in_aio_context_home_thread(ctx));
-            atomic_sub(&ctx->notify_me, 2);
+            atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
            aio_notify_accept(ctx);
        }
 
diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ aio_ctx_prepare(GSource *source, gint *timeout)
 {
     AioContext *ctx = (AioContext *) source;
 
-    atomic_or(&ctx->notify_me, 1);
+    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
+
+    /*
+     * Write ctx->notify_me before computing the timeout
+     * (reading bottom half flags, etc.). Pairs with
+     * smp_mb in aio_notify().
+     */
+    smp_mb();
 
     /* We assume there is no timeout already supplied */
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
@@ -XXX,XX +XXX,XX @@ aio_ctx_check(GSource *source)
     QEMUBH *bh;
     BHListSlice *s;
 
-    atomic_and(&ctx->notify_me, ~1);
+    /* Finish computing the timeout before clearing the flag. */
+    atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
     aio_notify_accept(ctx);
 
     QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
@@ -XXX,XX +XXX,XX @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
 void aio_notify(AioContext *ctx)
 {
     /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
+     * with smp_mb in aio_ctx_prepare or aio_poll.
      */
     smp_mb();
-    if (ctx->notify_me) {
+    if (atomic_read(&ctx->notify_me)) {
         event_notifier_set(&ctx->notifier);
         atomic_mb_set(&ctx->notified, true);
     }
--
2.25.1
New patch

The main loop thread can consume 100% CPU when using --device
virtio-blk-pci,iothread=<iothread>. ppoll() constantly returns but
reading virtqueue host notifiers fails with EAGAIN. The file descriptors
are stale and remain registered with the AioContext because of bugs in
the virtio-blk dataplane start/stop code.

The problem is that the dataplane start/stop code involves drain
operations, which call virtio_blk_drained_begin() and
virtio_blk_drained_end() at points where the host notifier is not
operational:
- In virtio_blk_data_plane_start(), blk_set_aio_context() drains after
  vblk->dataplane_started has been set to true but the host notifier has
  not been attached yet.
- In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context()
  drain after the host notifier has already been detached but with
  vblk->dataplane_started still set to true.

I would like to simplify ->ioeventfd_start/stop() to avoid interactions
with drain entirely, but couldn't find a way to do that. Instead, this
patch accepts the fragile nature of the code and reorders it so that
vblk->dataplane_started is false during drain operations. This way the
virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't
touch the host notifier. The result is that
virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have
complete control over the host notifier, and stale file descriptors are
no longer left in the AioContext.

This patch fixes the 100% CPU consumption in the main loop thread and
correctly moves host notifier processing to the IOThread.
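The ordering requirement, finish the setup and only then publish the
started flag, is the classic publish pattern. A toy C11/pthreads
illustration follows (hypothetical names, not the QEMU APIs; the patch
itself pairs smp_wmb() with aio_notify_accept() rather than with an
acquire spin loop):

    /* Build with: gcc publish.c -o publish -pthread */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static int vq_state;                  /* stands in for virtqueue/notifier setup */
    static atomic_bool dataplane_started; /* stands in for vblk->dataplane_started */

    static void *iothread_fn(void *arg)
    {
        (void)arg;
        /* Acquire load pairs with the release store in main(): once we
         * observe started == true, the setup writes are visible too. */
        while (!atomic_load_explicit(&dataplane_started, memory_order_acquire)) {
            /* spin; a real IOThread would block in its event loop */
        }
        printf("iothread sees vq_state=%d\n", vq_state);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, iothread_fn, NULL);

        vq_state = 42; /* do all the setup first */
        /* Publish only after setup; analogous to smp_wmb() + plain store. */
        atomic_store_explicit(&dataplane_started, true, memory_order_release);

        pthread_join(t, NULL);
        return 0;
    }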
Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()")
Reported-by: Lukáš Doktor <ldoktor@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Lukas Doktor <ldoktor@redhat.com>
Message-id: 20230704151527.193586-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 
     memory_region_transaction_commit();
 
-    /*
-     * These fields are visible to the IOThread so we rely on implicit barriers
-     * in aio_context_acquire() on the write side and aio_notify_accept() on
-     * the read side.
-     */
-    s->starting = false;
-    vblk->dataplane_started = true;
     trace_virtio_blk_data_plane_start(s);
 
     old_context = blk_get_aio_context(s->conf->conf.blk);
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
         event_notifier_set(virtio_queue_get_host_notifier(vq));
     }
 
+    /*
+     * These fields must be visible to the IOThread when it processes the
+     * virtqueue, otherwise it will think dataplane has not started yet.
+     *
+     * Make sure ->dataplane_started is false when blk_set_aio_context() is
+     * called above so that draining does not cause the host notifier to be
+     * detached/attached prematurely.
+     */
+    s->starting = false;
+    vblk->dataplane_started = true;
+    smp_wmb(); /* paired with aio_notify_accept() on the read side */
+
     /* Get this show started by hooking up our callbacks */
     if (!blk_in_drain(s->conf->conf.blk)) {
         aio_context_acquire(s->ctx);
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 fail_guest_notifiers:
     vblk->dataplane_disabled = true;
     s->starting = false;
-    vblk->dataplane_started = true;
     return -ENOSYS;
 }
 
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
         aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
     }
 
+    /*
+     * Batch all the host notifiers in a single transaction to avoid
+     * quadratic time complexity in address_space_update_ioeventfds().
+     */
+    memory_region_transaction_begin();
+
+    for (i = 0; i < nvqs; i++) {
+        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
+    }
+
+    /*
+     * The transaction expects the ioeventfds to be open when it
+     * commits. Do it now, before the cleanup loop.
+     */
+    memory_region_transaction_commit();
+
+    for (i = 0; i < nvqs; i++) {
+        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
+    }
+
+    /*
+     * Set ->dataplane_started to false before draining so that host notifiers
+     * are not detached/attached anymore.
+     */
+    vblk->dataplane_started = false;
+
     aio_context_acquire(s->ctx);
 
     /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 
     aio_context_release(s->ctx);
 
-    /*
-     * Batch all the host notifiers in a single transaction to avoid
-     * quadratic time complexity in address_space_update_ioeventfds().
-     */
-    memory_region_transaction_begin();
-
-    for (i = 0; i < nvqs; i++) {
-        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
-    }
-
-    /*
-     * The transaction expects the ioeventfds to be open when it
-     * commits. Do it now, before the cleanup loop.
-     */
-    memory_region_transaction_commit();
-
-    for (i = 0; i < nvqs; i++) {
-        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
-    }
-
     qemu_bh_cancel(s->bh);
     notify_guest_bh(s); /* final chance to notify guest */
 
     /* Clean up guest notifier (irq) */
     k->set_guest_notifiers(qbus->parent, nvqs, false);
 
-    vblk->dataplane_started = false;
     s->stopping = false;
 }
--
2.40.1