1
The following changes since commit 8e9398e3b1a860b8c29c670c1b6c36afe8d87849:
1
The following changes since commit 887cba855bb6ff4775256f7968409281350b568c:
2
2
3
Merge tag 'pull-ppc-20220706' of https://gitlab.com/danielhb/qemu into staging (2022-07-07 06:21:05 +0530)
3
configure: Fix cross-building for RISCV host (v5) (2023-07-11 17:56:09 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to be6a166fde652589761cf70471bcde623e9bd72a:
9
for you to fetch changes up to 75dcb4d790bbe5327169fd72b185960ca58e2fa6:
10
10
11
block/io_uring: clarify that short reads can happen (2022-07-07 09:04:15 +0100)
11
virtio-blk: fix host notifier issues during dataplane start/stop (2023-07-12 15:20:32 -0400)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Pull request
15
15
16
----------------------------------------------------------------
16
----------------------------------------------------------------
17
17
18
Dominique Martinet (1):
18
Stefan Hajnoczi (1):
19
io_uring: fix short read slow path
19
virtio-blk: fix host notifier issues during dataplane start/stop
20
20
21
Stefan Hajnoczi (1):
21
hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
22
block/io_uring: clarify that short reads can happen
22
1 file changed, 38 insertions(+), 29 deletions(-)
23
24
block/io_uring.c | 12 ++++--------
25
1 file changed, 4 insertions(+), 8 deletions(-)
26
23
27
--
24
--
28
2.36.1
25
2.40.1
diff view generated by jsdifflib
Deleted patch
1
From: Dominique Martinet <dominique.martinet@atmark-techno.com>
2
1
3
sqeq.off here is the offset to read within the disk image, so obviously
4
not 'nread' (the amount we just read) but, as the author meant to write,
5
its current value incremented by the amount we just read.
6
7
Normally, recent versions of Linux will not issue short reads,
8
but it can happen so we should fix this.
9
10
This led to weird image corruptions when short reads happened.
11
12
Fixes: 6663a0a33764 ("block/io_uring: implements interfaces for io_uring")
13
Link: https://lkml.kernel.org/r/YrrFGO4A1jS0GI0G@atmark-techno.com
14
Signed-off-by: Dominique Martinet <dominique.martinet@atmark-techno.com>
15
Message-Id: <20220630010137.2518851-1-dominique.martinet@atmark-techno.com>
16
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
17
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
---
20
block/io_uring.c | 4 ++--
21
1 file changed, 2 insertions(+), 2 deletions(-)
22
23
diff --git a/block/io_uring.c b/block/io_uring.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/block/io_uring.c
26
+++ b/block/io_uring.c
27
@@ -XXX,XX +XXX,XX @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
28
trace_luring_resubmit_short_read(s, luringcb, nread);
29
30
/* Update read position */
31
- luringcb->total_read = nread;
32
+ luringcb->total_read += nread;
33
remaining = luringcb->qiov->size - luringcb->total_read;
34
35
/* Shorten qiov */
36
@@ -XXX,XX +XXX,XX @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
37
remaining);
38
39
/* Update sqe */
40
- luringcb->sqeq.off = nread;
41
+ luringcb->sqeq.off += nread;
42
luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov;
43
luringcb->sqeq.len = luringcb->resubmit_qiov.niov;
44
45
--
46
2.36.1
diff view generated by jsdifflib
1
Jens Axboe has confirmed that short reads are rare but can happen:
1
The main loop thread can consume 100% CPU when using --device
2
https://lore.kernel.org/io-uring/YsU%2FCGkl9ZXUI+Tj@stefanha-x1.localdomain/T/#m729963dc577d709b709c191922e98ec79d7eef54
2
virtio-blk-pci,iothread=<iothread>. ppoll() constantly returns but
3
reading virtqueue host notifiers fails with EAGAIN. The file descriptors
4
are stale and remain registered with the AioContext because of bugs in
5
the virtio-blk dataplane start/stop code.
3
6
4
The luring_resubmit_short_read() comment claimed they were only due to a
7
The problem is that the dataplane start/stop code involves drain
5
specific io_uring bug that was fixed in Linux commit 9d93a3f5a0c
8
operations, which call virtio_blk_drained_begin() and
6
("io_uring: punt short reads to async context"), which is wrong.
9
virtio_blk_drained_end() at points where the host notifier is not
7
Dominique Martinet found that a btrfs bug also causes short reads. There
10
operational:
8
may be more kernel code paths that result in short reads.
11
- In virtio_blk_data_plane_start(), blk_set_aio_context() drains after
12
vblk->dataplane_started has been set to true but the host notifier has
13
not been attached yet.
14
- In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context()
15
drain after the host notifier has already been detached but with
16
vblk->dataplane_started still set to true.
9
17
10
Let's consider short reads fair game.
18
I would like to simplify ->ioeventfd_start/stop() to avoid interactions
19
with drain entirely, but couldn't find a way to do that. Instead, this
20
patch accepts the fragile nature of the code and reorders it so that
21
vblk->dataplane_started is false during drain operations. This way the
22
virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't
23
touch the host notifier. The result is that
24
virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have
25
complete control over the host notifier and stale file descriptors are
26
no longer left in the AioContext.
11
27
12
Cc: Dominique Martinet <dominique.martinet@atmark-techno.com>
28
This patch fixes the 100% CPU consumption in the main loop thread and
13
Based-on: <20220630010137.2518851-1-dominique.martinet@atmark-techno.com>
29
correctly moves host notifier processing to the IOThread.
30
31
Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()")
32
Reported-by: Lukáš Doktor <ldoktor@redhat.com>
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
33
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
34
Tested-by: Lukas Doktor <ldoktor@redhat.com>
16
Message-id: 20220706080341.1206476-1-stefanha@redhat.com
35
Message-id: 20230704151527.193586-1-stefanha@redhat.com
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
36
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
---
37
---
19
block/io_uring.c | 8 ++------
38
hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
20
1 file changed, 2 insertions(+), 6 deletions(-)
39
1 file changed, 38 insertions(+), 29 deletions(-)
21
40
22
diff --git a/block/io_uring.c b/block/io_uring.c
41
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
23
index XXXXXXX..XXXXXXX 100644
42
index XXXXXXX..XXXXXXX 100644
24
--- a/block/io_uring.c
43
--- a/hw/block/dataplane/virtio-blk.c
25
+++ b/block/io_uring.c
44
+++ b/hw/block/dataplane/virtio-blk.c
26
@@ -XXX,XX +XXX,XX @@ static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb)
45
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
27
/**
46
28
* luring_resubmit_short_read:
47
memory_region_transaction_commit();
29
*
48
30
- * Before Linux commit 9d93a3f5a0c ("io_uring: punt short reads to async
49
- /*
31
- * context") a buffered I/O request with the start of the file range in the
50
- * These fields are visible to the IOThread so we rely on implicit barriers
32
- * page cache could result in a short read. Applications need to resubmit the
51
- * in aio_context_acquire() on the write side and aio_notify_accept() on
33
- * remaining read request.
52
- * the read side.
34
- *
53
- */
35
- * This is a slow path but recent kernels never take it.
54
- s->starting = false;
36
+ * Short reads are rare but may occur. The remaining read request needs to be
55
- vblk->dataplane_started = true;
37
+ * resubmitted.
56
trace_virtio_blk_data_plane_start(s);
38
*/
57
39
static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
58
old_context = blk_get_aio_context(s->conf->conf.blk);
40
int nread)
59
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
60
event_notifier_set(virtio_queue_get_host_notifier(vq));
61
}
62
63
+ /*
64
+ * These fields must be visible to the IOThread when it processes the
65
+ * virtqueue, otherwise it will think dataplane has not started yet.
66
+ *
67
+ * Make sure ->dataplane_started is false when blk_set_aio_context() is
68
+ * called above so that draining does not cause the host notifier to be
69
+ * detached/attached prematurely.
70
+ */
71
+ s->starting = false;
72
+ vblk->dataplane_started = true;
73
+ smp_wmb(); /* paired with aio_notify_accept() on the read side */
74
+
75
/* Get this show started by hooking up our callbacks */
76
if (!blk_in_drain(s->conf->conf.blk)) {
77
aio_context_acquire(s->ctx);
78
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
79
fail_guest_notifiers:
80
vblk->dataplane_disabled = true;
81
s->starting = false;
82
- vblk->dataplane_started = true;
83
return -ENOSYS;
84
}
85
86
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
87
aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
88
}
89
90
+ /*
91
+ * Batch all the host notifiers in a single transaction to avoid
92
+ * quadratic time complexity in address_space_update_ioeventfds().
93
+ */
94
+ memory_region_transaction_begin();
95
+
96
+ for (i = 0; i < nvqs; i++) {
97
+ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
98
+ }
99
+
100
+ /*
101
+ * The transaction expects the ioeventfds to be open when it
102
+ * commits. Do it now, before the cleanup loop.
103
+ */
104
+ memory_region_transaction_commit();
105
+
106
+ for (i = 0; i < nvqs; i++) {
107
+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
108
+ }
109
+
110
+ /*
111
+ * Set ->dataplane_started to false before draining so that host notifiers
112
+ * are not detached/attached anymore.
113
+ */
114
+ vblk->dataplane_started = false;
115
+
116
aio_context_acquire(s->ctx);
117
118
/* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
119
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
120
121
aio_context_release(s->ctx);
122
123
- /*
124
- * Batch all the host notifiers in a single transaction to avoid
125
- * quadratic time complexity in address_space_update_ioeventfds().
126
- */
127
- memory_region_transaction_begin();
128
-
129
- for (i = 0; i < nvqs; i++) {
130
- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
131
- }
132
-
133
- /*
134
- * The transaction expects the ioeventfds to be open when it
135
- * commits. Do it now, before the cleanup loop.
136
- */
137
- memory_region_transaction_commit();
138
-
139
- for (i = 0; i < nvqs; i++) {
140
- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
141
- }
142
-
143
qemu_bh_cancel(s->bh);
144
notify_guest_bh(s); /* final chance to notify guest */
145
146
/* Clean up guest notifier (irq) */
147
k->set_guest_notifiers(qbus->parent, nvqs, false);
148
149
- vblk->dataplane_started = false;
150
s->stopping = false;
151
}
41
--
152
--
42
2.36.1
153
2.40.1
154
155
diff view generated by jsdifflib