The following changes since commit c6a5fc2ac76c5ab709896ee1b0edd33685a67ed1:

  decodetree: Add --output-null for meson testing (2023-05-31 19:56:42 -0700)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 98b126f5e3228a346c774e569e26689943b401dd:

  qapi: add '@fdset' feature for BlockdevOptionsVirtioBlkVhostVdpa (2023-06-01 11:08:21 -0400)

----------------------------------------------------------------
Pull request

- Stefano Garzarella's blkio block driver 'fd' parameter
- My thread-local blk_io_plug() series

----------------------------------------------------------------

Stefan Hajnoczi (6):
  block: add blk_io_plug_call() API
  block/nvme: convert to blk_io_plug_call() API
  block/blkio: convert to blk_io_plug_call() API
  block/io_uring: convert to blk_io_plug_call() API
  block/linux-aio: convert to blk_io_plug_call() API
  block: remove bdrv_co_io_plug() API

Stefano Garzarella (2):
  block/blkio: use qemu_open() to support fd passing for virtio-blk
  qapi: add '@fdset' feature for BlockdevOptionsVirtioBlkVhostVdpa

 MAINTAINERS                       |   1 +
 qapi/block-core.json              |   6 ++
 meson.build                       |   4 +
 include/block/block-io.h          |   3 -
 include/block/block_int-common.h  |  11 ---
 include/block/raw-aio.h           |  14 ---
 include/sysemu/block-backend-io.h |  13 +--
 block/blkio.c                     |  96 ++++++++++++------
 block/block-backend.c             |  22 -----
 block/file-posix.c                |  38 -------
 block/io.c                        |  37 -------
 block/io_uring.c                  |  44 ++++-----
 block/linux-aio.c                 |  41 +++-----
 block/nvme.c                      |  44 +++------
 block/plug.c                      | 159 ++++++++++++++++++++++++++++++
 hw/block/dataplane/xen-block.c    |   8 +-
 hw/block/virtio-blk.c             |   4 +-
 hw/scsi/virtio-scsi.c             |   6 +-
 block/meson.build                 |   1 +
 block/trace-events                |   6 +-
 20 files changed, 293 insertions(+), 265 deletions(-)
 create mode 100644 block/plug.c

--
2.40.1

block: add blk_io_plug_call() API

Introduce a new API for thread-local blk_io_plug() that does not
traverse the block graph. The goal is to make blk_io_plug() multi-queue
friendly.

Instead of having block drivers track whether or not we're in a plugged
section, provide an API that allows them to defer a function call until
we're unplugged: blk_io_plug_call(fn, opaque). If blk_io_plug_call() is
called multiple times with the same fn/opaque pair, then fn() is only
called once at the end of the function - resulting in batching.

This patch introduces the API and changes blk_io_plug()/blk_io_unplug().
blk_io_plug()/blk_io_unplug() no longer require a BlockBackend argument
because the plug state is now thread-local.

Later patches convert block drivers to blk_io_plug_call() and then we
can finally remove .bdrv_co_io_plug() once all block drivers have been
converted.
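
To illustrate the intended semantics, here is a minimal standalone model.
It is not part of this patch: it collapses the thread-local state into
globals and uses a fixed-size array, purely so the API contract is easy
to see (the real implementation is in block/plug.c below):

    /* plug_demo.c - toy model of the blk_io_plug_call() contract */
    #include <assert.h>
    #include <stdio.h>

    typedef struct {
        void (*fn)(void *);
        void *opaque;
    } Call;

    static unsigned plug_count;  /* nesting depth; thread-local in the real API */
    static Call deferred[16];
    static unsigned num_deferred;

    static void plug_call(void (*fn)(void *), void *opaque)
    {
        if (plug_count == 0) {
            fn(opaque);                 /* not plugged: call immediately */
            return;
        }
        for (unsigned i = 0; i < num_deferred; i++) {
            if (deferred[i].fn == fn && deferred[i].opaque == opaque) {
                return;                 /* same fn/opaque pair: batched */
            }
        }
        assert(num_deferred < 16);
        deferred[num_deferred++] = (Call){ fn, opaque };
    }

    static void unplug(void)
    {
        assert(plug_count > 0);
        if (--plug_count > 0) {
            return;                     /* only the outermost unplug runs fns */
        }
        for (unsigned i = 0; i < num_deferred; i++) {
            deferred[i].fn(deferred[i].opaque);
        }
        num_deferred = 0;
    }

    static void submit(void *opaque)
    {
        printf("submit(%s)\n", (char *)opaque);
    }

    int main(void)
    {
        plug_count++;            /* blk_io_plug() */
        plug_call(submit, "q0");
        plug_call(submit, "q0"); /* deduplicated against the previous call */
        unplug();                /* blk_io_unplug(): prints "submit(q0)" once */
        return 0;
    }
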
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20230530180959.1108766-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 MAINTAINERS                       |   1 +
 include/sysemu/block-backend-io.h |  13 +--
 block/block-backend.c             |  22 -----
 block/plug.c                      | 159 ++++++++++++++++++++++++++++++
 hw/block/dataplane/xen-block.c    |   8 +-
 hw/block/virtio-blk.c             |   4 +-
 hw/scsi/virtio-scsi.c             |   6 +-
 block/meson.build                 |   1 +
 8 files changed, 173 insertions(+), 41 deletions(-)
 create mode 100644 block/plug.c

diff --git a/MAINTAINERS b/MAINTAINERS
index XXXXXXX..XXXXXXX 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -XXX,XX +XXX,XX @@ F: util/aio-*.c
 F: util/aio-*.h
 F: util/fdmon-*.c
 F: block/io.c
+F: block/plug.c
 F: migration/block*
 F: include/block/aio.h
 F: include/block/aio-wait.h
diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/block-backend-io.h
+++ b/include/sysemu/block-backend-io.h
@@ -XXX,XX +XXX,XX @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
 int blk_get_max_iov(BlockBackend *blk);
 int blk_get_max_hw_iov(BlockBackend *blk);
 
-/*
- * blk_io_plug/unplug are thread-local operations. This means that multiple
- * IOThreads can simultaneously call plug/unplug, but the caller must ensure
- * that each unplug() is called in the same IOThread of the matching plug().
- */
-void coroutine_fn blk_co_io_plug(BlockBackend *blk);
-void co_wrapper blk_io_plug(BlockBackend *blk);
-
-void coroutine_fn blk_co_io_unplug(BlockBackend *blk);
-void co_wrapper blk_io_unplug(BlockBackend *blk);
+void blk_io_plug(void);
+void blk_io_unplug(void);
+void blk_io_plug_call(void (*fn)(void *), void *opaque);
 
 AioContext *blk_get_aio_context(BlockBackend *blk);
 BlockAcctStats *blk_get_stats(BlockBackend *blk);
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
     notifier_list_add(&blk->insert_bs_notifiers, notify);
 }
 
-void coroutine_fn blk_co_io_plug(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    IO_CODE();
-    GRAPH_RDLOCK_GUARD();
-
-    if (bs) {
-        bdrv_co_io_plug(bs);
-    }
-}
-
-void coroutine_fn blk_co_io_unplug(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    IO_CODE();
-    GRAPH_RDLOCK_GUARD();
-
-    if (bs) {
-        bdrv_co_io_unplug(bs);
-    }
-}
-
 BlockAcctStats *blk_get_stats(BlockBackend *blk)
 {
     IO_CODE();
diff --git a/block/plug.c b/block/plug.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/block/plug.c
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Block I/O plugging
+ *
+ * Copyright Red Hat.
+ *
+ * This API defers a function call within a blk_io_plug()/blk_io_unplug()
+ * section, allowing multiple calls to batch up. This is a performance
+ * optimization that is used in the block layer to submit several I/O requests
+ * at once instead of individually:
+ *
+ *   blk_io_plug(); <-- start of plugged region
+ *   ...
+ *   blk_io_plug_call(my_func, my_obj); <-- deferred my_func(my_obj) call
+ *   blk_io_plug_call(my_func, my_obj); <-- another
+ *   blk_io_plug_call(my_func, my_obj); <-- another
+ *   ...
+ *   blk_io_unplug(); <-- end of plugged region, my_func(my_obj) is called once
+ *
+ * This code is actually generic and not tied to the block layer. If another
+ * subsystem needs this functionality, it could be renamed.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine-tls.h"
+#include "qemu/notify.h"
+#include "qemu/thread.h"
+#include "sysemu/block-backend.h"
+
+/* A function call that has been deferred until unplug() */
+typedef struct {
+    void (*fn)(void *);
+    void *opaque;
+} UnplugFn;
+
+/* Per-thread state */
+typedef struct {
+    unsigned count;       /* how many times has plug() been called? */
+    GArray *unplug_fns;   /* functions to call at unplug time */
+} Plug;
+
+/* Use get_ptr_plug() to fetch this thread-local value */
+QEMU_DEFINE_STATIC_CO_TLS(Plug, plug);
+
+/* Called at thread cleanup time */
+static void blk_io_plug_atexit(Notifier *n, void *value)
+{
+    Plug *plug = get_ptr_plug();
+    g_array_free(plug->unplug_fns, TRUE);
+}
+
+/* This won't involve coroutines, so use __thread */
+static __thread Notifier blk_io_plug_atexit_notifier;
+
+/**
+ * blk_io_plug_call:
+ * @fn: a function pointer to be invoked
+ * @opaque: a user-defined argument to @fn()
+ *
+ * Call @fn(@opaque) immediately if not within a blk_io_plug()/blk_io_unplug()
+ * section.
+ *
+ * Otherwise defer the call until the end of the outermost
+ * blk_io_plug()/blk_io_unplug() section in this thread. If the same
+ * @fn/@opaque pair has already been deferred, it will only be called once upon
+ * blk_io_unplug() so that accumulated calls are batched into a single call.
+ *
+ * The caller must ensure that @opaque is not freed before @fn() is invoked.
+ */
+void blk_io_plug_call(void (*fn)(void *), void *opaque)
+{
+    Plug *plug = get_ptr_plug();
+
+    /* Call immediately if we're not plugged */
+    if (plug->count == 0) {
+        fn(opaque);
+        return;
+    }
+
+    GArray *array = plug->unplug_fns;
+    if (!array) {
+        array = g_array_new(FALSE, FALSE, sizeof(UnplugFn));
+        plug->unplug_fns = array;
+        blk_io_plug_atexit_notifier.notify = blk_io_plug_atexit;
+        qemu_thread_atexit_add(&blk_io_plug_atexit_notifier);
+    }
+
+    UnplugFn *fns = (UnplugFn *)array->data;
+    UnplugFn new_fn = {
+        .fn = fn,
+        .opaque = opaque,
+    };
+
+    /*
+     * There won't be many, so do a linear search. If this becomes a bottleneck
+     * then a binary search (glib 2.62+) or different data structure could be
+     * used.
+     */
+    for (guint i = 0; i < array->len; i++) {
+        if (memcmp(&fns[i], &new_fn, sizeof(new_fn)) == 0) {
+            return; /* already exists */
+        }
+    }
+
+    g_array_append_val(array, new_fn);
+}
+
+/**
+ * blk_io_plug: Defer blk_io_plug_call() functions until blk_io_unplug()
+ *
+ * blk_io_plug/unplug are thread-local operations. This means that multiple
+ * threads can simultaneously call plug/unplug, but the caller must ensure that
+ * each unplug() is called in the same thread of the matching plug().
+ *
+ * Nesting is supported. blk_io_plug_call() functions are only called at the
+ * outermost blk_io_unplug().
+ */
+void blk_io_plug(void)
+{
+    Plug *plug = get_ptr_plug();
+
+    assert(plug->count < UINT32_MAX);
+
+    plug->count++;
+}
+
+/**
+ * blk_io_unplug: Run any pending blk_io_plug_call() functions
+ *
+ * There must have been a matching blk_io_plug() call in the same thread prior
+ * to this blk_io_unplug() call.
+ */
+void blk_io_unplug(void)
+{
+    Plug *plug = get_ptr_plug();
+
+    assert(plug->count > 0);
+
+    if (--plug->count > 0) {
+        return;
+    }
+
+    GArray *array = plug->unplug_fns;
+    if (!array) {
+        return;
+    }
+
+    UnplugFn *fns = (UnplugFn *)array->data;
+
+    for (guint i = 0; i < array->len; i++) {
+        fns[i].fn(fns[i].opaque);
+    }
+
+    /*
+     * This resets the array without freeing memory so that appending is cheap
+     * in the future.
+     */
+    g_array_set_size(array, 0);
+}
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -XXX,XX +XXX,XX @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
      * is below us.
      */
     if (inflight_atstart > IO_PLUG_THRESHOLD) {
-        blk_io_plug(dataplane->blk);
+        blk_io_plug();
     }
     while (rc != rp) {
         /* pull request from ring */
@@ -XXX,XX +XXX,XX @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
 
         if (inflight_atstart > IO_PLUG_THRESHOLD &&
             batched >= inflight_atstart) {
-            blk_io_unplug(dataplane->blk);
+            blk_io_unplug();
         }
         xen_block_do_aio(request);
         if (inflight_atstart > IO_PLUG_THRESHOLD) {
             if (batched >= inflight_atstart) {
-                blk_io_plug(dataplane->blk);
+                blk_io_plug();
                 batched = 0;
             } else {
                 batched++;
@@ -XXX,XX +XXX,XX @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
         }
     }
     if (inflight_atstart > IO_PLUG_THRESHOLD) {
-        blk_io_unplug(dataplane->blk);
+        blk_io_unplug();
     }
 
     return done_something;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
     bool suppress_notifications = virtio_queue_get_notification(vq);
 
     aio_context_acquire(blk_get_aio_context(s->blk));
-    blk_io_plug(s->blk);
+    blk_io_plug();
 
     do {
         if (suppress_notifications) {
@@ -XXX,XX +XXX,XX @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
         virtio_blk_submit_multireq(s, &mrb);
     }
 
-    blk_io_unplug(s->blk);
+    blk_io_unplug();
     aio_context_release(blk_get_aio_context(s->blk));
 }
 
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
         return -ENOBUFS;
     }
     scsi_req_ref(req->sreq);
-    blk_io_plug(d->conf.blk);
+    blk_io_plug();
     object_unref(OBJECT(d));
     return 0;
 }
 
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req)
     if (scsi_req_enqueue(sreq)) {
         scsi_req_continue(sreq);
     }
-    blk_io_unplug(sreq->dev->conf.blk);
+    blk_io_unplug();
     scsi_req_unref(sreq);
 }
 
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
     while (!QTAILQ_EMPTY(&reqs)) {
         req = QTAILQ_FIRST(&reqs);
         QTAILQ_REMOVE(&reqs, req, next);
-        blk_io_unplug(req->sreq->dev->conf.blk);
+        blk_io_unplug();
         scsi_req_unref(req->sreq);
         virtqueue_detach_element(req->vq, &req->elem, 0);
         virtio_scsi_free_req(req);
diff --git a/block/meson.build b/block/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -XXX,XX +XXX,XX @@ block_ss.add(files(
   'mirror.c',
   'nbd.c',
   'null.c',
+  'plug.c',
   'qapi.c',
   'qcow2-bitmap.c',
   'qcow2-cache.c',
--
2.40.1

block/nvme: convert to blk_io_plug_call() API

Stop using the .bdrv_co_io_plug() API because it is not multi-queue
block layer friendly. Use the new blk_io_plug_call() API to batch I/O
submission instead.
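
In outline, the conversion moves the doorbell kick and completion
processing out of the submission path and into a deferred callback
(condensed from the diff below; see nvme_unplug_fn()):

    /* before: kick the queue on every submission */
    q->need_kick++;
    nvme_kick(q);
    nvme_process_completion(q);

    /* after: defer a single kick per queue pair to the outermost
     * blk_io_unplug() in this thread */
    q->need_kick++;
    blk_io_plug_call(nvme_unplug_fn, q);
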
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20230530180959.1108766-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/nvme.c       | 44 ++++++++++++--------------------------------
 block/trace-events |  1 -
 2 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/vfio-helpers.h"
 #include "block/block-io.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "sysemu/replay.h"
 #include "trace.h"
 
@@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState {
     int blkshift;
 
     uint64_t max_transfer;
-    bool plugged;
 
     bool supports_write_zeroes;
     bool supports_discard;
@@ -XXX,XX +XXX,XX @@ static void nvme_kick(NVMeQueuePair *q)
 {
     BDRVNVMeState *s = q->s;
 
-    if (s->plugged || !q->need_kick) {
+    if (!q->need_kick) {
         return;
     }
     trace_nvme_kick(s, q->index);
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
     NvmeCqe *c;
 
     trace_nvme_process_completion(s, q->index, q->inflight);
-    if (s->plugged) {
-        trace_nvme_process_completion_queue_plugged(s, q->index);
-        return false;
-    }
 
     /*
      * Support re-entrancy when a request cb() function invokes aio_poll().
@@ -XXX,XX +XXX,XX @@ static void nvme_trace_command(const NvmeCmd *cmd)
     }
 }
 
+static void nvme_unplug_fn(void *opaque)
+{
+    NVMeQueuePair *q = opaque;
+
+    QEMU_LOCK_GUARD(&q->lock);
+    nvme_kick(q);
+    nvme_process_completion(q);
+}
+
 static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
                                 NvmeCmd *cmd, BlockCompletionFunc cb,
                                 void *opaque)
@@ -XXX,XX +XXX,XX @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
            q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd));
     q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
     q->need_kick++;
-    nvme_kick(q);
-    nvme_process_completion(q);
+    blk_io_plug_call(nvme_unplug_fn, q);
     qemu_mutex_unlock(&q->lock);
 }
 
@@ -XXX,XX +XXX,XX @@ static void nvme_attach_aio_context(BlockDriverState *bs,
     }
 }
 
-static void coroutine_fn nvme_co_io_plug(BlockDriverState *bs)
-{
-    BDRVNVMeState *s = bs->opaque;
-    assert(!s->plugged);
-    s->plugged = true;
-}
-
-static void coroutine_fn nvme_co_io_unplug(BlockDriverState *bs)
-{
-    BDRVNVMeState *s = bs->opaque;
-    assert(s->plugged);
-    s->plugged = false;
-    for (unsigned i = INDEX_IO(0); i < s->queue_count; i++) {
-        NVMeQueuePair *q = s->queues[i];
-        qemu_mutex_lock(&q->lock);
-        nvme_kick(q);
-        nvme_process_completion(q);
-        qemu_mutex_unlock(&q->lock);
-    }
-}
-
 static bool nvme_register_buf(BlockDriverState *bs, void *host, size_t size,
                               Error **errp)
 {
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nvme = {
     .bdrv_detach_aio_context = nvme_detach_aio_context,
     .bdrv_attach_aio_context = nvme_attach_aio_context,
 
-    .bdrv_co_io_plug = nvme_co_io_plug,
-    .bdrv_co_io_unplug = nvme_co_io_unplug,
-
     .bdrv_register_buf = nvme_register_buf,
     .bdrv_unregister_buf = nvme_unregister_buf,
 };
diff --git a/block/trace-events b/block/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -XXX,XX +XXX,XX @@ nvme_kick(void *s, unsigned q_index) "s %p q #%u"
 nvme_dma_flush_queue_wait(void *s) "s %p"
 nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
 nvme_process_completion(void *s, unsigned q_index, int inflight) "s %p q #%u inflight %d"
-nvme_process_completion_queue_plugged(void *s, unsigned q_index) "s %p q #%u"
 nvme_complete_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
 nvme_submit_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
 nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
--
2.40.1

block/blkio: convert to blk_io_plug_call() API

Stop using the .bdrv_co_io_plug() API because it is not multi-queue
block layer friendly. Use the new blk_io_plug_call() API to batch I/O
submission instead.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20230530180959.1108766-4-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/blkio.c | 43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/block/blkio.c b/block/blkio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/error-report.h"
 #include "qapi/qmp/qdict.h"
 #include "qemu/module.h"
+#include "sysemu/block-backend.h"
 #include "exec/memory.h" /* for ram_block_discard_disable() */
 
 #include "block/block-io.h"
@@ -XXX,XX +XXX,XX @@ static void blkio_detach_aio_context(BlockDriverState *bs)
                        NULL, NULL, NULL);
 }
 
-/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
-static void blkio_submit_io(BlockDriverState *bs)
+/*
+ * Called by blk_io_unplug() or immediately if not plugged. Called without
+ * blkio_lock.
+ */
+static void blkio_unplug_fn(void *opaque)
 {
-    if (qatomic_read(&bs->io_plugged) == 0) {
-        BDRVBlkioState *s = bs->opaque;
+    BDRVBlkioState *s = opaque;
 
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
         blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
     }
 }
 
+/*
+ * Schedule I/O submission after enqueuing a new request. Called without
+ * blkio_lock.
+ */
+static void blkio_submit_io(BlockDriverState *bs)
+{
+    BDRVBlkioState *s = bs->opaque;
+
+    blk_io_plug_call(blkio_unplug_fn, s);
+}
+
 static int coroutine_fn
 blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
@@ -XXX,XX +XXX,XX @@ blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 
     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
         blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
-        blkio_submit_io(bs);
     }
 
+    blkio_submit_io(bs);
     qemu_coroutine_yield();
     return cod.ret;
 }
@@ -XXX,XX +XXX,XX @@ blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
 
     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
         blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
-        blkio_submit_io(bs);
     }
 
+    blkio_submit_io(bs);
     qemu_coroutine_yield();
 
     if (use_bounce_buffer) {
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
 
     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
         blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
-        blkio_submit_io(bs);
     }
 
+    blkio_submit_io(bs);
     qemu_coroutine_yield();
 
     if (use_bounce_buffer) {
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
 
     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
         blkioq_flush(s->blkioq, &cod, 0);
-        blkio_submit_io(bs);
     }
 
+    blkio_submit_io(bs);
     qemu_coroutine_yield();
     return cod.ret;
 }
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
 
     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
         blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
-        blkio_submit_io(bs);
     }
 
+    blkio_submit_io(bs);
     qemu_coroutine_yield();
     return cod.ret;
 }
 
-static void coroutine_fn blkio_co_io_unplug(BlockDriverState *bs)
-{
-    BDRVBlkioState *s = bs->opaque;
-
-    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
-        blkio_submit_io(bs);
-    }
-}
-
 typedef enum {
     BMRR_OK,
     BMRR_SKIP,
@@ -XXX,XX +XXX,XX @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
     .bdrv_co_pwritev = blkio_co_pwritev, \
     .bdrv_co_flush_to_disk = blkio_co_flush, \
     .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
-    .bdrv_co_io_unplug = blkio_co_io_unplug, \
     .bdrv_refresh_limits = blkio_refresh_limits, \
     .bdrv_register_buf = blkio_register_buf, \
     .bdrv_unregister_buf = blkio_unregister_buf, \
--
2.40.1

block/io_uring: convert to blk_io_plug_call() API

Stop using the .bdrv_co_io_plug() API because it is not multi-queue
block layer friendly. Use the new blk_io_plug_call() API to batch I/O
submission instead.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20230530180959.1108766-5-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/raw-aio.h |  7 -------
 block/file-posix.c      | 10 ----------
 block/io_uring.c        | 44 ++++++++++++++++-------------------------
 block/trace-events      |  5 ++---
 4 files changed, 19 insertions(+), 47 deletions(-)

diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
                                   QEMUIOVector *qiov, int type);
 void luring_detach_aio_context(LuringState *s, AioContext *old_context);
 void luring_attach_aio_context(LuringState *s, AioContext *new_context);
-
-/*
- * luring_io_plug/unplug work in the thread's current AioContext, therefore the
- * caller must ensure that they are paired in the same IOThread.
- */
-void luring_io_plug(void);
-void luring_io_unplug(void);
 #endif
 
 #ifdef _WIN32
diff --git a/block/file-posix.c b/block/file-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_plug(BlockDriverState *bs)
         laio_io_plug();
     }
 #endif
-#ifdef CONFIG_LINUX_IO_URING
-    if (s->use_linux_io_uring) {
-        luring_io_plug();
-    }
-#endif
 }
 
 static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs)
         laio_io_unplug(s->aio_max_batch);
     }
 #endif
-#ifdef CONFIG_LINUX_IO_URING
-    if (s->use_linux_io_uring) {
-        luring_io_unplug();
-    }
-#endif
 }
 
 static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
diff --git a/block/io_uring.c b/block/io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -XXX,XX +XXX,XX @@
 #include "block/raw-aio.h"
 #include "qemu/coroutine.h"
 #include "qapi/error.h"
+#include "sysemu/block-backend.h"
 #include "trace.h"
 
 /* Only used for assertions. */
@@ -XXX,XX +XXX,XX @@ typedef struct LuringAIOCB {
 } LuringAIOCB;
 
 typedef struct LuringQueue {
-    int plugged;
     unsigned int in_queue;
     unsigned int in_flight;
     bool blocked;
@@ -XXX,XX +XXX,XX @@ static void luring_process_completions_and_submit(LuringState *s)
 {
     luring_process_completions(s);
 
-    if (!s->io_q.plugged && s->io_q.in_queue > 0) {
+    if (s->io_q.in_queue > 0) {
         ioq_submit(s);
     }
 }
@@ -XXX,XX +XXX,XX @@ static void qemu_luring_poll_ready(void *opaque)
 static void ioq_init(LuringQueue *io_q)
 {
     QSIMPLEQ_INIT(&io_q->submit_queue);
-    io_q->plugged = 0;
     io_q->in_queue = 0;
     io_q->in_flight = 0;
     io_q->blocked = false;
 }
 
-void luring_io_plug(void)
+static void luring_unplug_fn(void *opaque)
 {
-    AioContext *ctx = qemu_get_current_aio_context();
-    LuringState *s = aio_get_linux_io_uring(ctx);
-    trace_luring_io_plug(s);
-    s->io_q.plugged++;
-}
-
-void luring_io_unplug(void)
-{
-    AioContext *ctx = qemu_get_current_aio_context();
-    LuringState *s = aio_get_linux_io_uring(ctx);
-    assert(s->io_q.plugged);
-    trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged,
-                           s->io_q.in_queue, s->io_q.in_flight);
-    if (--s->io_q.plugged == 0 &&
-        !s->io_q.blocked && s->io_q.in_queue > 0) {
+    LuringState *s = opaque;
+    trace_luring_unplug_fn(s, s->io_q.blocked, s->io_q.in_queue,
+                           s->io_q.in_flight);
+    if (!s->io_q.blocked && s->io_q.in_queue > 0) {
         ioq_submit(s);
     }
 }
@@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
 
     QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
     s->io_q.in_queue++;
-    trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged,
-                           s->io_q.in_queue, s->io_q.in_flight);
-    if (!s->io_q.blocked &&
-        (!s->io_q.plugged ||
-         s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) {
-        ret = ioq_submit(s);
-        trace_luring_do_submit_done(s, ret);
-        return ret;
+    trace_luring_do_submit(s, s->io_q.blocked, s->io_q.in_queue,
+                           s->io_q.in_flight);
+    if (!s->io_q.blocked) {
+        if (s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES) {
+            ret = ioq_submit(s);
+            trace_luring_do_submit_done(s, ret);
+            return ret;
+        }
+
+        blk_io_plug_call(luring_unplug_fn, s);
     }
     return 0;
 }
diff --git a/block/trace-events b/block/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -XXX,XX +XXX,XX @@ file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "
 # io_uring.c
 luring_init_state(void *s, size_t size) "s %p size %zu"
 luring_cleanup_state(void *s) "%p freed"
-luring_io_plug(void *s) "LuringState %p plug"
-luring_io_unplug(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d"
-luring_do_submit(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d"
+luring_unplug_fn(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d"
+luring_do_submit(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d"
 luring_do_submit_done(void *s, int ret) "LuringState %p submitted to kernel %d"
 luring_co_submit(void *bs, void *s, void *luringcb, int fd, uint64_t offset, size_t nbytes, int type) "bs %p s %p luringcb %p fd %d offset %" PRId64 " nbytes %zd type %d"
 luring_process_completion(void *s, void *aiocb, int ret) "LuringState %p luringcb %p ret %d"
--
2.40.1

block/linux-aio: convert to blk_io_plug_call() API

Stop using the .bdrv_co_io_plug() API because it is not multi-queue
block layer friendly. Use the new blk_io_plug_call() API to batch I/O
submission instead.

Note that a dev_max_batch check is dropped in laio_io_unplug() because
the semantics of unplug_fn() are different from .bdrv_co_unplug():
1. unplug_fn() is only called when the last blk_io_unplug() call occurs,
   not every time blk_io_unplug() is called.
2. unplug_fn() is per-thread, not per-BlockDriverState, so there is no
   way to get per-BlockDriverState fields like dev_max_batch.

Therefore this condition cannot be moved to laio_unplug_fn(). It is not
obvious that this condition affects performance in practice, so I am
removing it instead of trying to come up with a more complex mechanism
to preserve the condition. The resulting submission logic is sketched
below.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
Reviewed-by: Eric Blake <eblake@redhat.com>
19
Acked-by: Kevin Wolf <kwolf@redhat.com>
20
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
21
Message-id: 20230530180959.1108766-6-stefanha@redhat.com
22
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
23
---
24
include/block/raw-aio.h | 7 -------
25
block/file-posix.c | 28 ----------------------------
26
block/linux-aio.c | 41 +++++++++++------------------------------
27
3 files changed, 11 insertions(+), 65 deletions(-)
28
29
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/block/raw-aio.h
32
+++ b/include/block/raw-aio.h
33
@@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
34
35
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
36
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
37
-
38
-/*
39
- * laio_io_plug/unplug work in the thread's current AioContext, therefore the
40
- * caller must ensure that they are paired in the same IOThread.
41
- */
42
-void laio_io_plug(void);
43
-void laio_io_unplug(uint64_t dev_max_batch);
44
#endif
45
/* io_uring.c - Linux io_uring implementation */
46
#ifdef CONFIG_LINUX_IO_URING
47
diff --git a/block/file-posix.c b/block/file-posix.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/block/file-posix.c
50
+++ b/block/file-posix.c
51
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
52
return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
53
}
54
55
-static void coroutine_fn raw_co_io_plug(BlockDriverState *bs)
56
-{
57
- BDRVRawState __attribute__((unused)) *s = bs->opaque;
58
-#ifdef CONFIG_LINUX_AIO
59
- if (s->use_linux_aio) {
60
- laio_io_plug();
61
- }
62
-#endif
63
-}
64
-
65
-static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs)
66
-{
67
- BDRVRawState __attribute__((unused)) *s = bs->opaque;
68
-#ifdef CONFIG_LINUX_AIO
69
- if (s->use_linux_aio) {
70
- laio_io_unplug(s->aio_max_batch);
71
- }
72
-#endif
73
-}
74
-
75
static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
76
{
77
BDRVRawState *s = bs->opaque;
78
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = {
79
.bdrv_co_copy_range_from = raw_co_copy_range_from,
80
.bdrv_co_copy_range_to = raw_co_copy_range_to,
81
.bdrv_refresh_limits = raw_refresh_limits,
82
- .bdrv_co_io_plug = raw_co_io_plug,
83
- .bdrv_co_io_unplug = raw_co_io_unplug,
84
.bdrv_attach_aio_context = raw_aio_attach_aio_context,
85
86
.bdrv_co_truncate = raw_co_truncate,
87
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = {
88
.bdrv_co_copy_range_from = raw_co_copy_range_from,
89
.bdrv_co_copy_range_to = raw_co_copy_range_to,
90
.bdrv_refresh_limits = raw_refresh_limits,
91
- .bdrv_co_io_plug = raw_co_io_plug,
92
- .bdrv_co_io_unplug = raw_co_io_unplug,
93
.bdrv_attach_aio_context = raw_aio_attach_aio_context,
94
95
.bdrv_co_truncate = raw_co_truncate,
96
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = {
97
.bdrv_co_pwritev = raw_co_pwritev,
98
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
99
.bdrv_refresh_limits = cdrom_refresh_limits,
100
- .bdrv_co_io_plug = raw_co_io_plug,
101
- .bdrv_co_io_unplug = raw_co_io_unplug,
102
.bdrv_attach_aio_context = raw_aio_attach_aio_context,
103
104
.bdrv_co_truncate = raw_co_truncate,
105
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = {
106
.bdrv_co_pwritev = raw_co_pwritev,
107
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
108
.bdrv_refresh_limits = cdrom_refresh_limits,
109
- .bdrv_co_io_plug = raw_co_io_plug,
110
- .bdrv_co_io_unplug = raw_co_io_unplug,
111
.bdrv_attach_aio_context = raw_aio_attach_aio_context,
112
113
.bdrv_co_truncate = raw_co_truncate,
114
diff --git a/block/linux-aio.c b/block/linux-aio.c
115
index XXXXXXX..XXXXXXX 100644
116
--- a/block/linux-aio.c
117
+++ b/block/linux-aio.c
118
@@ -XXX,XX +XXX,XX @@
119
#include "qemu/event_notifier.h"
120
#include "qemu/coroutine.h"
121
#include "qapi/error.h"
122
+#include "sysemu/block-backend.h"
123
124
/* Only used for assertions. */
125
#include "qemu/coroutine_int.h"
126
@@ -XXX,XX +XXX,XX @@ struct qemu_laiocb {
127
};
128
129
typedef struct {
130
- int plugged;
131
unsigned int in_queue;
132
unsigned int in_flight;
133
bool blocked;
134
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
135
{
136
qemu_laio_process_completions(s);
137
138
- if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
139
+ if (!QSIMPLEQ_EMPTY(&s->io_q.pending)) {
140
ioq_submit(s);
141
}
142
}
143
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_poll_ready(EventNotifier *opaque)
144
static void ioq_init(LaioQueue *io_q)
145
{
146
QSIMPLEQ_INIT(&io_q->pending);
147
- io_q->plugged = 0;
148
io_q->in_queue = 0;
149
io_q->in_flight = 0;
150
io_q->blocked = false;
151
@@ -XXX,XX +XXX,XX @@ static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
152
return max_batch;
153
}
154
155
-void laio_io_plug(void)
156
+static void laio_unplug_fn(void *opaque)
157
{
158
- AioContext *ctx = qemu_get_current_aio_context();
159
- LinuxAioState *s = aio_get_linux_aio(ctx);
160
+ LinuxAioState *s = opaque;
161
162
- s->io_q.plugged++;
163
-}
164
-
165
-void laio_io_unplug(uint64_t dev_max_batch)
166
-{
167
- AioContext *ctx = qemu_get_current_aio_context();
168
- LinuxAioState *s = aio_get_linux_aio(ctx);
169
-
170
- assert(s->io_q.plugged);
171
- s->io_q.plugged--;
172
-
173
- /*
174
- * Why max batch checking is performed here:
175
- * Another BDS may have queued requests with a higher dev_max_batch and
176
- * therefore in_queue could now exceed our dev_max_batch. Re-check the max
177
- * batch so we can honor our device's dev_max_batch.
178
- */
179
- if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
180
- (!s->io_q.plugged &&
181
- !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
182
+ if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
183
ioq_submit(s);
184
}
185
}
186
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
187
188
QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
189
s->io_q.in_queue++;
190
- if (!s->io_q.blocked &&
191
- (!s->io_q.plugged ||
192
- s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) {
193
- ioq_submit(s);
194
+ if (!s->io_q.blocked) {
195
+ if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) {
196
+ ioq_submit(s);
197
+ } else {
198
+ blk_io_plug_call(laio_unplug_fn, s);
199
+ }
200
}
201
202
return 0;
203
--
204
2.40.1
diff view generated by jsdifflib
From: David Edmondson <david.edmondson@oracle.com>

When taking the slow path for mutex acquisition, set the coroutine
value in the CoWaitRecord in push_waiter(), rather than both there and
in the caller.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20210325112941.365238-4-pbonzini@redhat.com
Message-Id: <20210309144015.557477-4-david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/qemu-coroutine-lock.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
     unsigned old_handoff;
 
     trace_qemu_co_mutex_lock_entry(mutex, self);
-    w.co = self;
     push_waiter(mutex, &w);
 
     /* This is the "Responsibility Hand-Off" protocol; a lock() picks from
--
2.30.2

No block driver implements .bdrv_co_io_plug() anymore. Get rid of the
function pointers.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20230530180959.1108766-7-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/block-io.h         |  3 ---
 include/block/block_int-common.h | 11 ----------
 block/io.c                       | 37 --------------------------------
 3 files changed, 51 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -XXX,XX +XXX,XX @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
 
 AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
 
-void coroutine_fn GRAPH_RDLOCK bdrv_co_io_plug(BlockDriverState *bs);
-void coroutine_fn GRAPH_RDLOCK bdrv_co_io_unplug(BlockDriverState *bs);
-
 bool coroutine_fn GRAPH_RDLOCK
 bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
                                    uint32_t granularity, Error **errp);
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
     void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_debug_event)(
         BlockDriverState *bs, BlkdebugEvent event);
 
-    /* io queue for linux-aio */
-    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_plug)(BlockDriverState *bs);
-    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_unplug)(
-        BlockDriverState *bs);
-
     bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
 
     bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)(
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
     unsigned int in_flight;
     unsigned int serialising_in_flight;
 
-    /*
-     * counter for nested bdrv_io_plug.
-     * Accessed with atomic ops.
-     */
-    unsigned io_plugged;
-
     /* do we need to tell the quest if we have a volatile write cache? */
     int enable_write_cache;
 
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
     return mem;
 }
 
-void coroutine_fn bdrv_co_io_plug(BlockDriverState *bs)
-{
-    BdrvChild *child;
-    IO_CODE();
-    assert_bdrv_graph_readable();
-
-    QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_co_io_plug(child->bs);
-    }
-
-    if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
-        BlockDriver *drv = bs->drv;
-        if (drv && drv->bdrv_co_io_plug) {
-            drv->bdrv_co_io_plug(bs);
-        }
-    }
-}
-
-void coroutine_fn bdrv_co_io_unplug(BlockDriverState *bs)
-{
-    BdrvChild *child;
-    IO_CODE();
-    assert_bdrv_graph_readable();
-
-    assert(bs->io_plugged);
-    if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
-        BlockDriver *drv = bs->drv;
-        if (drv && drv->bdrv_co_io_unplug) {
-            drv->bdrv_co_io_unplug(bs);
-        }
-    }
-
-    QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_co_io_unplug(child->bs);
-    }
-}
-
 /* Helper that undoes bdrv_register_buf() when it fails partway through */
 static void GRAPH_RDLOCK
 bdrv_register_buf_rollback(BlockDriverState *bs, void *host, size_t size,
--
2.40.1
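With the per-driver plug hooks gone, plugging is purely a bracket around
request submission on the current thread. A sketch of the caller side
under that assumption; the Request type and process_request() are
hypothetical stand-ins for a device model's request handling:

#include "sysemu/block-backend-io.h"   /* blk_io_plug(), blk_io_unplug() */

typedef struct Request Request;           /* hypothetical request type */
extern void process_request(Request *r);  /* hypothetical handler; it may
                                              defer work internally with
                                              blk_io_plug_call() */

static void handle_batch(Request **reqs, int num_reqs)
{
    blk_io_plug();      /* open a plug section for the current thread */

    for (int i = 0; i < num_reqs; i++) {
        process_request(reqs[i]);
    }

    blk_io_unplug();    /* deferred unplug functions run here, once each */
}

Since the plug state is per-thread rather than per-BlockDriverState, no
graph traversal or atomic io_plugged counter is needed; that is exactly
what the deletions above buy.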
From: David Edmondson <david.edmondson@oracle.com>

If a new bitmap entry is allocated, requiring the entire block to be
written, avoid leaking the buffer allocated for the block should
the write fail.

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Message-id: 20210325112941.365238-2-pbonzini@redhat.com
Message-Id: <20210309144015.557477-2-david.edmondson@oracle.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/vdi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/vdi.c b/block/vdi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -XXX,XX +XXX,XX @@ nonallocating_write:
 
     logout("finished data write\n");
     if (ret < 0) {
+        g_free(block);
         return ret;
     }
--
2.30.2

From: Stefano Garzarella <sgarzare@redhat.com>

Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) support fd
passing. Let's expose this to the user, so the management layer
can pass the file descriptor of an already opened path.

If the libblkio virtio-blk driver supports fd passing, let's always
use qemu_open() to open the `path`, so we can handle fd passing
from the management layer through the "/dev/fdset/N" special path.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230530071941.8954-2-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/block/blkio.c b/block/blkio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -XXX,XX +XXX,XX @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs,
 {
     const char *path = qdict_get_try_str(options, "path");
     BDRVBlkioState *s = bs->opaque;
-    int ret;
+    bool fd_supported = false;
+    int fd, ret;
 
     if (!path) {
         error_setg(errp, "missing 'path' option");
         return -EINVAL;
     }
 
-    ret = blkio_set_str(s->blkio, "path", path);
-    qdict_del(options, "path");
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "failed to set path: %s",
-                         blkio_get_error_msg());
-        return ret;
-    }
-
     if (!(flags & BDRV_O_NOCACHE)) {
         error_setg(errp, "cache.direct=off is not supported");
         return -EINVAL;
     }
+
+    if (blkio_get_int(s->blkio, "fd", &fd) == 0) {
+        fd_supported = true;
+    }
+
+    /*
+     * If the libblkio driver supports fd passing, let's always use qemu_open()
+     * to open the `path`, so we can handle fd passing from the management
+     * layer through the "/dev/fdset/N" special path.
+     */
+    if (fd_supported) {
+        int open_flags;
+
+        if (flags & BDRV_O_RDWR) {
+            open_flags = O_RDWR;
+        } else {
+            open_flags = O_RDONLY;
+        }
+
+        fd = qemu_open(path, open_flags, errp);
+        if (fd < 0) {
+            return -EINVAL;
+        }
+
+        ret = blkio_set_int(s->blkio, "fd", fd);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "failed to set fd: %s",
+                             blkio_get_error_msg());
+            qemu_close(fd);
+            return ret;
+        }
+    } else {
+        ret = blkio_set_str(s->blkio, "path", path);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "failed to set path: %s",
+                             blkio_get_error_msg());
+            return ret;
+        }
+    }
+
+    qdict_del(options, "path");
+
     return 0;
 }
 
--
2.40.1
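For illustration, one plausible QMP session exercising this: the
management layer opens the vhost-vdpa character device itself, hands the
descriptor to QEMU over the QMP socket (via SCM_RIGHTS) with add-fd, and
then points @path at the resulting fd set. The fdset-id and node-name
values below are made up, and cache.direct=on is required because
cache.direct=off is rejected above:

{ "execute": "add-fd", "arguments": { "fdset-id": 1 } }

{ "execute": "blockdev-add",
  "arguments": {
      "driver": "virtio-blk-vhost-vdpa",
      "node-name": "vdpa0",
      "path": "/dev/fdset/1",
      "cache": { "direct": true }
  }
}

At open time, qemu_open() resolves "/dev/fdset/1" to the descriptor
registered with add-fd, so QEMU never needs the privileges to open the
character device itself.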
From: David Edmondson <david.edmondson@oracle.com>

Given that the block size is read from the header of the VDI file, a
wide variety of sizes might be seen. Rather than re-using a block-sized
memory region when writing the VDI header, allocate an appropriately
sized buffer.

Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Message-id: 20210325112941.365238-3-pbonzini@redhat.com
Message-Id: <20210309144015.557477-3-david.edmondson@oracle.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/vdi.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/block/vdi.c b/block/vdi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -XXX,XX +XXX,XX @@ nonallocating_write:
 
     if (block) {
         /* One or more new blocks were allocated. */
-        VdiHeader *header = (VdiHeader *) block;
+        VdiHeader *header;
         uint8_t *base;
         uint64_t offset;
         uint32_t n_sectors;
 
+        g_free(block);
+        header = g_malloc(sizeof(*header));
+
         logout("now writing modified header\n");
         assert(VDI_IS_ALLOCATED(bmap_first));
         *header = s->header;
         vdi_header_to_le(header);
-        ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader));
-        g_free(block);
-        block = NULL;
+        ret = bdrv_pwrite(bs->file, 0, header, sizeof(*header));
+        g_free(header);
 
         if (ret < 0) {
             return ret;
--
2.30.2

From: Stefano Garzarella <sgarzare@redhat.com>

The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports fd
passing through the new 'fd' property.

Since we now use qemu_open() on '@path' when the virtio-blk driver
supports fd passing, let's announce it.
In this way, the management layer can pass the file descriptor of an
already opened vhost-vdpa character device. This is useful especially
when the device can only be accessed with certain privileges.

Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver
in libblkio supports it.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20230530071941.8954-3-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/block-core.json | 6 ++++++
 meson.build          | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -XXX,XX +XXX,XX @@
 #
 # @path: path to the vhost-vdpa character device.
 #
+# Features:
+# @fdset: Member @path supports the special "/dev/fdset/N" path
+#     (since 8.1)
+#
 # Since: 7.2
 ##
 { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
   'data': { 'path': 'str' },
+  'features': [ { 'name' :'fdset',
+                  'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ],
   'if': 'CONFIG_BLKIO' }
 
 ##
diff --git a/meson.build b/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/meson.build
+++ b/meson.build
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_LZO', lzo.found())
 config_host_data.set('CONFIG_MPATH', mpathpersist.found())
 config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api)
 config_host_data.set('CONFIG_BLKIO', blkio.found())
+if blkio.found()
+  config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD',
+                       blkio.version().version_compare('>=1.3.0'))
+endif
 config_host_data.set('CONFIG_CURL', curl.found())
 config_host_data.set('CONFIG_CURSES', curses.found())
 config_host_data.set('CONFIG_GBM', gbm.found())
--
2.40.1
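Since the feature is compiled in only when the libblkio version is new
enough, management software should probe for it rather than assume it;
a hypothetical probe via QAPI introspection:

{ "execute": "query-qmp-schema" }

In the reply, the schema entry describing
BlockdevOptionsVirtioBlkVhostVdpa lists "fdset" in its "features" array
only when QEMU was built against libblkio >= 1.3.0; if the feature is
absent, the tool should fall back to passing a plain device path.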