1 | The following changes since commit 6d40ce00c1166c317e298ad82ecf10e650c4f87d: | 1 | The following changes since commit c6a5fc2ac76c5ab709896ee1b0edd33685a67ed1: |
2 | 2 | ||
3 | Update version for v6.0.0-rc1 release (2021-03-30 18:19:07 +0100) | 3 | decodetree: Add --output-null for meson testing (2023-05-31 19:56:42 -0700) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to b6489ac06695e257ea0a9841364577e247fdee30: | 9 | for you to fetch changes up to 98b126f5e3228a346c774e569e26689943b401dd: |
10 | 10 | ||
11 | test-coroutine: Add rwlock downgrade test (2021-03-31 10:44:21 +0100) | 11 | qapi: add '@fdset' feature for BlockdevOptionsVirtioBlkVhostVdpa (2023-06-01 11:08:21 -0400) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Pull request |
15 | 15 | ||
16 | A fix for VDI image files and more generally for CoRwlock. | 16 | - Stefano Garzarella's blkio block driver 'fd' parameter |
17 | - My thread-local blk_io_plug() series | ||
17 | 18 | ||
18 | ---------------------------------------------------------------- | 19 | ---------------------------------------------------------------- |
19 | 20 | ||
20 | David Edmondson (4): | 21 | Stefan Hajnoczi (6): |
21 | block/vdi: When writing new bmap entry fails, don't leak the buffer | 22 | block: add blk_io_plug_call() API |
22 | block/vdi: Don't assume that blocks are larger than VdiHeader | 23 | block/nvme: convert to blk_io_plug_call() API |
23 | coroutine-lock: Store the coroutine in the CoWaitRecord only once | 24 | block/blkio: convert to blk_io_plug_call() API |
24 | test-coroutine: Add rwlock downgrade test | 25 | block/io_uring: convert to blk_io_plug_call() API |
26 | block/linux-aio: convert to blk_io_plug_call() API | ||
27 | block: remove bdrv_co_io_plug() API | ||
25 | 28 | ||
26 | Paolo Bonzini (2): | 29 | Stefano Garzarella (2): |
27 | coroutine-lock: Reimplement CoRwlock to fix downgrade bug | 30 | block/blkio: use qemu_open() to support fd passing for virtio-blk |
28 | test-coroutine: Add rwlock upgrade test | 31 | qapi: add '@fdset' feature for BlockdevOptionsVirtioBlkVhostVdpa |
29 | 32 | ||
30 | include/qemu/coroutine.h | 17 ++-- | 33 | MAINTAINERS | 1 + |
31 | block/vdi.c | 11 ++- | 34 | qapi/block-core.json | 6 ++ |
32 | tests/unit/test-coroutine.c | 161 +++++++++++++++++++++++++++++++++++ | 35 | meson.build | 4 + |
33 | util/qemu-coroutine-lock.c | 165 +++++++++++++++++++++++------------- | 36 | include/block/block-io.h | 3 - |
34 | 4 files changed, 282 insertions(+), 72 deletions(-) | 37 | include/block/block_int-common.h | 11 --- |
38 | include/block/raw-aio.h | 14 --- | ||
39 | include/sysemu/block-backend-io.h | 13 +-- | ||
40 | block/blkio.c | 96 ++++++++++++------ | ||
41 | block/block-backend.c | 22 ----- | ||
42 | block/file-posix.c | 38 ------- | ||
43 | block/io.c | 37 ------- | ||
44 | block/io_uring.c | 44 ++++----- | ||
45 | block/linux-aio.c | 41 +++----- | ||
46 | block/nvme.c | 44 +++------ | ||
47 | block/plug.c | 159 ++++++++++++++++++++++++++++++ | ||
48 | hw/block/dataplane/xen-block.c | 8 +- | ||
49 | hw/block/virtio-blk.c | 4 +- | ||
50 | hw/scsi/virtio-scsi.c | 6 +- | ||
51 | block/meson.build | 1 + | ||
52 | block/trace-events | 6 +- | ||
53 | 20 files changed, 293 insertions(+), 265 deletions(-) | ||
54 | create mode 100644 block/plug.c | ||
35 | 55 | ||
36 | -- | 56 | -- |
37 | 2.30.2 | 57 | 2.40.1 |
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | Introduce a new API for thread-local blk_io_plug() that does not |
2 | 2 | traverse the block graph. The goal is to make blk_io_plug() multi-queue | |
3 | An invariant of the current rwlock is that if multiple coroutines hold a | 3 | friendly. |
4 | reader lock, all must be runnable. The unlock implementation relies on | 4 | |
5 | this, choosing to wake a single coroutine when the final read lock | 5 | Instead of having block drivers track whether or not we're in a plugged |
6 | holder exits the critical section, assuming that it will wake a | 6 | section, provide an API that allows them to defer a function call until |
7 | coroutine attempting to acquire a write lock. | 7 | we're unplugged: blk_io_plug_call(fn, opaque). If blk_io_plug_call() is |
8 | 8 | called multiple times with the same fn/opaque pair, then fn() is only | |
9 | The downgrade implementation violates this assumption by creating a | 9 | called once at the end of the plugged section - resulting in batching. |
10 | read lock owning coroutine that is exclusively runnable - any other | 10 | |
11 | coroutines that are waiting to acquire a read lock are *not* made | 11 | This patch introduces the API and changes blk_io_plug()/blk_io_unplug(). |
12 | runnable when the write lock holder converts its ownership to read | 12 | blk_io_plug()/blk_io_unplug() no longer require a BlockBackend argument |
13 | only. | 13 | because the plug state is now thread-local. |
14 | 14 | ||
15 | More generally, the old implementation had lots of other fairness bugs. | 15 | Later patches convert block drivers to blk_io_plug_call() and then we |
16 | The root cause of the bugs was that CoQueue would wake up readers even | 16 | can finally remove .bdrv_co_io_plug() once all block drivers have been |
17 | if there were pending writers, and would wake up writers even if there | 17 | converted. |
18 | were readers. In that case, the coroutine would go back to sleep *at | 18 | |
19 | the end* of the CoQueue, losing its place at the head of the line. | 19 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
20 | 20 | Reviewed-by: Eric Blake <eblake@redhat.com> | |
21 | To fix this, keep the queue of waiters explicitly in the CoRwlock | 21 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> |
22 | instead of using CoQueue, and store for each whether it is a | 22 | Acked-by: Kevin Wolf <kwolf@redhat.com> |
23 | potential reader or a writer. This way, downgrade can look at the | 23 | Message-id: 20230530180959.1108766-2-stefanha@redhat.com |
24 | first queued coroutine and wake it only if it is a reader, causing | ||
25 | all other readers in line to be released in turn. | ||
26 | |||
27 | Reported-by: David Edmondson <david.edmondson@oracle.com> | ||
28 | Reviewed-by: David Edmondson <david.edmondson@oracle.com> | ||
29 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
30 | Message-id: 20210325112941.365238-5-pbonzini@redhat.com | ||
31 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 24 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
32 | --- | 25 | --- |
33 | include/qemu/coroutine.h | 17 ++-- | 26 | MAINTAINERS | 1 + |
34 | util/qemu-coroutine-lock.c | 164 +++++++++++++++++++++++-------------- | 27 | include/sysemu/block-backend-io.h | 13 +-- |
35 | 2 files changed, 114 insertions(+), 67 deletions(-) | 28 | block/block-backend.c | 22 ----- |
36 | 29 | block/plug.c | 159 ++++++++++++++++++++++++++++++ | |
37 | diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h | 30 | hw/block/dataplane/xen-block.c | 8 +- |
38 | index XXXXXXX..XXXXXXX 100644 | 31 | hw/block/virtio-blk.c | 4 +- |
39 | --- a/include/qemu/coroutine.h | 32 | hw/scsi/virtio-scsi.c | 6 +- |
40 | +++ b/include/qemu/coroutine.h | 33 | block/meson.build | 1 + |
41 | @@ -XXX,XX +XXX,XX @@ bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock); | 34 | 8 files changed, 173 insertions(+), 41 deletions(-) |
42 | bool qemu_co_queue_empty(CoQueue *queue); | 35 | create mode 100644 block/plug.c |
43 | 36 | ||
44 | 37 | diff --git a/MAINTAINERS b/MAINTAINERS | |
45 | +typedef struct CoRwTicket CoRwTicket; | 38 | index XXXXXXX..XXXXXXX 100644 |
46 | typedef struct CoRwlock { | 39 | --- a/MAINTAINERS |
47 | - int pending_writer; | 40 | +++ b/MAINTAINERS |
48 | - int reader; | 41 | @@ -XXX,XX +XXX,XX @@ F: util/aio-*.c |
49 | CoMutex mutex; | 42 | F: util/aio-*.h |
50 | - CoQueue queue; | 43 | F: util/fdmon-*.c |
51 | + | 44 | F: block/io.c |
52 | + /* Number of readers, or -1 if owned for writing. */ | 45 | +F: block/plug.c |
53 | + int owners; | 46 | F: migration/block* |
54 | + | 47 | F: include/block/aio.h |
55 | + /* Waiting coroutines. */ | 48 | F: include/block/aio-wait.h |
56 | + QSIMPLEQ_HEAD(, CoRwTicket) tickets; | 49 | diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h |
57 | } CoRwlock; | 50 | index XXXXXXX..XXXXXXX 100644 |
58 | 51 | --- a/include/sysemu/block-backend-io.h | |
59 | /** | 52 | +++ b/include/sysemu/block-backend-io.h |
60 | @@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_rdlock(CoRwlock *lock); | 53 | @@ -XXX,XX +XXX,XX @@ void blk_iostatus_set_err(BlockBackend *blk, int error); |
61 | /** | 54 | int blk_get_max_iov(BlockBackend *blk); |
62 | * Write Locks the CoRwlock from a reader. This is a bit more efficient than | 55 | int blk_get_max_hw_iov(BlockBackend *blk); |
63 | * @qemu_co_rwlock_unlock followed by a separate @qemu_co_rwlock_wrlock. | 56 | |
64 | - * However, if the lock cannot be upgraded immediately, control is transferred | 57 | -/* |
65 | - * to the caller of the current coroutine. Also, @qemu_co_rwlock_upgrade | 58 | - * blk_io_plug/unplug are thread-local operations. This means that multiple |
66 | - * only overrides CoRwlock fairness if there are no concurrent readers, so | 59 | - * IOThreads can simultaneously call plug/unplug, but the caller must ensure |
67 | - * another writer might run while @qemu_co_rwlock_upgrade blocks. | 60 | - * that each unplug() is called in the same IOThread of the matching plug(). |
68 | + * Note that if the lock cannot be upgraded immediately, control is transferred | 61 | - */ |
69 | + * to the caller of the current coroutine; another writer might run while | 62 | -void coroutine_fn blk_co_io_plug(BlockBackend *blk); |
70 | + * @qemu_co_rwlock_upgrade blocks. | 63 | -void co_wrapper blk_io_plug(BlockBackend *blk); |
71 | */ | 64 | - |
72 | void qemu_co_rwlock_upgrade(CoRwlock *lock); | 65 | -void coroutine_fn blk_co_io_unplug(BlockBackend *blk); |
73 | 66 | -void co_wrapper blk_io_unplug(BlockBackend *blk); | |
74 | diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c | 67 | +void blk_io_plug(void); |
75 | index XXXXXXX..XXXXXXX 100644 | 68 | +void blk_io_unplug(void); |
76 | --- a/util/qemu-coroutine-lock.c | 69 | +void blk_io_plug_call(void (*fn)(void *), void *opaque); |
77 | +++ b/util/qemu-coroutine-lock.c | 70 | |
78 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) | 71 | AioContext *blk_get_aio_context(BlockBackend *blk); |
79 | trace_qemu_co_mutex_unlock_return(mutex, self); | 72 | BlockAcctStats *blk_get_stats(BlockBackend *blk); |
73 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/block/block-backend.c | ||
76 | +++ b/block/block-backend.c | ||
77 | @@ -XXX,XX +XXX,XX @@ void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify) | ||
78 | notifier_list_add(&blk->insert_bs_notifiers, notify); | ||
80 | } | 79 | } |
81 | 80 | ||
82 | +struct CoRwTicket { | 81 | -void coroutine_fn blk_co_io_plug(BlockBackend *blk) |
83 | + bool read; | 82 | -{ |
84 | + Coroutine *co; | 83 | - BlockDriverState *bs = blk_bs(blk); |
85 | + QSIMPLEQ_ENTRY(CoRwTicket) next; | 84 | - IO_CODE(); |
86 | +}; | 85 | - GRAPH_RDLOCK_GUARD(); |
87 | + | 86 | - |
88 | void qemu_co_rwlock_init(CoRwlock *lock) | 87 | - if (bs) { |
88 | - bdrv_co_io_plug(bs); | ||
89 | - } | ||
90 | -} | ||
91 | - | ||
92 | -void coroutine_fn blk_co_io_unplug(BlockBackend *blk) | ||
93 | -{ | ||
94 | - BlockDriverState *bs = blk_bs(blk); | ||
95 | - IO_CODE(); | ||
96 | - GRAPH_RDLOCK_GUARD(); | ||
97 | - | ||
98 | - if (bs) { | ||
99 | - bdrv_co_io_unplug(bs); | ||
100 | - } | ||
101 | -} | ||
102 | - | ||
103 | BlockAcctStats *blk_get_stats(BlockBackend *blk) | ||
89 | { | 104 | { |
90 | - memset(lock, 0, sizeof(*lock)); | 105 | IO_CODE(); |
91 | - qemu_co_queue_init(&lock->queue); | 106 | diff --git a/block/plug.c b/block/plug.c |
92 | qemu_co_mutex_init(&lock->mutex); | 107 | new file mode 100644 |
93 | + lock->owners = 0; | 108 | index XXXXXXX..XXXXXXX |
94 | + QSIMPLEQ_INIT(&lock->tickets); | 109 | --- /dev/null |
110 | +++ b/block/plug.c | ||
111 | @@ -XXX,XX +XXX,XX @@ | ||
112 | +/* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
113 | +/* | ||
114 | + * Block I/O plugging | ||
115 | + * | ||
116 | + * Copyright Red Hat. | ||
117 | + * | ||
118 | + * This API defers a function call within a blk_io_plug()/blk_io_unplug() | ||
119 | + * section, allowing multiple calls to batch up. This is a performance | ||
120 | + * optimization that is used in the block layer to submit several I/O requests | ||
121 | + * at once instead of individually: | ||
122 | + * | ||
123 | + * blk_io_plug(); <-- start of plugged region | ||
124 | + * ... | ||
125 | + * blk_io_plug_call(my_func, my_obj); <-- deferred my_func(my_obj) call | ||
126 | + * blk_io_plug_call(my_func, my_obj); <-- another | ||
127 | + * blk_io_plug_call(my_func, my_obj); <-- another | ||
128 | + * ... | ||
129 | + * blk_io_unplug(); <-- end of plugged region, my_func(my_obj) is called once | ||
130 | + * | ||
131 | + * This code is actually generic and not tied to the block layer. If another | ||
132 | + * subsystem needs this functionality, it could be renamed. | ||
133 | + */ | ||
134 | + | ||
135 | +#include "qemu/osdep.h" | ||
136 | +#include "qemu/coroutine-tls.h" | ||
137 | +#include "qemu/notify.h" | ||
138 | +#include "qemu/thread.h" | ||
139 | +#include "sysemu/block-backend.h" | ||
140 | + | ||
141 | +/* A function call that has been deferred until unplug() */ | ||
142 | +typedef struct { | ||
143 | + void (*fn)(void *); | ||
144 | + void *opaque; | ||
145 | +} UnplugFn; | ||
146 | + | ||
147 | +/* Per-thread state */ | ||
148 | +typedef struct { | ||
149 | + unsigned count; /* how many times has plug() been called? */ | ||
150 | + GArray *unplug_fns; /* functions to call at unplug time */ | ||
151 | +} Plug; | ||
152 | + | ||
153 | +/* Use get_ptr_plug() to fetch this thread-local value */ | ||
154 | +QEMU_DEFINE_STATIC_CO_TLS(Plug, plug); | ||
155 | + | ||
156 | +/* Called at thread cleanup time */ | ||
157 | +static void blk_io_plug_atexit(Notifier *n, void *value) | ||
158 | +{ | ||
159 | + Plug *plug = get_ptr_plug(); | ||
160 | + g_array_free(plug->unplug_fns, TRUE); | ||
95 | +} | 161 | +} |
96 | + | 162 | + |
97 | +/* Releases the internal CoMutex. */ | 163 | +/* This won't involve coroutines, so use __thread */ |
98 | +static void qemu_co_rwlock_maybe_wake_one(CoRwlock *lock) | 164 | +static __thread Notifier blk_io_plug_atexit_notifier; |
165 | + | ||
166 | +/** | ||
167 | + * blk_io_plug_call: | ||
168 | + * @fn: a function pointer to be invoked | ||
169 | + * @opaque: a user-defined argument to @fn() | ||
170 | + * | ||
171 | + * Call @fn(@opaque) immediately if not within a blk_io_plug()/blk_io_unplug() | ||
172 | + * section. | ||
173 | + * | ||
174 | + * Otherwise defer the call until the end of the outermost | ||
175 | + * blk_io_plug()/blk_io_unplug() section in this thread. If the same | ||
176 | + * @fn/@opaque pair has already been deferred, it will only be called once upon | ||
177 | + * blk_io_unplug() so that accumulated calls are batched into a single call. | ||
178 | + * | ||
179 | + * The caller must ensure that @opaque is not freed before @fn() is invoked. | ||
180 | + */ | ||
181 | +void blk_io_plug_call(void (*fn)(void *), void *opaque) | ||
99 | +{ | 182 | +{ |
100 | + CoRwTicket *tkt = QSIMPLEQ_FIRST(&lock->tickets); | 183 | + Plug *plug = get_ptr_plug(); |
101 | + Coroutine *co = NULL; | 184 | + |
185 | + /* Call immediately if we're not plugged */ | ||
186 | + if (plug->count == 0) { | ||
187 | + fn(opaque); | ||
188 | + return; | ||
189 | + } | ||
190 | + | ||
191 | + GArray *array = plug->unplug_fns; | ||
192 | + if (!array) { | ||
193 | + array = g_array_new(FALSE, FALSE, sizeof(UnplugFn)); | ||
194 | + plug->unplug_fns = array; | ||
195 | + blk_io_plug_atexit_notifier.notify = blk_io_plug_atexit; | ||
196 | + qemu_thread_atexit_add(&blk_io_plug_atexit_notifier); | ||
197 | + } | ||
198 | + | ||
199 | + UnplugFn *fns = (UnplugFn *)array->data; | ||
200 | + UnplugFn new_fn = { | ||
201 | + .fn = fn, | ||
202 | + .opaque = opaque, | ||
203 | + }; | ||
102 | + | 204 | + |
103 | + /* | 205 | + /* |
104 | + * Setting lock->owners here prevents rdlock and wrlock from | 206 | + * There won't be many, so do a linear search. If this becomes a bottleneck |
105 | + * sneaking in between unlock and wake. | 207 | + * then a binary search (glib 2.62+) or different data structure could be |
208 | + * used. | ||
106 | + */ | 209 | + */ |
107 | + | 210 | + for (guint i = 0; i < array->len; i++) { |
108 | + if (tkt) { | 211 | + if (memcmp(&fns[i], &new_fn, sizeof(new_fn)) == 0) { |
109 | + if (tkt->read) { | 212 | + return; /* already exists */ |
110 | + if (lock->owners >= 0) { | ||
111 | + lock->owners++; | ||
112 | + co = tkt->co; | ||
113 | + } | ||
114 | + } else { | ||
115 | + if (lock->owners == 0) { | ||
116 | + lock->owners = -1; | ||
117 | + co = tkt->co; | ||
118 | + } | ||
119 | + } | 213 | + } |
120 | + } | 214 | + } |
121 | + | 215 | + |
122 | + if (co) { | 216 | + g_array_append_val(array, new_fn); |
123 | + QSIMPLEQ_REMOVE_HEAD(&lock->tickets, next); | 217 | +} |
124 | + qemu_co_mutex_unlock(&lock->mutex); | 218 | + |
125 | + aio_co_wake(co); | 219 | +/** |
126 | + } else { | 220 | + * blk_io_plug: Defer blk_io_plug_call() functions until blk_io_unplug() |
127 | + qemu_co_mutex_unlock(&lock->mutex); | 221 | + * |
128 | + } | 222 | + * blk_io_plug/unplug are thread-local operations. This means that multiple |
223 | + * threads can simultaneously call plug/unplug, but the caller must ensure that | ||
224 | + * each unplug() is called in the same thread of the matching plug(). | ||
225 | + * | ||
226 | + * Nesting is supported. blk_io_plug_call() functions are only called at the | ||
227 | + * outermost blk_io_unplug(). | ||
228 | + */ | ||
229 | +void blk_io_plug(void) | ||
230 | +{ | ||
231 | + Plug *plug = get_ptr_plug(); | ||
232 | + | ||
233 | + assert(plug->count < UINT32_MAX); | ||
234 | + | ||
235 | + plug->count++; | ||
236 | +} | ||
237 | + | ||
238 | +/** | ||
239 | + * blk_io_unplug: Run any pending blk_io_plug_call() functions | ||
240 | + * | ||
241 | + * There must have been a matching blk_io_plug() call in the same thread prior | ||
242 | + * to this blk_io_unplug() call. | ||
243 | + */ | ||
244 | +void blk_io_unplug(void) | ||
245 | +{ | ||
246 | + Plug *plug = get_ptr_plug(); | ||
247 | + | ||
248 | + assert(plug->count > 0); | ||
249 | + | ||
250 | + if (--plug->count > 0) { | ||
251 | + return; | ||
252 | + } | ||
253 | + | ||
254 | + GArray *array = plug->unplug_fns; | ||
255 | + if (!array) { | ||
256 | + return; | ||
257 | + } | ||
258 | + | ||
259 | + UnplugFn *fns = (UnplugFn *)array->data; | ||
260 | + | ||
261 | + for (guint i = 0; i < array->len; i++) { | ||
262 | + fns[i].fn(fns[i].opaque); | ||
263 | + } | ||
264 | + | ||
265 | + /* | ||
266 | + * This resets the array without freeing memory so that appending is cheap | ||
267 | + * in the future. | ||
268 | + */ | ||
269 | + g_array_set_size(array, 0); | ||
270 | +} | ||
271 | diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c | ||
272 | index XXXXXXX..XXXXXXX 100644 | ||
273 | --- a/hw/block/dataplane/xen-block.c | ||
274 | +++ b/hw/block/dataplane/xen-block.c | ||
275 | @@ -XXX,XX +XXX,XX @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane) | ||
276 | * is below us. | ||
277 | */ | ||
278 | if (inflight_atstart > IO_PLUG_THRESHOLD) { | ||
279 | - blk_io_plug(dataplane->blk); | ||
280 | + blk_io_plug(); | ||
281 | } | ||
282 | while (rc != rp) { | ||
283 | /* pull request from ring */ | ||
284 | @@ -XXX,XX +XXX,XX @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane) | ||
285 | |||
286 | if (inflight_atstart > IO_PLUG_THRESHOLD && | ||
287 | batched >= inflight_atstart) { | ||
288 | - blk_io_unplug(dataplane->blk); | ||
289 | + blk_io_unplug(); | ||
290 | } | ||
291 | xen_block_do_aio(request); | ||
292 | if (inflight_atstart > IO_PLUG_THRESHOLD) { | ||
293 | if (batched >= inflight_atstart) { | ||
294 | - blk_io_plug(dataplane->blk); | ||
295 | + blk_io_plug(); | ||
296 | batched = 0; | ||
297 | } else { | ||
298 | batched++; | ||
299 | @@ -XXX,XX +XXX,XX @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane) | ||
300 | } | ||
301 | } | ||
302 | if (inflight_atstart > IO_PLUG_THRESHOLD) { | ||
303 | - blk_io_unplug(dataplane->blk); | ||
304 | + blk_io_unplug(); | ||
305 | } | ||
306 | |||
307 | return done_something; | ||
308 | diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c | ||
309 | index XXXXXXX..XXXXXXX 100644 | ||
310 | --- a/hw/block/virtio-blk.c | ||
311 | +++ b/hw/block/virtio-blk.c | ||
312 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) | ||
313 | bool suppress_notifications = virtio_queue_get_notification(vq); | ||
314 | |||
315 | aio_context_acquire(blk_get_aio_context(s->blk)); | ||
316 | - blk_io_plug(s->blk); | ||
317 | + blk_io_plug(); | ||
318 | |||
319 | do { | ||
320 | if (suppress_notifications) { | ||
321 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) | ||
322 | virtio_blk_submit_multireq(s, &mrb); | ||
323 | } | ||
324 | |||
325 | - blk_io_unplug(s->blk); | ||
326 | + blk_io_unplug(); | ||
327 | aio_context_release(blk_get_aio_context(s->blk)); | ||
129 | } | 328 | } |
130 | 329 | ||
131 | void qemu_co_rwlock_rdlock(CoRwlock *lock) | 330 | diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c |
132 | @@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_rdlock(CoRwlock *lock) | 331 | index XXXXXXX..XXXXXXX 100644 |
133 | 332 | --- a/hw/scsi/virtio-scsi.c | |
134 | qemu_co_mutex_lock(&lock->mutex); | 333 | +++ b/hw/scsi/virtio-scsi.c |
135 | /* For fairness, wait if a writer is in line. */ | 334 | @@ -XXX,XX +XXX,XX @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) |
136 | - while (lock->pending_writer) { | 335 | return -ENOBUFS; |
137 | - qemu_co_queue_wait(&lock->queue, &lock->mutex); | 336 | } |
138 | - } | 337 | scsi_req_ref(req->sreq); |
139 | - lock->reader++; | 338 | - blk_io_plug(d->conf.blk); |
140 | - qemu_co_mutex_unlock(&lock->mutex); | 339 | + blk_io_plug(); |
141 | - | 340 | object_unref(OBJECT(d)); |
142 | - /* The rest of the read-side critical section is run without the mutex. */ | 341 | return 0; |
143 | - self->locks_held++; | ||
144 | -} | ||
145 | - | ||
146 | -void qemu_co_rwlock_unlock(CoRwlock *lock) | ||
147 | -{ | ||
148 | - Coroutine *self = qemu_coroutine_self(); | ||
149 | - | ||
150 | - assert(qemu_in_coroutine()); | ||
151 | - if (!lock->reader) { | ||
152 | - /* The critical section started in qemu_co_rwlock_wrlock. */ | ||
153 | - qemu_co_queue_restart_all(&lock->queue); | ||
154 | + if (lock->owners == 0 || (lock->owners > 0 && QSIMPLEQ_EMPTY(&lock->tickets))) { | ||
155 | + lock->owners++; | ||
156 | + qemu_co_mutex_unlock(&lock->mutex); | ||
157 | } else { | ||
158 | - self->locks_held--; | ||
159 | + CoRwTicket my_ticket = { true, self }; | ||
160 | |||
161 | + QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next); | ||
162 | + qemu_co_mutex_unlock(&lock->mutex); | ||
163 | + qemu_coroutine_yield(); | ||
164 | + assert(lock->owners >= 1); | ||
165 | + | ||
166 | + /* Possibly wake another reader, which will wake the next in line. */ | ||
167 | qemu_co_mutex_lock(&lock->mutex); | ||
168 | - lock->reader--; | ||
169 | - assert(lock->reader >= 0); | ||
170 | - /* Wakeup only one waiting writer */ | ||
171 | - if (!lock->reader) { | ||
172 | - qemu_co_queue_next(&lock->queue); | ||
173 | - } | ||
174 | + qemu_co_rwlock_maybe_wake_one(lock); | ||
175 | } | ||
176 | - qemu_co_mutex_unlock(&lock->mutex); | ||
177 | + | ||
178 | + self->locks_held++; | ||
179 | +} | ||
180 | + | ||
181 | +void qemu_co_rwlock_unlock(CoRwlock *lock) | ||
182 | +{ | ||
183 | + Coroutine *self = qemu_coroutine_self(); | ||
184 | + | ||
185 | + assert(qemu_in_coroutine()); | ||
186 | + self->locks_held--; | ||
187 | + | ||
188 | + qemu_co_mutex_lock(&lock->mutex); | ||
189 | + if (lock->owners > 0) { | ||
190 | + lock->owners--; | ||
191 | + } else { | ||
192 | + assert(lock->owners == -1); | ||
193 | + lock->owners = 0; | ||
194 | + } | ||
195 | + | ||
196 | + qemu_co_rwlock_maybe_wake_one(lock); | ||
197 | } | 342 | } |
198 | 343 | @@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) | |
199 | void qemu_co_rwlock_downgrade(CoRwlock *lock) | 344 | if (scsi_req_enqueue(sreq)) { |
200 | { | 345 | scsi_req_continue(sreq); |
201 | - Coroutine *self = qemu_coroutine_self(); | 346 | } |
202 | + qemu_co_mutex_lock(&lock->mutex); | 347 | - blk_io_unplug(sreq->dev->conf.blk); |
203 | + assert(lock->owners == -1); | 348 | + blk_io_unplug(); |
204 | + lock->owners = 1; | 349 | scsi_req_unref(sreq); |
205 | |||
206 | - /* lock->mutex critical section started in qemu_co_rwlock_wrlock or | ||
207 | - * qemu_co_rwlock_upgrade. | ||
208 | - */ | ||
209 | - assert(lock->reader == 0); | ||
210 | - lock->reader++; | ||
211 | - qemu_co_mutex_unlock(&lock->mutex); | ||
212 | - | ||
213 | - /* The rest of the read-side critical section is run without the mutex. */ | ||
214 | - self->locks_held++; | ||
215 | + /* Possibly wake another reader, which will wake the next in line. */ | ||
216 | + qemu_co_rwlock_maybe_wake_one(lock); | ||
217 | } | 350 | } |
218 | 351 | ||
219 | void qemu_co_rwlock_wrlock(CoRwlock *lock) | 352 | @@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) |
220 | { | 353 | while (!QTAILQ_EMPTY(&reqs)) { |
221 | + Coroutine *self = qemu_coroutine_self(); | 354 | req = QTAILQ_FIRST(&reqs); |
222 | + | 355 | QTAILQ_REMOVE(&reqs, req, next); |
223 | qemu_co_mutex_lock(&lock->mutex); | 356 | - blk_io_unplug(req->sreq->dev->conf.blk); |
224 | - lock->pending_writer++; | 357 | + blk_io_unplug(); |
225 | - while (lock->reader) { | 358 | scsi_req_unref(req->sreq); |
226 | - qemu_co_queue_wait(&lock->queue, &lock->mutex); | 359 | virtqueue_detach_element(req->vq, &req->elem, 0); |
227 | + if (lock->owners == 0) { | 360 | virtio_scsi_free_req(req); |
228 | + lock->owners = -1; | 361 | diff --git a/block/meson.build b/block/meson.build |
229 | + qemu_co_mutex_unlock(&lock->mutex); | 362 | index XXXXXXX..XXXXXXX 100644 |
230 | + } else { | 363 | --- a/block/meson.build |
231 | + CoRwTicket my_ticket = { false, qemu_coroutine_self() }; | 364 | +++ b/block/meson.build |
232 | + | 365 | @@ -XXX,XX +XXX,XX @@ block_ss.add(files( |
233 | + QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next); | 366 | 'mirror.c', |
234 | + qemu_co_mutex_unlock(&lock->mutex); | 367 | 'nbd.c', |
235 | + qemu_coroutine_yield(); | 368 | 'null.c', |
236 | + assert(lock->owners == -1); | 369 | + 'plug.c', |
237 | } | 370 | 'qapi.c', |
238 | - lock->pending_writer--; | 371 | 'qcow2-bitmap.c', |
239 | 372 | 'qcow2-cache.c', | |
240 | - /* The rest of the write-side critical section is run with | ||
241 | - * the mutex taken, so that lock->reader remains zero. | ||
242 | - * There is no need to update self->locks_held. | ||
243 | - */ | ||
244 | + self->locks_held++; | ||
245 | } | ||
246 | |||
247 | void qemu_co_rwlock_upgrade(CoRwlock *lock) | ||
248 | { | ||
249 | - Coroutine *self = qemu_coroutine_self(); | ||
250 | - | ||
251 | qemu_co_mutex_lock(&lock->mutex); | ||
252 | - assert(lock->reader > 0); | ||
253 | - lock->reader--; | ||
254 | - lock->pending_writer++; | ||
255 | - while (lock->reader) { | ||
256 | - qemu_co_queue_wait(&lock->queue, &lock->mutex); | ||
257 | + assert(lock->owners > 0); | ||
258 | + /* For fairness, wait if a writer is in line. */ | ||
259 | + if (lock->owners == 1 && QSIMPLEQ_EMPTY(&lock->tickets)) { | ||
260 | + lock->owners = -1; | ||
261 | + qemu_co_mutex_unlock(&lock->mutex); | ||
262 | + } else { | ||
263 | + CoRwTicket my_ticket = { false, qemu_coroutine_self() }; | ||
264 | + | ||
265 | + lock->owners--; | ||
266 | + QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next); | ||
267 | + qemu_co_rwlock_maybe_wake_one(lock); | ||
268 | + qemu_coroutine_yield(); | ||
269 | + assert(lock->owners == -1); | ||
270 | } | ||
271 | - lock->pending_writer--; | ||
272 | - | ||
273 | - /* The rest of the write-side critical section is run with | ||
274 | - * the mutex taken, similar to qemu_co_rwlock_wrlock. Do | ||
275 | - * not account for the lock twice in self->locks_held. | ||
276 | - */ | ||
277 | - self->locks_held--; | ||
278 | } | ||
279 | -- | 373 | -- |
280 | 2.30.2 | 374 | 2.40.1 |
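
To make the new API concrete before the driver conversions that follow, here
is a minimal sketch of a submission path using it. Only blk_io_plug(),
blk_io_unplug() and blk_io_plug_call() come from this patch; MyQueue,
my_doorbell_fn() and my_submit() are hypothetical names invented for
illustration:

    #include "qemu/osdep.h"
    #include "sysemu/block-backend.h"

    typedef struct {
        unsigned queued;          /* hypothetical: requests on a ring */
    } MyQueue;

    /* Runs once per batch: notify the device about everything queued */
    static void my_doorbell_fn(void *opaque)
    {
        MyQueue *q = opaque;
        /* e.g. a single hardware doorbell write would go here */
        q->queued = 0;
    }

    static void my_submit(MyQueue *q)
    {
        q->queued++;              /* place one request on the ring */

        /* Immediate when unplugged; deferred, and deduplicated by the
         * fn/opaque pair, while inside a plugged section. */
        blk_io_plug_call(my_doorbell_fn, q);
    }

    static void my_handle_requests(MyQueue *q)
    {
        blk_io_plug();
        my_submit(q);
        my_submit(q);
        my_submit(q);
        blk_io_unplug();          /* my_doorbell_fn(q) runs exactly once */
    }

Because the plug state lives in thread-local storage, two IOThreads can run
this loop concurrently without sharing any per-BlockBackend state.
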
1 | From: David Edmondson <david.edmondson@oracle.com> | 1 | Stop using the .bdrv_co_io_plug() API because it is not multi-queue |
2 | block layer friendly. Use the new blk_io_plug_call() API to batch I/O | ||
3 | submission instead. | ||
2 | 4 | ||
3 | Test that downgrading an rwlock does not result in a failure to | 5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
4 | schedule coroutines queued on the rwlock. | 6 | Reviewed-by: Eric Blake <eblake@redhat.com> |
5 | 7 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | |
6 | The diagram associated with test_co_rwlock_downgrade() describes the | 8 | Acked-by: Kevin Wolf <kwolf@redhat.com> |
7 | intended behaviour, but what was observed previously corresponds to: | 9 | Message-id: 20230530180959.1108766-3-stefanha@redhat.com |
8 | |||
9 | | c1 | c2 | c3 | c4 | | ||
10 | |--------+------------+------------+----------| | ||
11 | | rdlock | | | | | ||
12 | | yield | | | | | ||
13 | | | wrlock | | | | ||
14 | | | <queued> | | | | ||
15 | | | | rdlock | | | ||
16 | | | | <queued> | | | ||
17 | | | | | wrlock | | ||
18 | | | | | <queued> | | ||
19 | | unlock | | | | | ||
20 | | yield | | | | | ||
21 | | | <dequeued> | | | | ||
22 | | | downgrade | | | | ||
23 | | | ... | | | | ||
24 | | | unlock | | | | ||
25 | | | | <dequeued> | | | ||
26 | | | | <queued> | | | ||
27 | |||
28 | This results in a failure... | ||
29 | |||
30 | ERROR:../tests/test-coroutine.c:369:test_co_rwlock_downgrade: assertion failed: (c3_done) | ||
31 | Bail out! ERROR:../tests/test-coroutine.c:369:test_co_rwlock_downgrade: assertion failed: (c3_done) | ||
32 | |||
33 | ...as a result of the c3 coroutine failing to run to completion. | ||
34 | |||
35 | Signed-off-by: David Edmondson <david.edmondson@oracle.com> | ||
36 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
37 | Message-id: 20210325112941.365238-7-pbonzini@redhat.com | ||
38 | Message-Id: <20210309144015.557477-5-david.edmondson@oracle.com> | ||
39 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
40 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
41 | --- | 11 | --- |
42 | tests/unit/test-coroutine.c | 99 +++++++++++++++++++++++++++++++++++++ | 12 | block/nvme.c | 44 ++++++++++++-------------------------------- |
43 | 1 file changed, 99 insertions(+) | 13 | block/trace-events | 1 - |
14 | 2 files changed, 12 insertions(+), 33 deletions(-) | ||
44 | 15 | ||
45 | diff --git a/tests/unit/test-coroutine.c b/tests/unit/test-coroutine.c | 16 | diff --git a/block/nvme.c b/block/nvme.c |
46 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
47 | --- a/tests/unit/test-coroutine.c | 18 | --- a/block/nvme.c |
48 | +++ b/tests/unit/test-coroutine.c | 19 | +++ b/block/nvme.c |
49 | @@ -XXX,XX +XXX,XX @@ static void test_co_rwlock_upgrade(void) | 20 | @@ -XXX,XX +XXX,XX @@ |
50 | g_assert(c2_done); | 21 | #include "qemu/vfio-helpers.h" |
22 | #include "block/block-io.h" | ||
23 | #include "block/block_int.h" | ||
24 | +#include "sysemu/block-backend.h" | ||
25 | #include "sysemu/replay.h" | ||
26 | #include "trace.h" | ||
27 | |||
28 | @@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState { | ||
29 | int blkshift; | ||
30 | |||
31 | uint64_t max_transfer; | ||
32 | - bool plugged; | ||
33 | |||
34 | bool supports_write_zeroes; | ||
35 | bool supports_discard; | ||
36 | @@ -XXX,XX +XXX,XX @@ static void nvme_kick(NVMeQueuePair *q) | ||
37 | { | ||
38 | BDRVNVMeState *s = q->s; | ||
39 | |||
40 | - if (s->plugged || !q->need_kick) { | ||
41 | + if (!q->need_kick) { | ||
42 | return; | ||
43 | } | ||
44 | trace_nvme_kick(s, q->index); | ||
45 | @@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q) | ||
46 | NvmeCqe *c; | ||
47 | |||
48 | trace_nvme_process_completion(s, q->index, q->inflight); | ||
49 | - if (s->plugged) { | ||
50 | - trace_nvme_process_completion_queue_plugged(s, q->index); | ||
51 | - return false; | ||
52 | - } | ||
53 | |||
54 | /* | ||
55 | * Support re-entrancy when a request cb() function invokes aio_poll(). | ||
56 | @@ -XXX,XX +XXX,XX @@ static void nvme_trace_command(const NvmeCmd *cmd) | ||
57 | } | ||
51 | } | 58 | } |
52 | 59 | ||
53 | +static void coroutine_fn rwlock_rdlock_yield(void *opaque) | 60 | +static void nvme_unplug_fn(void *opaque) |
54 | +{ | 61 | +{ |
55 | + qemu_co_rwlock_rdlock(&rwlock); | 62 | + NVMeQueuePair *q = opaque; |
56 | + qemu_coroutine_yield(); | ||
57 | + | 63 | + |
58 | + qemu_co_rwlock_unlock(&rwlock); | 64 | + QEMU_LOCK_GUARD(&q->lock); |
59 | + qemu_coroutine_yield(); | 65 | + nvme_kick(q); |
60 | + | 66 | + nvme_process_completion(q); |
61 | + *(bool *)opaque = true; | ||
62 | +} | 67 | +} |
63 | + | 68 | + |
64 | +static void coroutine_fn rwlock_wrlock_downgrade(void *opaque) | 69 | static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, |
65 | +{ | 70 | NvmeCmd *cmd, BlockCompletionFunc cb, |
66 | + qemu_co_rwlock_wrlock(&rwlock); | 71 | void *opaque) |
67 | + | 72 | @@ -XXX,XX +XXX,XX @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, |
68 | + qemu_co_rwlock_downgrade(&rwlock); | 73 | q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd)); |
69 | + qemu_co_rwlock_unlock(&rwlock); | 74 | q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE; |
70 | + *(bool *)opaque = true; | 75 | q->need_kick++; |
71 | +} | 76 | - nvme_kick(q); |
72 | + | 77 | - nvme_process_completion(q); |
73 | +static void coroutine_fn rwlock_rdlock(void *opaque) | 78 | + blk_io_plug_call(nvme_unplug_fn, q); |
74 | +{ | 79 | qemu_mutex_unlock(&q->lock); |
75 | + qemu_co_rwlock_rdlock(&rwlock); | 80 | } |
76 | + | 81 | |
77 | + qemu_co_rwlock_unlock(&rwlock); | 82 | @@ -XXX,XX +XXX,XX @@ static void nvme_attach_aio_context(BlockDriverState *bs, |
78 | + *(bool *)opaque = true; | 83 | } |
79 | +} | 84 | } |
80 | + | 85 | |
81 | +static void coroutine_fn rwlock_wrlock(void *opaque) | 86 | -static void coroutine_fn nvme_co_io_plug(BlockDriverState *bs) |
82 | +{ | 87 | -{ |
83 | + qemu_co_rwlock_wrlock(&rwlock); | 88 | - BDRVNVMeState *s = bs->opaque; |
84 | + | 89 | - assert(!s->plugged); |
85 | + qemu_co_rwlock_unlock(&rwlock); | 90 | - s->plugged = true; |
86 | + *(bool *)opaque = true; | 91 | -} |
87 | +} | 92 | - |
88 | + | 93 | -static void coroutine_fn nvme_co_io_unplug(BlockDriverState *bs) |
89 | +/* | 94 | -{ |
90 | + * Check that downgrading a reader-writer lock does not cause a hang. | 95 | - BDRVNVMeState *s = bs->opaque; |
91 | + * | 96 | - assert(s->plugged); |
92 | + * Four coroutines are used to produce a situation where there are | 97 | - s->plugged = false; |
93 | + * both reader and writer hopefuls waiting to acquire an rwlock that | 98 | - for (unsigned i = INDEX_IO(0); i < s->queue_count; i++) { |
94 | + * is held by a reader. | 99 | - NVMeQueuePair *q = s->queues[i]; |
95 | + * | 100 | - qemu_mutex_lock(&q->lock); |
96 | + * The correct sequence of operations we aim to provoke can be | 101 | - nvme_kick(q); |
97 | + * represented as: | 102 | - nvme_process_completion(q); |
98 | + * | 103 | - qemu_mutex_unlock(&q->lock); |
99 | + * | c1 | c2 | c3 | c4 | | 104 | - } |
100 | + * |--------+------------+------------+------------| | 105 | -} |
101 | + * | rdlock | | | | | 106 | - |
102 | + * | yield | | | | | 107 | static bool nvme_register_buf(BlockDriverState *bs, void *host, size_t size, |
103 | + * | | wrlock | | | | 108 | Error **errp) |
104 | + * | | <queued> | | | | 109 | { |
105 | + * | | | rdlock | | | 110 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nvme = { |
106 | + * | | | <queued> | | | 111 | .bdrv_detach_aio_context = nvme_detach_aio_context, |
107 | + * | | | | wrlock | | 112 | .bdrv_attach_aio_context = nvme_attach_aio_context, |
108 | + * | | | | <queued> | | 113 | |
109 | + * | unlock | | | | | 114 | - .bdrv_co_io_plug = nvme_co_io_plug, |
110 | + * | yield | | | | | 115 | - .bdrv_co_io_unplug = nvme_co_io_unplug, |
111 | + * | | <dequeued> | | | | 116 | - |
112 | + * | | downgrade | | | | 117 | .bdrv_register_buf = nvme_register_buf, |
113 | + * | | | <dequeued> | | | 118 | .bdrv_unregister_buf = nvme_unregister_buf, |
114 | + * | | | unlock | | | 119 | }; |
115 | + * | | ... | | | | 120 | diff --git a/block/trace-events b/block/trace-events |
116 | + * | | unlock | | | | 121 | index XXXXXXX..XXXXXXX 100644 |
117 | + * | | | | <dequeued> | | 122 | --- a/block/trace-events |
118 | + * | | | | unlock | | 123 | +++ b/block/trace-events |
119 | + */ | 124 | @@ -XXX,XX +XXX,XX @@ nvme_kick(void *s, unsigned q_index) "s %p q #%u" |
120 | +static void test_co_rwlock_downgrade(void) | 125 | nvme_dma_flush_queue_wait(void *s) "s %p" |
121 | +{ | 126 | nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x" |
122 | + bool c1_done = false; | 127 | nvme_process_completion(void *s, unsigned q_index, int inflight) "s %p q #%u inflight %d" |
123 | + bool c2_done = false; | 128 | -nvme_process_completion_queue_plugged(void *s, unsigned q_index) "s %p q #%u" |
124 | + bool c3_done = false; | 129 | nvme_complete_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d" |
125 | + bool c4_done = false; | 130 | nvme_submit_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d" |
126 | + Coroutine *c1, *c2, *c3, *c4; | 131 | nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x" |
127 | + | ||
128 | + qemu_co_rwlock_init(&rwlock); | ||
129 | + | ||
130 | + c1 = qemu_coroutine_create(rwlock_rdlock_yield, &c1_done); | ||
131 | + c2 = qemu_coroutine_create(rwlock_wrlock_downgrade, &c2_done); | ||
132 | + c3 = qemu_coroutine_create(rwlock_rdlock, &c3_done); | ||
133 | + c4 = qemu_coroutine_create(rwlock_wrlock, &c4_done); | ||
134 | + | ||
135 | + qemu_coroutine_enter(c1); | ||
136 | + qemu_coroutine_enter(c2); | ||
137 | + qemu_coroutine_enter(c3); | ||
138 | + qemu_coroutine_enter(c4); | ||
139 | + | ||
140 | + qemu_coroutine_enter(c1); | ||
141 | + | ||
142 | + g_assert(c2_done); | ||
143 | + g_assert(c3_done); | ||
144 | + g_assert(c4_done); | ||
145 | + | ||
146 | + qemu_coroutine_enter(c1); | ||
147 | + | ||
148 | + g_assert(c1_done); | ||
149 | +} | ||
150 | + | ||
151 | /* | ||
152 | * Check that creation, enter, and return work | ||
153 | */ | ||
154 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
155 | g_test_add_func("/locking/co-mutex", test_co_mutex); | ||
156 | g_test_add_func("/locking/co-mutex/lockable", test_co_mutex_lockable); | ||
157 | g_test_add_func("/locking/co-rwlock/upgrade", test_co_rwlock_upgrade); | ||
158 | + g_test_add_func("/locking/co-rwlock/downgrade", test_co_rwlock_downgrade); | ||
159 | if (g_test_perf()) { | ||
160 | g_test_add_func("/perf/lifecycle", perf_lifecycle); | ||
161 | g_test_add_func("/perf/nesting", perf_nesting); | ||
162 | -- | 132 | -- |
163 | 2.30.2 | 133 | 2.40.1 |
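
Sketched with the functions from this patch, the effect on a batch of
requests looks like this (the three-request scenario and the req/cmd
variables are hypothetical):

    blk_io_plug();
    /* Each call enqueues an SQE, increments q->need_kick and defers
     * nvme_unplug_fn via blk_io_plug_call(nvme_unplug_fn, q). */
    nvme_submit_command(q, req0, &cmd0, cb, opaque0);
    nvme_submit_command(q, req1, &cmd1, cb, opaque1);
    nvme_submit_command(q, req2, &cmd2, cb, opaque2);
    blk_io_unplug();
    /* nvme_unplug_fn(q) runs once: a single nvme_kick(q) doorbell write
     * and a single nvme_process_completion(q) cover all three requests. */

Unlike the old s->plugged flag, the deferred call needs no per-BDRVNVMeState
bookkeeping, so the batching works whichever thread submits the I/O.
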
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | Stop using the .bdrv_co_io_plug() API because it is not multi-queue |
2 | block layer friendly. Use the new blk_io_plug_call() API to batch I/O | ||
3 | submission instead. | ||
2 | 4 | ||
3 | Test that rwlock upgrade is fair, and that readers go back to sleep if | 5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
4 | a writer is in line. | 6 | Reviewed-by: Eric Blake <eblake@redhat.com> |
5 | 7 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | |
6 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 8 | Acked-by: Kevin Wolf <kwolf@redhat.com> |
7 | Message-id: 20210325112941.365238-6-pbonzini@redhat.com | 9 | Message-id: 20230530180959.1108766-4-stefanha@redhat.com |
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
9 | --- | 11 | --- |
10 | tests/unit/test-coroutine.c | 62 +++++++++++++++++++++++++++++++++++++ | 12 | block/blkio.c | 43 ++++++++++++++++++++++++------------------- |
11 | 1 file changed, 62 insertions(+) | 13 | 1 file changed, 24 insertions(+), 19 deletions(-) |
12 | 14 | ||
13 | diff --git a/tests/unit/test-coroutine.c b/tests/unit/test-coroutine.c | 15 | diff --git a/block/blkio.c b/block/blkio.c |
14 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tests/unit/test-coroutine.c | 17 | --- a/block/blkio.c |
16 | +++ b/tests/unit/test-coroutine.c | 18 | +++ b/block/blkio.c |
17 | @@ -XXX,XX +XXX,XX @@ static void test_co_mutex_lockable(void) | 19 | @@ -XXX,XX +XXX,XX @@ |
18 | g_assert(QEMU_MAKE_LOCKABLE(null_pointer) == NULL); | 20 | #include "qemu/error-report.h" |
21 | #include "qapi/qmp/qdict.h" | ||
22 | #include "qemu/module.h" | ||
23 | +#include "sysemu/block-backend.h" | ||
24 | #include "exec/memory.h" /* for ram_block_discard_disable() */ | ||
25 | |||
26 | #include "block/block-io.h" | ||
27 | @@ -XXX,XX +XXX,XX @@ static void blkio_detach_aio_context(BlockDriverState *bs) | ||
28 | NULL, NULL, NULL); | ||
19 | } | 29 | } |
20 | 30 | ||
21 | +static CoRwlock rwlock; | 31 | -/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */ |
32 | -static void blkio_submit_io(BlockDriverState *bs) | ||
33 | +/* | ||
34 | + * Called by blk_io_unplug() or immediately if not plugged. Called without | ||
35 | + * blkio_lock. | ||
36 | + */ | ||
37 | +static void blkio_unplug_fn(void *opaque) | ||
38 | { | ||
39 | - if (qatomic_read(&bs->io_plugged) == 0) { | ||
40 | - BDRVBlkioState *s = bs->opaque; | ||
41 | + BDRVBlkioState *s = opaque; | ||
42 | |||
43 | + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { | ||
44 | blkioq_do_io(s->blkioq, NULL, 0, 0, NULL); | ||
45 | } | ||
46 | } | ||
47 | |||
48 | +/* | ||
49 | + * Schedule I/O submission after enqueuing a new request. Called without | ||
50 | + * blkio_lock. | ||
51 | + */ | ||
52 | +static void blkio_submit_io(BlockDriverState *bs) | ||
53 | +{ | ||
54 | + BDRVBlkioState *s = bs->opaque; | ||
22 | + | 55 | + |
23 | +/* Test that readers are properly sent back to the queue when upgrading, | 56 | + blk_io_plug_call(blkio_unplug_fn, s); |
24 | + * even if they are the sole readers. The test scenario is as follows: | ||
25 | + * | ||
26 | + * | ||
27 | + * | c1 | c2 | | ||
28 | + * |--------------+------------+ | ||
29 | + * | rdlock | | | ||
30 | + * | yield | | | ||
31 | + * | | wrlock | | ||
32 | + * | | <queued> | | ||
33 | + * | upgrade | | | ||
34 | + * | <queued> | <dequeued> | | ||
35 | + * | | unlock | | ||
36 | + * | <dequeued> | | | ||
37 | + * | unlock | | | ||
38 | + */ | ||
39 | + | ||
40 | +static void coroutine_fn rwlock_yield_upgrade(void *opaque) | ||
41 | +{ | ||
42 | + qemu_co_rwlock_rdlock(&rwlock); | ||
43 | + qemu_coroutine_yield(); | ||
44 | + | ||
45 | + qemu_co_rwlock_upgrade(&rwlock); | ||
46 | + qemu_co_rwlock_unlock(&rwlock); | ||
47 | + | ||
48 | + *(bool *)opaque = true; | ||
49 | +} | 57 | +} |
50 | + | 58 | + |
51 | +static void coroutine_fn rwlock_wrlock_yield(void *opaque) | 59 | static int coroutine_fn |
52 | +{ | 60 | blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) |
53 | + qemu_co_rwlock_wrlock(&rwlock); | 61 | { |
54 | + qemu_coroutine_yield(); | 62 | @@ -XXX,XX +XXX,XX @@ blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) |
55 | + | 63 | |
56 | + qemu_co_rwlock_unlock(&rwlock); | 64 | WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { |
57 | + *(bool *)opaque = true; | 65 | blkioq_discard(s->blkioq, offset, bytes, &cod, 0); |
58 | +} | 66 | - blkio_submit_io(bs); |
59 | + | 67 | } |
60 | +static void test_co_rwlock_upgrade(void) | 68 | |
61 | +{ | 69 | + blkio_submit_io(bs); |
62 | + bool c1_done = false; | 70 | qemu_coroutine_yield(); |
63 | + bool c2_done = false; | 71 | return cod.ret; |
64 | + Coroutine *c1, *c2; | 72 | } |
65 | + | 73 | @@ -XXX,XX +XXX,XX @@ blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, |
66 | + qemu_co_rwlock_init(&rwlock); | 74 | |
67 | + c1 = qemu_coroutine_create(rwlock_yield_upgrade, &c1_done); | 75 | WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { |
68 | + c2 = qemu_coroutine_create(rwlock_wrlock_yield, &c2_done); | 76 | blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0); |
69 | + | 77 | - blkio_submit_io(bs); |
70 | + qemu_coroutine_enter(c1); | 78 | } |
71 | + qemu_coroutine_enter(c2); | 79 | |
72 | + | 80 | + blkio_submit_io(bs); |
73 | + /* c1 now should go to sleep. */ | 81 | qemu_coroutine_yield(); |
74 | + qemu_coroutine_enter(c1); | 82 | |
75 | + g_assert(!c1_done); | 83 | if (use_bounce_buffer) { |
76 | + | 84 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset, |
77 | + qemu_coroutine_enter(c2); | 85 | |
78 | + g_assert(c1_done); | 86 | WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { |
79 | + g_assert(c2_done); | 87 | blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags); |
80 | +} | 88 | - blkio_submit_io(bs); |
81 | + | 89 | } |
82 | /* | 90 | |
83 | * Check that creation, enter, and return work | 91 | + blkio_submit_io(bs); |
84 | */ | 92 | qemu_coroutine_yield(); |
85 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 93 | |
86 | g_test_add_func("/basic/order", test_order); | 94 | if (use_bounce_buffer) { |
87 | g_test_add_func("/locking/co-mutex", test_co_mutex); | 95 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkio_co_flush(BlockDriverState *bs) |
88 | g_test_add_func("/locking/co-mutex/lockable", test_co_mutex_lockable); | 96 | |
89 | + g_test_add_func("/locking/co-rwlock/upgrade", test_co_rwlock_upgrade); | 97 | WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { |
90 | if (g_test_perf()) { | 98 | blkioq_flush(s->blkioq, &cod, 0); |
91 | g_test_add_func("/perf/lifecycle", perf_lifecycle); | 99 | - blkio_submit_io(bs); |
92 | g_test_add_func("/perf/nesting", perf_nesting); | 100 | } |
101 | |||
102 | + blkio_submit_io(bs); | ||
103 | qemu_coroutine_yield(); | ||
104 | return cod.ret; | ||
105 | } | ||
106 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs, | ||
107 | |||
108 | WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { | ||
109 | blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags); | ||
110 | - blkio_submit_io(bs); | ||
111 | } | ||
112 | |||
113 | + blkio_submit_io(bs); | ||
114 | qemu_coroutine_yield(); | ||
115 | return cod.ret; | ||
116 | } | ||
117 | |||
118 | -static void coroutine_fn blkio_co_io_unplug(BlockDriverState *bs) | ||
119 | -{ | ||
120 | - BDRVBlkioState *s = bs->opaque; | ||
121 | - | ||
122 | - WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { | ||
123 | - blkio_submit_io(bs); | ||
124 | - } | ||
125 | -} | ||
126 | - | ||
127 | typedef enum { | ||
128 | BMRR_OK, | ||
129 | BMRR_SKIP, | ||
130 | @@ -XXX,XX +XXX,XX @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp) | ||
131 | .bdrv_co_pwritev = blkio_co_pwritev, \ | ||
132 | .bdrv_co_flush_to_disk = blkio_co_flush, \ | ||
133 | .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ | ||
134 | - .bdrv_co_io_unplug = blkio_co_io_unplug, \ | ||
135 | .bdrv_refresh_limits = blkio_refresh_limits, \ | ||
136 | .bdrv_register_buf = blkio_register_buf, \ | ||
137 | .bdrv_unregister_buf = blkio_unregister_buf, \ | ||
93 | -- | 138 | -- |
94 | 2.30.2 | 139 | 2.40.1 |
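
One detail the commit message leaves implicit: the blkioq_*() enqueue still
happens under WITH_QEMU_LOCK_GUARD(&s->blkio_lock), but blkio_submit_io()
moves after the guarded block. The ordering matters because, outside a
plugged section, blk_io_plug_call() invokes blkio_unplug_fn() immediately,
and blkio_unplug_fn() takes blkio_lock itself; QEMU mutexes are not
recursive, so calling it with the lock still held would self-deadlock.
Schematically (a condensed view of the read path above, not new code):

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
    }                        /* blkio_lock released here */

    blkio_submit_io(bs);     /* may run blkio_unplug_fn() right away,
                              * which acquires blkio_lock on its own */
    qemu_coroutine_yield();
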
New patch | |||
1 | Stop using the .bdrv_co_io_plug() API because it is not multi-queue | ||
2 | block layer friendly. Use the new blk_io_plug_call() API to batch I/O | ||
3 | submission instead. | ||
1 | 4 | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | ||
8 | Acked-by: Kevin Wolf <kwolf@redhat.com> | ||
9 | Message-id: 20230530180959.1108766-5-stefanha@redhat.com | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | ||
12 | include/block/raw-aio.h | 7 ------- | ||
13 | block/file-posix.c | 10 ---------- | ||
14 | block/io_uring.c | 44 ++++++++++++++++------------------------- | ||
15 | block/trace-events | 5 ++--- | ||
16 | 4 files changed, 19 insertions(+), 47 deletions(-) | ||
17 | |||
18 | diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/include/block/raw-aio.h | ||
21 | +++ b/include/block/raw-aio.h | ||
22 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset, | ||
23 | QEMUIOVector *qiov, int type); | ||
24 | void luring_detach_aio_context(LuringState *s, AioContext *old_context); | ||
25 | void luring_attach_aio_context(LuringState *s, AioContext *new_context); | ||
26 | - | ||
27 | -/* | ||
28 | - * luring_io_plug/unplug work in the thread's current AioContext, therefore the | ||
29 | - * caller must ensure that they are paired in the same IOThread. | ||
30 | - */ | ||
31 | -void luring_io_plug(void); | ||
32 | -void luring_io_unplug(void); | ||
33 | #endif | ||
34 | |||
35 | #ifdef _WIN32 | ||
36 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/block/file-posix.c | ||
39 | +++ b/block/file-posix.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_plug(BlockDriverState *bs) | ||
41 | laio_io_plug(); | ||
42 | } | ||
43 | #endif | ||
44 | -#ifdef CONFIG_LINUX_IO_URING | ||
45 | - if (s->use_linux_io_uring) { | ||
46 | - luring_io_plug(); | ||
47 | - } | ||
48 | -#endif | ||
49 | } | ||
50 | |||
51 | static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs) | ||
52 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs) | ||
53 | laio_io_unplug(s->aio_max_batch); | ||
54 | } | ||
55 | #endif | ||
56 | -#ifdef CONFIG_LINUX_IO_URING | ||
57 | - if (s->use_linux_io_uring) { | ||
58 | - luring_io_unplug(); | ||
59 | - } | ||
60 | -#endif | ||
61 | } | ||
62 | |||
63 | static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) | ||
64 | diff --git a/block/io_uring.c b/block/io_uring.c | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/block/io_uring.c | ||
67 | +++ b/block/io_uring.c | ||
68 | @@ -XXX,XX +XXX,XX @@ | ||
69 | #include "block/raw-aio.h" | ||
70 | #include "qemu/coroutine.h" | ||
71 | #include "qapi/error.h" | ||
72 | +#include "sysemu/block-backend.h" | ||
73 | #include "trace.h" | ||
74 | |||
75 | /* Only used for assertions. */ | ||
76 | @@ -XXX,XX +XXX,XX @@ typedef struct LuringAIOCB { | ||
77 | } LuringAIOCB; | ||
78 | |||
79 | typedef struct LuringQueue { | ||
80 | - int plugged; | ||
81 | unsigned int in_queue; | ||
82 | unsigned int in_flight; | ||
83 | bool blocked; | ||
84 | @@ -XXX,XX +XXX,XX @@ static void luring_process_completions_and_submit(LuringState *s) | ||
85 | { | ||
86 | luring_process_completions(s); | ||
87 | |||
88 | - if (!s->io_q.plugged && s->io_q.in_queue > 0) { | ||
89 | + if (s->io_q.in_queue > 0) { | ||
90 | ioq_submit(s); | ||
91 | } | ||
92 | } | ||
93 | @@ -XXX,XX +XXX,XX @@ static void qemu_luring_poll_ready(void *opaque) | ||
94 | static void ioq_init(LuringQueue *io_q) | ||
95 | { | ||
96 | QSIMPLEQ_INIT(&io_q->submit_queue); | ||
97 | - io_q->plugged = 0; | ||
98 | io_q->in_queue = 0; | ||
99 | io_q->in_flight = 0; | ||
100 | io_q->blocked = false; | ||
101 | } | ||
102 | |||
103 | -void luring_io_plug(void) | ||
104 | +static void luring_unplug_fn(void *opaque) | ||
105 | { | ||
106 | - AioContext *ctx = qemu_get_current_aio_context(); | ||
107 | - LuringState *s = aio_get_linux_io_uring(ctx); | ||
108 | - trace_luring_io_plug(s); | ||
109 | - s->io_q.plugged++; | ||
110 | -} | ||
111 | - | ||
112 | -void luring_io_unplug(void) | ||
113 | -{ | ||
114 | - AioContext *ctx = qemu_get_current_aio_context(); | ||
115 | - LuringState *s = aio_get_linux_io_uring(ctx); | ||
116 | - assert(s->io_q.plugged); | ||
117 | - trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged, | ||
118 | - s->io_q.in_queue, s->io_q.in_flight); | ||
119 | - if (--s->io_q.plugged == 0 && | ||
120 | - !s->io_q.blocked && s->io_q.in_queue > 0) { | ||
121 | + LuringState *s = opaque; | ||
122 | + trace_luring_unplug_fn(s, s->io_q.blocked, s->io_q.in_queue, | ||
123 | + s->io_q.in_flight); | ||
124 | + if (!s->io_q.blocked && s->io_q.in_queue > 0) { | ||
125 | ioq_submit(s); | ||
126 | } | ||
127 | } | ||
128 | @@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, | ||
129 | |||
130 | QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next); | ||
131 | s->io_q.in_queue++; | ||
132 | - trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged, | ||
133 | - s->io_q.in_queue, s->io_q.in_flight); | ||
134 | - if (!s->io_q.blocked && | ||
135 | - (!s->io_q.plugged || | ||
136 | - s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) { | ||
137 | - ret = ioq_submit(s); | ||
138 | - trace_luring_do_submit_done(s, ret); | ||
139 | - return ret; | ||
140 | + trace_luring_do_submit(s, s->io_q.blocked, s->io_q.in_queue, | ||
141 | + s->io_q.in_flight); | ||
142 | + if (!s->io_q.blocked) { | ||
143 | + if (s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES) { | ||
144 | + ret = ioq_submit(s); | ||
145 | + trace_luring_do_submit_done(s, ret); | ||
146 | + return ret; | ||
147 | + } | ||
148 | + | ||
149 | + blk_io_plug_call(luring_unplug_fn, s); | ||
150 | } | ||
151 | return 0; | ||
152 | } | ||
153 | diff --git a/block/trace-events b/block/trace-events | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/block/trace-events | ||
156 | +++ b/block/trace-events | ||
157 | @@ -XXX,XX +XXX,XX @@ file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) " | ||
158 | # io_uring.c | ||
159 | luring_init_state(void *s, size_t size) "s %p size %zu" | ||
160 | luring_cleanup_state(void *s) "%p freed" | ||
161 | -luring_io_plug(void *s) "LuringState %p plug" | ||
162 | -luring_io_unplug(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d" | ||
163 | -luring_do_submit(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d" | ||
164 | +luring_unplug_fn(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d" | ||
165 | +luring_do_submit(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d" | ||
166 | luring_do_submit_done(void *s, int ret) "LuringState %p submitted to kernel %d" | ||
167 | luring_co_submit(void *bs, void *s, void *luringcb, int fd, uint64_t offset, size_t nbytes, int type) "bs %p s %p luringcb %p fd %d offset %" PRId64 " nbytes %zd type %d" | ||
168 | luring_process_completion(void *s, void *aiocb, int ret) "LuringState %p luringcb %p ret %d" | ||
169 | -- | ||
170 | 2.40.1
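
The new policy bounds batching by the ring size: deferring every submission
until blk_io_unplug() could otherwise queue more SQEs than the io_uring
ring holds. Restated from the luring_do_submit() hunk above (a paraphrase,
not new code):

    if (!s->io_q.blocked) {
        if (s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES) {
            /* ring at capacity: flush synchronously now */
            return ioq_submit(s);
        }
        /* otherwise defer a single ioq_submit() until the outermost
         * blk_io_unplug() in this thread */
        blk_io_plug_call(luring_unplug_fn, s);
    }
    return 0;

Note also that luring_process_completions_and_submit() no longer consults a
plugged counter, so requests queued by completion callbacks are submitted
right away even inside a plugged section.
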
New patch | |||
1 | 1 | Stop using the .bdrv_co_io_plug() API because it is not multi-queue | |
2 | block layer friendly. Use the new blk_io_plug_call() API to batch I/O | ||
3 | submission instead. | ||
4 | |||
5 | Note that a dev_max_batch check is dropped in laio_io_unplug() because | ||
6 | the semantics of unplug_fn() are different from .bdrv_co_io_unplug(): | ||
7 | 1. unplug_fn() is only called when the last blk_io_unplug() call occurs, | ||
8 | not every time blk_io_unplug() is called. | ||
9 | 2. unplug_fn() is per-thread, not per-BlockDriverState, so there is no | ||
10 | way to get per-BlockDriverState fields like dev_max_batch. | ||
11 | |||
12 | Therefore this condition cannot be moved to laio_unplug_fn(). It is not | ||
13 | obvious that this condition affects performance in practice, so I am | ||
14 | removing it instead of trying to come up with a more complex mechanism | ||
15 | to preserve the condition. | ||
16 | |||
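To make point 1 concrete, here is a minimal sketch of the deferred-call semantics, assuming a fixed-size per-thread callback array (no overflow handling) and immediate dispatch when no plug section is active. It is illustrative only, not the actual block/plug.c implementation:

```c
/* Illustrative sketch only -- NOT the actual block/plug.c code. */
typedef struct {
    void (*fn)(void *);
    void *opaque;
} DeferredCall;

static __thread unsigned plug_depth;     /* nesting counter, per thread */
static __thread DeferredCall calls[16];  /* deferred unplug functions */
static __thread unsigned num_calls;

void blk_io_plug(void)
{
    plug_depth++;
}

void blk_io_plug_call(void (*fn)(void *), void *opaque)
{
    if (plug_depth == 0) {
        fn(opaque);              /* no plug section active: run now */
        return;
    }
    for (unsigned i = 0; i < num_calls; i++) {
        if (calls[i].fn == fn && calls[i].opaque == opaque) {
            return;              /* already queued; fn runs once, per point 1 */
        }
    }
    calls[num_calls++] = (DeferredCall){ fn, opaque };
}

void blk_io_unplug(void)
{
    if (--plug_depth > 0) {
        return;                  /* only the outermost unplug flushes */
    }
    for (unsigned i = 0; i < num_calls; i++) {
        calls[i].fn(calls[i].opaque);
    }
    num_calls = 0;
}
```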
17 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
18 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
19 | Acked-by: Kevin Wolf <kwolf@redhat.com> | ||
20 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | ||
21 | Message-id: 20230530180959.1108766-6-stefanha@redhat.com | ||
22 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
23 | --- | ||
24 | include/block/raw-aio.h | 7 ------- | ||
25 | block/file-posix.c | 28 ---------------------------- | ||
26 | block/linux-aio.c | 41 +++++++++++------------------------------ | ||
27 | 3 files changed, 11 insertions(+), 65 deletions(-) | ||
28 | |||
29 | diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/include/block/raw-aio.h | ||
32 | +++ b/include/block/raw-aio.h | ||
33 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, | ||
34 | |||
35 | void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); | ||
36 | void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); | ||
37 | - | ||
38 | -/* | ||
39 | - * laio_io_plug/unplug work in the thread's current AioContext, therefore the | ||
40 | - * caller must ensure that they are paired in the same IOThread. | ||
41 | - */ | ||
42 | -void laio_io_plug(void); | ||
43 | -void laio_io_unplug(uint64_t dev_max_batch); | ||
44 | #endif | ||
45 | /* io_uring.c - Linux io_uring implementation */ | ||
46 | #ifdef CONFIG_LINUX_IO_URING | ||
47 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/block/file-posix.c | ||
50 | +++ b/block/file-posix.c | ||
51 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, | ||
52 | return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE); | ||
53 | } | ||
54 | |||
55 | -static void coroutine_fn raw_co_io_plug(BlockDriverState *bs) | ||
56 | -{ | ||
57 | - BDRVRawState __attribute__((unused)) *s = bs->opaque; | ||
58 | -#ifdef CONFIG_LINUX_AIO | ||
59 | - if (s->use_linux_aio) { | ||
60 | - laio_io_plug(); | ||
61 | - } | ||
62 | -#endif | ||
63 | -} | ||
64 | - | ||
65 | -static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs) | ||
66 | -{ | ||
67 | - BDRVRawState __attribute__((unused)) *s = bs->opaque; | ||
68 | -#ifdef CONFIG_LINUX_AIO | ||
69 | - if (s->use_linux_aio) { | ||
70 | - laio_io_unplug(s->aio_max_batch); | ||
71 | - } | ||
72 | -#endif | ||
73 | -} | ||
74 | - | ||
75 | static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) | ||
76 | { | ||
77 | BDRVRawState *s = bs->opaque; | ||
78 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = { | ||
79 | .bdrv_co_copy_range_from = raw_co_copy_range_from, | ||
80 | .bdrv_co_copy_range_to = raw_co_copy_range_to, | ||
81 | .bdrv_refresh_limits = raw_refresh_limits, | ||
82 | - .bdrv_co_io_plug = raw_co_io_plug, | ||
83 | - .bdrv_co_io_unplug = raw_co_io_unplug, | ||
84 | .bdrv_attach_aio_context = raw_aio_attach_aio_context, | ||
85 | |||
86 | .bdrv_co_truncate = raw_co_truncate, | ||
87 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = { | ||
88 | .bdrv_co_copy_range_from = raw_co_copy_range_from, | ||
89 | .bdrv_co_copy_range_to = raw_co_copy_range_to, | ||
90 | .bdrv_refresh_limits = raw_refresh_limits, | ||
91 | - .bdrv_co_io_plug = raw_co_io_plug, | ||
92 | - .bdrv_co_io_unplug = raw_co_io_unplug, | ||
93 | .bdrv_attach_aio_context = raw_aio_attach_aio_context, | ||
94 | |||
95 | .bdrv_co_truncate = raw_co_truncate, | ||
96 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = { | ||
97 | .bdrv_co_pwritev = raw_co_pwritev, | ||
98 | .bdrv_co_flush_to_disk = raw_co_flush_to_disk, | ||
99 | .bdrv_refresh_limits = cdrom_refresh_limits, | ||
100 | - .bdrv_co_io_plug = raw_co_io_plug, | ||
101 | - .bdrv_co_io_unplug = raw_co_io_unplug, | ||
102 | .bdrv_attach_aio_context = raw_aio_attach_aio_context, | ||
103 | |||
104 | .bdrv_co_truncate = raw_co_truncate, | ||
105 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = { | ||
106 | .bdrv_co_pwritev = raw_co_pwritev, | ||
107 | .bdrv_co_flush_to_disk = raw_co_flush_to_disk, | ||
108 | .bdrv_refresh_limits = cdrom_refresh_limits, | ||
109 | - .bdrv_co_io_plug = raw_co_io_plug, | ||
110 | - .bdrv_co_io_unplug = raw_co_io_unplug, | ||
111 | .bdrv_attach_aio_context = raw_aio_attach_aio_context, | ||
112 | |||
113 | .bdrv_co_truncate = raw_co_truncate, | ||
114 | diff --git a/block/linux-aio.c b/block/linux-aio.c | ||
115 | index XXXXXXX..XXXXXXX 100644 | ||
116 | --- a/block/linux-aio.c | ||
117 | +++ b/block/linux-aio.c | ||
118 | @@ -XXX,XX +XXX,XX @@ | ||
119 | #include "qemu/event_notifier.h" | ||
120 | #include "qemu/coroutine.h" | ||
121 | #include "qapi/error.h" | ||
122 | +#include "sysemu/block-backend.h" | ||
123 | |||
124 | /* Only used for assertions. */ | ||
125 | #include "qemu/coroutine_int.h" | ||
126 | @@ -XXX,XX +XXX,XX @@ struct qemu_laiocb { | ||
127 | }; | ||
128 | |||
129 | typedef struct { | ||
130 | - int plugged; | ||
131 | unsigned int in_queue; | ||
132 | unsigned int in_flight; | ||
133 | bool blocked; | ||
134 | @@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completions_and_submit(LinuxAioState *s) | ||
135 | { | ||
136 | qemu_laio_process_completions(s); | ||
137 | |||
138 | - if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { | ||
139 | + if (!QSIMPLEQ_EMPTY(&s->io_q.pending)) { | ||
140 | ioq_submit(s); | ||
141 | } | ||
142 | } | ||
143 | @@ -XXX,XX +XXX,XX @@ static void qemu_laio_poll_ready(EventNotifier *opaque) | ||
144 | static void ioq_init(LaioQueue *io_q) | ||
145 | { | ||
146 | QSIMPLEQ_INIT(&io_q->pending); | ||
147 | - io_q->plugged = 0; | ||
148 | io_q->in_queue = 0; | ||
149 | io_q->in_flight = 0; | ||
150 | io_q->blocked = false; | ||
151 | @@ -XXX,XX +XXX,XX @@ static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch) | ||
152 | return max_batch; | ||
153 | } | ||
154 | |||
155 | -void laio_io_plug(void) | ||
156 | +static void laio_unplug_fn(void *opaque) | ||
157 | { | ||
158 | - AioContext *ctx = qemu_get_current_aio_context(); | ||
159 | - LinuxAioState *s = aio_get_linux_aio(ctx); | ||
160 | + LinuxAioState *s = opaque; | ||
161 | |||
162 | - s->io_q.plugged++; | ||
163 | -} | ||
164 | - | ||
165 | -void laio_io_unplug(uint64_t dev_max_batch) | ||
166 | -{ | ||
167 | - AioContext *ctx = qemu_get_current_aio_context(); | ||
168 | - LinuxAioState *s = aio_get_linux_aio(ctx); | ||
169 | - | ||
170 | - assert(s->io_q.plugged); | ||
171 | - s->io_q.plugged--; | ||
172 | - | ||
173 | - /* | ||
174 | - * Why max batch checking is performed here: | ||
175 | - * Another BDS may have queued requests with a higher dev_max_batch and | ||
176 | - * therefore in_queue could now exceed our dev_max_batch. Re-check the max | ||
177 | - * batch so we can honor our device's dev_max_batch. | ||
178 | - */ | ||
179 | - if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || | ||
180 | - (!s->io_q.plugged && | ||
181 | - !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { | ||
182 | + if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { | ||
183 | ioq_submit(s); | ||
184 | } | ||
185 | } | ||
186 | @@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, | ||
187 | |||
188 | QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next); | ||
189 | s->io_q.in_queue++; | ||
190 | - if (!s->io_q.blocked && | ||
191 | - (!s->io_q.plugged || | ||
192 | - s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) { | ||
193 | - ioq_submit(s); | ||
194 | + if (!s->io_q.blocked) { | ||
195 | + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) { | ||
196 | + ioq_submit(s); | ||
197 | + } else { | ||
198 | + blk_io_plug_call(laio_unplug_fn, s); | ||
199 | + } | ||
200 | } | ||
201 | |||
202 | return 0; | ||
203 | -- | ||
204 | 2.40.1 |
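Note the shape of the new laio_do_submit() decision above: a full batch (in_queue >= laio_max_batch()) is still submitted eagerly, while anything smaller is deferred to laio_unplug_fn(), which runs once at the end of the caller's plug section rather than on every unplug.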
1 | From: David Edmondson <david.edmondson@oracle.com> | 1 | No block driver implements .bdrv_co_io_plug() anymore. Get rid of the |
---|---|---|---|
2 | function pointers. | ||
2 | 3 | ||
3 | When taking the slow path for mutex acquisition, set the coroutine | 4 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
4 | value in the CoWaitRecord in push_waiter(), rather than both there and | 5 | Reviewed-by: Eric Blake <eblake@redhat.com> |
5 | in the caller. | 6 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> |
6 | 7 | Acked-by: Kevin Wolf <kwolf@redhat.com> | |
7 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | 8 | Message-id: 20230530180959.1108766-7-stefanha@redhat.com |
8 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
9 | Signed-off-by: David Edmondson <david.edmondson@oracle.com> | ||
10 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
11 | Message-id: 20210325112941.365238-4-pbonzini@redhat.com | ||
12 | Message-Id: <20210309144015.557477-4-david.edmondson@oracle.com> | ||
13 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
15 | --- | 10 | --- |
16 | util/qemu-coroutine-lock.c | 1 - | 11 | include/block/block-io.h | 3 --- |
17 | 1 file changed, 1 deletion(-) | 12 | include/block/block_int-common.h | 11 ---------- |
13 | block/io.c | 37 -------------------------------- | ||
14 | 3 files changed, 51 deletions(-) | ||
18 | 15 | ||
19 | diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c | 16 | diff --git a/include/block/block-io.h b/include/block/block-io.h |
20 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/util/qemu-coroutine-lock.c | 18 | --- a/include/block/block-io.h |
22 | +++ b/util/qemu-coroutine-lock.c | 19 | +++ b/include/block/block-io.h |
23 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx, | 20 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx); |
24 | unsigned old_handoff; | 21 | |
25 | 22 | AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); | |
26 | trace_qemu_co_mutex_lock_entry(mutex, self); | 23 | |
27 | - w.co = self; | 24 | -void coroutine_fn GRAPH_RDLOCK bdrv_co_io_plug(BlockDriverState *bs); |
28 | push_waiter(mutex, &w); | 25 | -void coroutine_fn GRAPH_RDLOCK bdrv_co_io_unplug(BlockDriverState *bs); |
29 | 26 | - | |
30 | /* This is the "Responsibility Hand-Off" protocol; a lock() picks from | 27 | bool coroutine_fn GRAPH_RDLOCK |
28 | bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, | ||
29 | uint32_t granularity, Error **errp); | ||
30 | diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/include/block/block_int-common.h | ||
33 | +++ b/include/block/block_int-common.h | ||
34 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
35 | void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_debug_event)( | ||
36 | BlockDriverState *bs, BlkdebugEvent event); | ||
37 | |||
38 | - /* io queue for linux-aio */ | ||
39 | - void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_plug)(BlockDriverState *bs); | ||
40 | - void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_unplug)( | ||
41 | - BlockDriverState *bs); | ||
42 | - | ||
43 | bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); | ||
44 | |||
45 | bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)( | ||
46 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
47 | unsigned int in_flight; | ||
48 | unsigned int serialising_in_flight; | ||
49 | |||
50 | - /* | ||
51 | - * counter for nested bdrv_io_plug. | ||
52 | - * Accessed with atomic ops. | ||
53 | - */ | ||
54 | - unsigned io_plugged; | ||
55 | - | ||
56 | /* do we need to tell the quest if we have a volatile write cache? */ | ||
57 | int enable_write_cache; | ||
58 | |||
59 | diff --git a/block/io.c b/block/io.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/block/io.c | ||
62 | +++ b/block/io.c | ||
63 | @@ -XXX,XX +XXX,XX @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) | ||
64 | return mem; | ||
65 | } | ||
66 | |||
67 | -void coroutine_fn bdrv_co_io_plug(BlockDriverState *bs) | ||
68 | -{ | ||
69 | - BdrvChild *child; | ||
70 | - IO_CODE(); | ||
71 | - assert_bdrv_graph_readable(); | ||
72 | - | ||
73 | - QLIST_FOREACH(child, &bs->children, next) { | ||
74 | - bdrv_co_io_plug(child->bs); | ||
75 | - } | ||
76 | - | ||
77 | - if (qatomic_fetch_inc(&bs->io_plugged) == 0) { | ||
78 | - BlockDriver *drv = bs->drv; | ||
79 | - if (drv && drv->bdrv_co_io_plug) { | ||
80 | - drv->bdrv_co_io_plug(bs); | ||
81 | - } | ||
82 | - } | ||
83 | -} | ||
84 | - | ||
85 | -void coroutine_fn bdrv_co_io_unplug(BlockDriverState *bs) | ||
86 | -{ | ||
87 | - BdrvChild *child; | ||
88 | - IO_CODE(); | ||
89 | - assert_bdrv_graph_readable(); | ||
90 | - | ||
91 | - assert(bs->io_plugged); | ||
92 | - if (qatomic_fetch_dec(&bs->io_plugged) == 1) { | ||
93 | - BlockDriver *drv = bs->drv; | ||
94 | - if (drv && drv->bdrv_co_io_unplug) { | ||
95 | - drv->bdrv_co_io_unplug(bs); | ||
96 | - } | ||
97 | - } | ||
98 | - | ||
99 | - QLIST_FOREACH(child, &bs->children, next) { | ||
100 | - bdrv_co_io_unplug(child->bs); | ||
101 | - } | ||
102 | -} | ||
103 | - | ||
104 | /* Helper that undoes bdrv_register_buf() when it fails partway through */ | ||
105 | static void GRAPH_RDLOCK | ||
106 | bdrv_register_buf_rollback(BlockDriverState *bs, void *host, size_t size, | ||
31 | -- | 107 | -- |
32 | 2.30.2 | 108 | 2.40.1 |
33 |
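The removed block/io.c code also shows why the old API was a poor fit for multi-queue: bdrv_co_io_plug() had to recurse over every child BlockDriverState and maintain a per-BDS io_plugged counter with atomic ops, whereas the replacement keeps all batching state in the submitting thread, as the earlier commit messages in this series note.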
1 | From: David Edmondson <david.edmondson@oracle.com> | 1 | From: Stefano Garzarella <sgarzare@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | If a new bitmap entry is allocated, requiring the entire block to be | 3 | Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) support fd |
4 | written, avoid leaking the buffer allocated for the block should | 4 | passing. Let's expose this to the user, so the management layer |
5 | the write fail. | 5 | can pass the file descriptor of an already opened path. |
6 | 6 | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | 7 | If the libblkio virtio-blk driver supports fd passing, let's always |
8 | Signed-off-by: David Edmondson <david.edmondson@oracle.com> | 8 | use qemu_open() to open the `path`, so we can handle fd passing |
9 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 9 | from the management layer through the "/dev/fdset/N" special path. |
10 | Acked-by: Max Reitz <mreitz@redhat.com> | 10 | |
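As a usage illustration (the fd number, fd set number, and node name here are hypothetical), a privileged management process could open the vhost-vdpa character device itself, hand the descriptor to QEMU with `-add-fd fd=3,set=1`, and then reference it as `-blockdev driver=virtio-blk-vhost-vdpa,node-name=vda0,path=/dev/fdset/1,cache.direct=on`. Note that `cache.direct=on` is required because the driver rejects cache.direct=off, as the existing check in the diff below shows.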
11 | Message-id: 20210325112941.365238-2-pbonzini@redhat.com | 11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
12 | Message-Id: <20210309144015.557477-2-david.edmondson@oracle.com> | 12 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> |
13 | Acked-by: Max Reitz <mreitz@redhat.com> | 13 | Message-id: 20230530071941.8954-2-sgarzare@redhat.com |
14 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
16 | --- | 15 | --- |
17 | block/vdi.c | 1 + | 16 | block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++--------- |
18 | 1 file changed, 1 insertion(+) | 17 | 1 file changed, 44 insertions(+), 9 deletions(-) |
19 | 18 | ||
20 | diff --git a/block/vdi.c b/block/vdi.c | 19 | diff --git a/block/blkio.c b/block/blkio.c |
21 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/block/vdi.c | 21 | --- a/block/blkio.c |
23 | +++ b/block/vdi.c | 22 | +++ b/block/blkio.c |
24 | @@ -XXX,XX +XXX,XX @@ nonallocating_write: | 23 | @@ -XXX,XX +XXX,XX @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, |
25 | 24 | { | |
26 | logout("finished data write\n"); | 25 | const char *path = qdict_get_try_str(options, "path"); |
27 | if (ret < 0) { | 26 | BDRVBlkioState *s = bs->opaque; |
28 | + g_free(block); | 27 | - int ret; |
29 | return ret; | 28 | + bool fd_supported = false; |
29 | + int fd, ret; | ||
30 | |||
31 | if (!path) { | ||
32 | error_setg(errp, "missing 'path' option"); | ||
33 | return -EINVAL; | ||
30 | } | 34 | } |
31 | 35 | ||
36 | - ret = blkio_set_str(s->blkio, "path", path); | ||
37 | - qdict_del(options, "path"); | ||
38 | - if (ret < 0) { | ||
39 | - error_setg_errno(errp, -ret, "failed to set path: %s", | ||
40 | - blkio_get_error_msg()); | ||
41 | - return ret; | ||
42 | - } | ||
43 | - | ||
44 | if (!(flags & BDRV_O_NOCACHE)) { | ||
45 | error_setg(errp, "cache.direct=off is not supported"); | ||
46 | return -EINVAL; | ||
47 | } | ||
48 | + | ||
49 | + if (blkio_get_int(s->blkio, "fd", &fd) == 0) { | ||
50 | + fd_supported = true; | ||
51 | + } | ||
52 | + | ||
53 | + /* | ||
54 | + * If the libblkio driver supports fd passing, let's always use qemu_open() | ||
55 | + * to open the `path`, so we can handle fd passing from the management | ||
56 | + * layer through the "/dev/fdset/N" special path. | ||
57 | + */ | ||
58 | + if (fd_supported) { | ||
59 | + int open_flags; | ||
60 | + | ||
61 | + if (flags & BDRV_O_RDWR) { | ||
62 | + open_flags = O_RDWR; | ||
63 | + } else { | ||
64 | + open_flags = O_RDONLY; | ||
65 | + } | ||
66 | + | ||
67 | + fd = qemu_open(path, open_flags, errp); | ||
68 | + if (fd < 0) { | ||
69 | + return -EINVAL; | ||
70 | + } | ||
71 | + | ||
72 | + ret = blkio_set_int(s->blkio, "fd", fd); | ||
73 | + if (ret < 0) { | ||
74 | + error_setg_errno(errp, -ret, "failed to set fd: %s", | ||
75 | + blkio_get_error_msg()); | ||
76 | + qemu_close(fd); | ||
77 | + return ret; | ||
78 | + } | ||
79 | + } else { | ||
80 | + ret = blkio_set_str(s->blkio, "path", path); | ||
81 | + if (ret < 0) { | ||
82 | + error_setg_errno(errp, -ret, "failed to set path: %s", | ||
83 | + blkio_get_error_msg()); | ||
84 | + return ret; | ||
85 | + } | ||
86 | + } | ||
87 | + | ||
88 | + qdict_del(options, "path"); | ||
89 | + | ||
90 | return 0; | ||
91 | } | ||
92 | |||
32 | -- | 93 | -- |
33 | 2.30.2 | 94 | 2.40.1 |
34 |
1 | From: David Edmondson <david.edmondson@oracle.com> | 1 | From: Stefano Garzarella <sgarzare@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Given that the block size is read from the header of the VDI file, a | 3 | The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports fd |
4 | wide variety of sizes might be seen. Rather than re-using a block | 4 | passing through the new 'fd' property. |
5 | sized memory region when writing the VDI header, allocate an | ||
6 | appropriately sized buffer. | 6 | Since we now use qemu_open() on '@path' when the virtio-blk driver |
7 | 5 | ||
8 | Signed-off-by: David Edmondson <david.edmondson@oracle.com> | 6 | Since now we are using qemu_open() on '@path' if the virtio-blk driver |
9 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 9 | already opened vhost-vdpa character device. This is especially useful |
10 | Acked-by: Max Reitz <mreitz@redhat.com> | 8 | In this way, the management layer can pass the file descriptor of an |
11 | Message-id: 20210325112941.365238-3-pbonzini@redhat.com | 9 | already opened vhost-vdpa character device. This is useful especially |
12 | Message-Id: <20210309144015.557477-3-david.edmondson@oracle.com> | 10 | when the device can only be accessed with certain privileges. |
13 | Acked-by: Max Reitz <mreitz@redhat.com> | 11 | |
14 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 12 | Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver |
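Because the feature is declared conditionally, management software does not need to compare QEMU or libblkio version numbers: QMP schema introspection (query-qmp-schema) reports the 'fdset' feature on BlockdevOptionsVirtioBlkVhostVdpa only in builds where CONFIG_BLKIO_VHOST_VDPA_FD is set, so probing the schema is the reliable way to detect support.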
13 | in libblkio supports it. | ||
14 | |||
15 | Suggested-by: Markus Armbruster <armbru@redhat.com> | ||
16 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
17 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> | ||
18 | Message-id: 20230530071941.8954-3-sgarzare@redhat.com | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 19 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
16 | --- | 20 | --- |
17 | block/vdi.c | 10 ++++++---- | 21 | qapi/block-core.json | 6 ++++++ |
18 | 1 file changed, 6 insertions(+), 4 deletions(-) | 22 | meson.build | 4 ++++ |
23 | 2 files changed, 10 insertions(+) | ||
19 | 24 | ||
20 | diff --git a/block/vdi.c b/block/vdi.c | 25 | diff --git a/qapi/block-core.json b/qapi/block-core.json |
21 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/block/vdi.c | 27 | --- a/qapi/block-core.json |
23 | +++ b/block/vdi.c | 28 | +++ b/qapi/block-core.json |
24 | @@ -XXX,XX +XXX,XX @@ nonallocating_write: | 29 | @@ -XXX,XX +XXX,XX @@ |
25 | 30 | # | |
26 | if (block) { | 31 | # @path: path to the vhost-vdpa character device. |
27 | /* One or more new blocks were allocated. */ | 32 | # |
28 | - VdiHeader *header = (VdiHeader *) block; | 33 | +# Features: |
29 | + VdiHeader *header; | 34 | +# @fdset: Member @path supports the special "/dev/fdset/N" path |
30 | uint8_t *base; | 35 | +# (since 8.1) |
31 | uint64_t offset; | 36 | +# |
32 | uint32_t n_sectors; | 37 | # Since: 7.2 |
33 | 38 | ## | |
34 | + g_free(block); | 39 | { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa', |
35 | + header = g_malloc(sizeof(*header)); | 40 | 'data': { 'path': 'str' }, |
36 | + | 41 | + 'features': [ { 'name' :'fdset', |
37 | logout("now writing modified header\n"); | 42 | + 'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ], |
38 | assert(VDI_IS_ALLOCATED(bmap_first)); | 43 | 'if': 'CONFIG_BLKIO' } |
39 | *header = s->header; | 44 | |
40 | vdi_header_to_le(header); | 45 | ## |
41 | - ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader)); | 46 | diff --git a/meson.build b/meson.build |
42 | - g_free(block); | 47 | index XXXXXXX..XXXXXXX 100644 |
43 | - block = NULL; | 48 | --- a/meson.build |
44 | + ret = bdrv_pwrite(bs->file, 0, header, sizeof(*header)); | 49 | +++ b/meson.build |
45 | + g_free(header); | 50 | @@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_LZO', lzo.found()) |
46 | 51 | config_host_data.set('CONFIG_MPATH', mpathpersist.found()) | |
47 | if (ret < 0) { | 52 | config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) |
48 | return ret; | 53 | config_host_data.set('CONFIG_BLKIO', blkio.found()) |
54 | +if blkio.found() | ||
55 | + config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD', | ||
56 | + blkio.version().version_compare('>=1.3.0')) | ||
57 | +endif | ||
58 | config_host_data.set('CONFIG_CURL', curl.found()) | ||
59 | config_host_data.set('CONFIG_CURSES', curses.found()) | ||
60 | config_host_data.set('CONFIG_GBM', gbm.found()) | ||
49 | -- | 61 | -- |
50 | 2.30.2 | 62 | 2.40.1 |
51 |