1 | The following changes since commit 0ab4537f08e09b13788db67efd760592fb7db769: | 1 | The following changes since commit 711c0418c8c1ce3a24346f058b001c4c5a2f0f81: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/stefanberger/tags/pull-tpm-2018-03-07-1' into staging (2018-03-08 12:56:39 +0000) | 3 | Merge remote-tracking branch 'remotes/philmd/tags/mips-20210702' into staging (2021-07-04 14:04:12 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://github.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 4486e89c219c0d1b9bd8dfa0b1dd5b0d51ff2268: | 9 | for you to fetch changes up to 9f460c64e13897117f35ffb61f6f5e0102cabc70: |
10 | 10 | ||
11 | vl: introduce vm_shutdown() (2018-03-08 17:38:51 +0000) | 11 | block/io: Merge discard request alignments (2021-07-06 14:28:55 +0100) |
12 | |||
13 | ---------------------------------------------------------------- | ||
14 | Pull request | ||
12 | 15 | ||
13 | ---------------------------------------------------------------- | 16 | ---------------------------------------------------------------- |
14 | 17 | ||
15 | ---------------------------------------------------------------- | 18 | Akihiko Odaki (3): |
19 | block/file-posix: Optimize for macOS | ||
20 | block: Add backend_defaults property | ||
21 | block/io: Merge discard request alignments | ||
16 | 22 | ||
17 | Deepa Srinivasan (1): | 23 | Stefan Hajnoczi (2): |
18 | block: Fix qemu crash when using scsi-block | 24 | util/async: add a human-readable name to BHs for debugging |
25 | util/async: print leaked BH name when AioContext finalizes | ||
19 | 26 | ||
20 | Fam Zheng (1): | 27 | include/block/aio.h | 31 ++++++++++++++++++++++--- |
21 | README: Fix typo 'git-publish' | 28 | include/hw/block/block.h | 3 +++ |
22 | 29 | include/qemu/main-loop.h | 4 +++- | |
23 | Sergio Lopez (1): | 30 | block/file-posix.c | 27 ++++++++++++++++++++-- |
24 | virtio-blk: dataplane: Don't batch notifications if EVENT_IDX is | 31 | block/io.c | 2 ++ |
25 | present | 32 | hw/block/block.c | 42 ++++++++++++++++++++++++++++++---- |
26 | 33 | tests/unit/ptimer-test-stubs.c | 2 +- | |
27 | Stefan Hajnoczi (4): | 34 | util/async.c | 25 ++++++++++++++++---- |
28 | block: add aio_wait_bh_oneshot() | 35 | util/main-loop.c | 4 ++-- |
29 | virtio-blk: fix race between .ioeventfd_stop() and vq handler | 36 | tests/qemu-iotests/172.out | 38 ++++++++++++++++++++++++++++++ |
30 | virtio-scsi: fix race between .ioeventfd_stop() and vq handler | 37 | 10 files changed, 161 insertions(+), 17 deletions(-) |
31 | vl: introduce vm_shutdown() | ||
32 | |||
33 | include/block/aio-wait.h | 13 +++++++++++ | ||
34 | include/sysemu/iothread.h | 1 - | ||
35 | include/sysemu/sysemu.h | 1 + | ||
36 | block/block-backend.c | 51 ++++++++++++++++++++--------------------- | ||
37 | cpus.c | 16 ++++++++++--- | ||
38 | hw/block/dataplane/virtio-blk.c | 39 +++++++++++++++++++++++-------- | ||
39 | hw/scsi/virtio-scsi-dataplane.c | 9 ++++---- | ||
40 | iothread.c | 31 ------------------------- | ||
41 | util/aio-wait.c | 31 +++++++++++++++++++++++++ | ||
42 | vl.c | 13 +++-------- | ||
43 | README | 2 +- | ||
44 | 11 files changed, 122 insertions(+), 85 deletions(-) | ||
45 | 38 | ||
46 | -- | 39 | -- |
47 | 2.14.3 | 40 | 2.31.1 |
48 | 41 | ||
49 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Deepa Srinivasan <deepa.srinivasan@oracle.com> | ||
2 | 1 | ||
3 | Starting qemu with the following arguments causes qemu to segfault: | ||
4 | ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name= | ||
5 | iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1 | ||
6 | |||
7 | This patch fixes blk_aio_ioctl() so it does not pass stack addresses to | ||
8 | blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More | ||
9 | details about the bug follow. | ||
10 | |||
11 | blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the | ||
12 | coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter(). | ||
13 | |||
14 | When blk_aio_ioctl() is executed from within a coroutine context (e.g. | ||
15 | iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to | ||
16 | the current coroutine's wakeup queue. blk_aio_ioctl() then returns. | ||
17 | |||
18 | When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer: | ||
19 | .... | ||
20 | BlkRwCo *rwco = &acb->rwco; | ||
21 | |||
22 | rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, | ||
23 | rwco->qiov->iov[0].iov_base); <--- qiov is | ||
24 | invalid here | ||
25 | ... | ||
26 | |||
27 | In the case when blk_aio_ioctl() is called from a non-coroutine context, | ||
28 | blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls | ||
29 | qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine | ||
30 | execution is complete, control returns to blk_aio_ioctl_entry() after the call | ||
31 | to blk_co_ioctl(). There is no invalid reference after this point, but the | ||
32 | function is still holding on to invalid pointers. | ||
33 | |||
34 | The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer | ||
35 | rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the | ||
36 | coroutine function casts it to QEMUIOVector or uses the void pointer directly. | ||
37 | |||
38 | Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com> | ||
39 | Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
40 | Reviewed-by: Mark Kanda <mark.kanda@oracle.com> | ||
41 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
42 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
43 | --- | ||
44 | block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- | ||
45 | 1 file changed, 25 insertions(+), 26 deletions(-) | ||
46 | |||
47 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/block/block-backend.c | ||
50 | +++ b/block/block-backend.c | ||
51 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, | ||
52 | typedef struct BlkRwCo { | ||
53 | BlockBackend *blk; | ||
54 | int64_t offset; | ||
55 | - QEMUIOVector *qiov; | ||
56 | + void *iobuf; | ||
57 | int ret; | ||
58 | BdrvRequestFlags flags; | ||
59 | } BlkRwCo; | ||
60 | @@ -XXX,XX +XXX,XX @@ typedef struct BlkRwCo { | ||
61 | static void blk_read_entry(void *opaque) | ||
62 | { | ||
63 | BlkRwCo *rwco = opaque; | ||
64 | + QEMUIOVector *qiov = rwco->iobuf; | ||
65 | |||
66 | - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, | ||
67 | - rwco->qiov, rwco->flags); | ||
68 | + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, | ||
69 | + qiov, rwco->flags); | ||
70 | } | ||
71 | |||
72 | static void blk_write_entry(void *opaque) | ||
73 | { | ||
74 | BlkRwCo *rwco = opaque; | ||
75 | + QEMUIOVector *qiov = rwco->iobuf; | ||
76 | |||
77 | - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, | ||
78 | - rwco->qiov, rwco->flags); | ||
79 | + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, | ||
80 | + qiov, rwco->flags); | ||
81 | } | ||
82 | |||
83 | static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, | ||
84 | @@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, | ||
85 | rwco = (BlkRwCo) { | ||
86 | .blk = blk, | ||
87 | .offset = offset, | ||
88 | - .qiov = &qiov, | ||
89 | + .iobuf = &qiov, | ||
90 | .flags = flags, | ||
91 | .ret = NOT_DONE, | ||
92 | }; | ||
93 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_complete_bh(void *opaque) | ||
94 | } | ||
95 | |||
96 | static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, | ||
97 | - QEMUIOVector *qiov, CoroutineEntry co_entry, | ||
98 | + void *iobuf, CoroutineEntry co_entry, | ||
99 | BdrvRequestFlags flags, | ||
100 | BlockCompletionFunc *cb, void *opaque) | ||
101 | { | ||
102 | @@ -XXX,XX +XXX,XX @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, | ||
103 | acb->rwco = (BlkRwCo) { | ||
104 | .blk = blk, | ||
105 | .offset = offset, | ||
106 | - .qiov = qiov, | ||
107 | + .iobuf = iobuf, | ||
108 | .flags = flags, | ||
109 | .ret = NOT_DONE, | ||
110 | }; | ||
111 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_read_entry(void *opaque) | ||
112 | { | ||
113 | BlkAioEmAIOCB *acb = opaque; | ||
114 | BlkRwCo *rwco = &acb->rwco; | ||
115 | + QEMUIOVector *qiov = rwco->iobuf; | ||
116 | |||
117 | - assert(rwco->qiov->size == acb->bytes); | ||
118 | + assert(qiov->size == acb->bytes); | ||
119 | rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, | ||
120 | - rwco->qiov, rwco->flags); | ||
121 | + qiov, rwco->flags); | ||
122 | blk_aio_complete(acb); | ||
123 | } | ||
124 | |||
125 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_write_entry(void *opaque) | ||
126 | { | ||
127 | BlkAioEmAIOCB *acb = opaque; | ||
128 | BlkRwCo *rwco = &acb->rwco; | ||
129 | + QEMUIOVector *qiov = rwco->iobuf; | ||
130 | |||
131 | - assert(!rwco->qiov || rwco->qiov->size == acb->bytes); | ||
132 | + assert(!qiov || qiov->size == acb->bytes); | ||
133 | rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, | ||
134 | - rwco->qiov, rwco->flags); | ||
135 | + qiov, rwco->flags); | ||
136 | blk_aio_complete(acb); | ||
137 | } | ||
138 | |||
139 | @@ -XXX,XX +XXX,XX @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) | ||
140 | static void blk_ioctl_entry(void *opaque) | ||
141 | { | ||
142 | BlkRwCo *rwco = opaque; | ||
143 | + QEMUIOVector *qiov = rwco->iobuf; | ||
144 | + | ||
145 | rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, | ||
146 | - rwco->qiov->iov[0].iov_base); | ||
147 | + qiov->iov[0].iov_base); | ||
148 | } | ||
149 | |||
150 | int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) | ||
151 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_ioctl_entry(void *opaque) | ||
152 | BlkAioEmAIOCB *acb = opaque; | ||
153 | BlkRwCo *rwco = &acb->rwco; | ||
154 | |||
155 | - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, | ||
156 | - rwco->qiov->iov[0].iov_base); | ||
157 | + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); | ||
158 | + | ||
159 | blk_aio_complete(acb); | ||
160 | } | ||
161 | |||
162 | BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, | ||
163 | BlockCompletionFunc *cb, void *opaque) | ||
164 | { | ||
165 | - QEMUIOVector qiov; | ||
166 | - struct iovec iov; | ||
167 | - | ||
168 | - iov = (struct iovec) { | ||
169 | - .iov_base = buf, | ||
170 | - .iov_len = 0, | ||
171 | - }; | ||
172 | - qemu_iovec_init_external(&qiov, &iov, 1); | ||
173 | - | ||
174 | - return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); | ||
175 | + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); | ||
176 | } | ||
177 | |||
178 | int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) | ||
179 | @@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, | ||
180 | static void blk_pdiscard_entry(void *opaque) | ||
181 | { | ||
182 | BlkRwCo *rwco = opaque; | ||
183 | - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); | ||
184 | + QEMUIOVector *qiov = rwco->iobuf; | ||
185 | + | ||
186 | + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); | ||
187 | } | ||
188 | |||
189 | int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) | ||
190 | -- | ||
191 | 2.14.3 | ||
192 | |||
193 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Fam Zheng <famz@redhat.com> | ||
2 | 1 | ||
3 | Reported-by: Alberto Garcia <berto@igalia.com> | ||
4 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Message-id: 20180306024328.19195-1-famz@redhat.com | ||
7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | --- | ||
9 | README | 2 +- | ||
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/README b/README | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/README | ||
15 | +++ b/README | ||
16 | @@ -XXX,XX +XXX,XX @@ The QEMU website is also maintained under source control. | ||
17 | git clone git://git.qemu.org/qemu-web.git | ||
18 | https://www.qemu.org/2017/02/04/the-new-qemu-website-is-up/ | ||
19 | |||
20 | -A 'git-profile' utility was created to make above process less | ||
21 | +A 'git-publish' utility was created to make above process less | ||
22 | cumbersome, and is highly recommended for making regular contributions, | ||
23 | or even just for sending consecutive patch series revisions. It also | ||
24 | requires a working 'git send-email' setup, and by default doesn't | ||
25 | -- | ||
26 | 2.14.3 | ||
27 | |||
28 | diff view generated by jsdifflib |
1 | If the main loop thread invokes .ioeventfd_stop() just as the vq handler | 1 | It can be difficult to debug issues with BHs in production environments. |
---|---|---|---|
2 | function begins in the IOThread then the handler may lose the race for | 2 | Although BHs can usually be identified by looking up their ->cb() |
3 | the AioContext lock. By the time the vq handler is able to acquire the | 3 | function pointer, this requires debug information for the program. It is |
4 | AioContext lock the ioeventfd has already been removed and the handler | 4 | also not possible to print human-readable diagnostics about BHs because |
5 | isn't supposed to run anymore! | 5 | they have no identifier. |
6 | 6 | ||
7 | Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal | 7 | This patch adds a name to each BH. The name is not unique per instance |
8 | from within the IOThread. This way no races with the vq handler are | 8 | but differentiates between cb() functions, which is usually enough. It's |
9 | possible. | 9 | done by changing aio_bh_new() and friends to macros that stringify cb. |
10 | |||
11 | The next patch will use the name field when reporting leaked BHs. | ||
10 | 12 | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
12 | Reviewed-by: Fam Zheng <famz@redhat.com> | 14 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
13 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | 15 | Message-Id: <20210414200247.917496-2-stefanha@redhat.com> |
14 | Message-id: 20180307144205.20619-4-stefanha@redhat.com | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
16 | --- | 16 | --- |
17 | hw/scsi/virtio-scsi-dataplane.c | 9 +++++---- | 17 | include/block/aio.h | 31 ++++++++++++++++++++++++++++--- |
18 | 1 file changed, 5 insertions(+), 4 deletions(-) | 18 | include/qemu/main-loop.h | 4 +++- |
19 | tests/unit/ptimer-test-stubs.c | 2 +- | ||
20 | util/async.c | 9 +++++++-- | ||
21 | util/main-loop.c | 4 ++-- | ||
22 | 5 files changed, 41 insertions(+), 9 deletions(-) | ||
19 | 23 | ||
20 | diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c | 24 | diff --git a/include/block/aio.h b/include/block/aio.h |
21 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/scsi/virtio-scsi-dataplane.c | 26 | --- a/include/block/aio.h |
23 | +++ b/hw/scsi/virtio-scsi-dataplane.c | 27 | +++ b/include/block/aio.h |
24 | @@ -XXX,XX +XXX,XX @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, | 28 | @@ -XXX,XX +XXX,XX @@ void aio_context_acquire(AioContext *ctx); |
25 | return 0; | 29 | /* Relinquish ownership of the AioContext. */ |
30 | void aio_context_release(AioContext *ctx); | ||
31 | |||
32 | +/** | ||
33 | + * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will | ||
34 | + * run only once and as soon as possible. | ||
35 | + * | ||
36 | + * @name: A human-readable identifier for debugging purposes. | ||
37 | + */ | ||
38 | +void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, | ||
39 | + const char *name); | ||
40 | + | ||
41 | /** | ||
42 | * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run | ||
43 | * only once and as soon as possible. | ||
44 | + * | ||
45 | + * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the | ||
46 | + * name string. | ||
47 | */ | ||
48 | -void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); | ||
49 | +#define aio_bh_schedule_oneshot(ctx, cb, opaque) \ | ||
50 | + aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb))) | ||
51 | |||
52 | /** | ||
53 | - * aio_bh_new: Allocate a new bottom half structure. | ||
54 | + * aio_bh_new_full: Allocate a new bottom half structure. | ||
55 | * | ||
56 | * Bottom halves are lightweight callbacks whose invocation is guaranteed | ||
57 | * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure | ||
58 | * is opaque and must be allocated prior to its use. | ||
59 | + * | ||
60 | + * @name: A human-readable identifier for debugging purposes. | ||
61 | */ | ||
62 | -QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); | ||
63 | +QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, | ||
64 | + const char *name); | ||
65 | + | ||
66 | +/** | ||
67 | + * aio_bh_new: Allocate a new bottom half structure | ||
68 | + * | ||
69 | + * A convenience wrapper for aio_bh_new_full() that uses the cb as the name | ||
70 | + * string. | ||
71 | + */ | ||
72 | +#define aio_bh_new(ctx, cb, opaque) \ | ||
73 | + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) | ||
74 | |||
75 | /** | ||
76 | * aio_notify: Force processing of pending events. | ||
77 | diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/include/qemu/main-loop.h | ||
80 | +++ b/include/qemu/main-loop.h | ||
81 | @@ -XXX,XX +XXX,XX @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); | ||
82 | |||
83 | void qemu_fd_register(int fd); | ||
84 | |||
85 | -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); | ||
86 | +#define qemu_bh_new(cb, opaque) \ | ||
87 | + qemu_bh_new_full((cb), (opaque), (stringify(cb))) | ||
88 | +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); | ||
89 | void qemu_bh_schedule_idle(QEMUBH *bh); | ||
90 | |||
91 | enum { | ||
92 | diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/tests/unit/ptimer-test-stubs.c | ||
95 | +++ b/tests/unit/ptimer-test-stubs.c | ||
96 | @@ -XXX,XX +XXX,XX @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) | ||
97 | return deadline; | ||
26 | } | 98 | } |
27 | 99 | ||
28 | -/* assumes s->ctx held */ | 100 | -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) |
29 | -static void virtio_scsi_clear_aio(VirtIOSCSI *s) | 101 | +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) |
30 | +/* Context: BH in IOThread */ | ||
31 | +static void virtio_scsi_dataplane_stop_bh(void *opaque) | ||
32 | { | 102 | { |
33 | + VirtIOSCSI *s = opaque; | 103 | QEMUBH *bh = g_new(QEMUBH, 1); |
34 | VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); | 104 | |
35 | int i; | 105 | diff --git a/util/async.c b/util/async.c |
36 | 106 | index XXXXXXX..XXXXXXX 100644 | |
37 | @@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) | 107 | --- a/util/async.c |
38 | return 0; | 108 | +++ b/util/async.c |
39 | 109 | @@ -XXX,XX +XXX,XX @@ enum { | |
40 | fail_vrings: | 110 | |
41 | - virtio_scsi_clear_aio(s); | 111 | struct QEMUBH { |
42 | + aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); | 112 | AioContext *ctx; |
43 | aio_context_release(s->ctx); | 113 | + const char *name; |
44 | for (i = 0; i < vs->conf.num_queues + 2; i++) { | 114 | QEMUBHFunc *cb; |
45 | virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | 115 | void *opaque; |
46 | @@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) | 116 | QSLIST_ENTRY(QEMUBH) next; |
47 | s->dataplane_stopping = true; | 117 | @@ -XXX,XX +XXX,XX @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) |
48 | 118 | return bh; | |
49 | aio_context_acquire(s->ctx); | 119 | } |
50 | - virtio_scsi_clear_aio(s); | 120 | |
51 | + aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); | 121 | -void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) |
52 | aio_context_release(s->ctx); | 122 | +void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, |
53 | 123 | + void *opaque, const char *name) | |
54 | blk_drain_all(); /* ensure there are no in-flight requests */ | 124 | { |
125 | QEMUBH *bh; | ||
126 | bh = g_new(QEMUBH, 1); | ||
127 | @@ -XXX,XX +XXX,XX @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | ||
128 | .ctx = ctx, | ||
129 | .cb = cb, | ||
130 | .opaque = opaque, | ||
131 | + .name = name, | ||
132 | }; | ||
133 | aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT); | ||
134 | } | ||
135 | |||
136 | -QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | ||
137 | +QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, | ||
138 | + const char *name) | ||
139 | { | ||
140 | QEMUBH *bh; | ||
141 | bh = g_new(QEMUBH, 1); | ||
142 | @@ -XXX,XX +XXX,XX @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | ||
143 | .ctx = ctx, | ||
144 | .cb = cb, | ||
145 | .opaque = opaque, | ||
146 | + .name = name, | ||
147 | }; | ||
148 | return bh; | ||
149 | } | ||
150 | diff --git a/util/main-loop.c b/util/main-loop.c | ||
151 | index XXXXXXX..XXXXXXX 100644 | ||
152 | --- a/util/main-loop.c | ||
153 | +++ b/util/main-loop.c | ||
154 | @@ -XXX,XX +XXX,XX @@ void main_loop_wait(int nonblocking) | ||
155 | |||
156 | /* Functions to operate on the main QEMU AioContext. */ | ||
157 | |||
158 | -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) | ||
159 | +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) | ||
160 | { | ||
161 | - return aio_bh_new(qemu_aio_context, cb, opaque); | ||
162 | + return aio_bh_new_full(qemu_aio_context, cb, opaque, name); | ||
163 | } | ||
164 | |||
165 | /* | ||
55 | -- | 166 | -- |
56 | 2.14.3 | 167 | 2.31.1 |
57 | 168 | ||
58 | diff view generated by jsdifflib |
1 | Sometimes it's necessary for the main loop thread to run a BH in an | 1 | BHs must be deleted before the AioContext is finalized. If not, it's a |
---|---|---|---|
2 | IOThread and wait for its completion. This primitive is useful during | 2 | bug and probably indicates that some part of the program still expects |
3 | startup/shutdown to synchronize and avoid race conditions. | 3 | the BH to run in the future. That can lead to memory leaks, inconsistent |
4 | state, or just hangs. | ||
4 | 5 | ||
6 | Unfortunately the assert(flags & BH_DELETED) call in aio_ctx_finalize() | ||
7 | is difficult to debug because the assertion failure contains no | ||
8 | information about the BH! | ||
9 | |||
10 | Use the QEMUBH name field added in the previous patch to show a useful | ||
11 | error when a leaked BH is detected. | ||
12 | |||
13 | Suggested-by: Eric Ernst <eric.g.ernst@gmail.com> | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
6 | Reviewed-by: Fam Zheng <famz@redhat.com> | 15 | Message-Id: <20210414200247.917496-3-stefanha@redhat.com> |
7 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | ||
8 | Message-id: 20180307144205.20619-2-stefanha@redhat.com | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | 16 | --- |
11 | include/block/aio-wait.h | 13 +++++++++++++ | 17 | util/async.c | 16 ++++++++++++++-- |
12 | util/aio-wait.c | 31 +++++++++++++++++++++++++++++++ | 18 | 1 file changed, 14 insertions(+), 2 deletions(-) |
13 | 2 files changed, 44 insertions(+) | ||
14 | 19 | ||
15 | diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h | 20 | diff --git a/util/async.c b/util/async.c |
16 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/include/block/aio-wait.h | 22 | --- a/util/async.c |
18 | +++ b/include/block/aio-wait.h | 23 | +++ b/util/async.c |
19 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 24 | @@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source) |
20 | */ | 25 | assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list)); |
21 | void aio_wait_kick(AioWait *wait); | 26 | |
22 | 27 | while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) { | |
23 | +/** | 28 | - /* qemu_bh_delete() must have been called on BHs in this AioContext */ |
24 | + * aio_wait_bh_oneshot: | 29 | - assert(flags & BH_DELETED); |
25 | + * @ctx: the aio context | 30 | + /* |
26 | + * @cb: the BH callback function | 31 | + * qemu_bh_delete() must have been called on BHs in this AioContext. In |
27 | + * @opaque: user data for the BH callback function | 32 | + * many cases memory leaks, hangs, or inconsistent state occur when a |
28 | + * | 33 | + * BH is leaked because something still expects it to run. |
29 | + * Run a BH in @ctx and wait for it to complete. | 34 | + * |
30 | + * | 35 | + * If you hit this, fix the lifecycle of the BH so that |
31 | + * Must be called from the main loop thread with @ctx acquired exactly once. | 36 | + * qemu_bh_delete() and any associated cleanup is called before the |
32 | + * Note that main loop event processing may occur. | 37 | + * AioContext is finalized. |
33 | + */ | 38 | + */ |
34 | +void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); | 39 | + if (unlikely(!(flags & BH_DELETED))) { |
35 | + | 40 | + fprintf(stderr, "%s: BH '%s' leaked, aborting...\n", |
36 | #endif /* QEMU_AIO_WAIT */ | 41 | + __func__, bh->name); |
37 | diff --git a/util/aio-wait.c b/util/aio-wait.c | 42 | + abort(); |
38 | index XXXXXXX..XXXXXXX 100644 | 43 | + } |
39 | --- a/util/aio-wait.c | 44 | |
40 | +++ b/util/aio-wait.c | 45 | g_free(bh); |
41 | @@ -XXX,XX +XXX,XX @@ void aio_wait_kick(AioWait *wait) | ||
42 | aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); | ||
43 | } | 46 | } |
44 | } | ||
45 | + | ||
46 | +typedef struct { | ||
47 | + AioWait wait; | ||
48 | + bool done; | ||
49 | + QEMUBHFunc *cb; | ||
50 | + void *opaque; | ||
51 | +} AioWaitBHData; | ||
52 | + | ||
53 | +/* Context: BH in IOThread */ | ||
54 | +static void aio_wait_bh(void *opaque) | ||
55 | +{ | ||
56 | + AioWaitBHData *data = opaque; | ||
57 | + | ||
58 | + data->cb(data->opaque); | ||
59 | + | ||
60 | + data->done = true; | ||
61 | + aio_wait_kick(&data->wait); | ||
62 | +} | ||
63 | + | ||
64 | +void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | ||
65 | +{ | ||
66 | + AioWaitBHData data = { | ||
67 | + .cb = cb, | ||
68 | + .opaque = opaque, | ||
69 | + }; | ||
70 | + | ||
71 | + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | ||
72 | + | ||
73 | + aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data); | ||
74 | + AIO_WAIT_WHILE(&data.wait, ctx, !data.done); | ||
75 | +} | ||
76 | -- | 47 | -- |
77 | 2.14.3 | 48 | 2.31.1 |
78 | 49 | ||
79 | diff view generated by jsdifflib |
1 | Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before | 1 | From: Akihiko Odaki <akihiko.odaki@gmail.com> |
---|---|---|---|
2 | stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa | ||
3 | ("iothread: Stop threads before main() quits") tried to work around the | ||
4 | fact that emulation was still active during termination by stopping | ||
5 | iothreads. They suffer from race conditions: | ||
6 | 1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the | ||
7 | virtio_scsi_ctx_check() assertion failure because the BDS AioContext | ||
8 | has been modified by iothread_stop_all(). | ||
9 | 2. Guest vq kick racing with main loop termination leaves a readable | ||
10 | ioeventfd that is handled by the next aio_poll() when external | ||
11 | clients are enabled again, resulting in unwanted emulation activity. | ||
12 | 2 | ||
13 | This patch obsoletes those commits by fully disabling emulation activity | 3 | This commit introduces "punch hole" operation and optimizes transfer |
14 | when vcpus are stopped. | 4 | block size for macOS. |
15 | 5 | ||
16 | Use the new vm_shutdown() function instead of pause_all_vcpus() so that | 6 | Thanks to Konstantin Nazarov for detailed analysis of a flaw in an |
17 | vm change state handlers are invoked too. Virtio devices will now stop | 7 | old version of this change: |
18 | their ioeventfds, preventing further emulation activity after vm_stop(). | 8 | https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667 |
19 | 9 | ||
20 | Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a | 10 | Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com> |
21 | QMP STOP event that may affect existing clients. | 11 | Message-id: 20210705130458.97642-1-akihiko.odaki@gmail.com |
22 | |||
23 | It is no longer necessary to call replay_disable_events() directly since | ||
24 | vm_shutdown() does so already. | ||
25 | |||
26 | Drop iothread_stop_all() since it is no longer used. | ||
27 | |||
28 | Cc: Fam Zheng <famz@redhat.com> | ||
29 | Cc: Kevin Wolf <kwolf@redhat.com> | ||
30 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
31 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
32 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | ||
33 | Message-id: 20180307144205.20619-5-stefanha@redhat.com | ||
34 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
35 | --- | 13 | --- |
36 | include/sysemu/iothread.h | 1 - | 14 | block/file-posix.c | 27 +++++++++++++++++++++++++-- |
37 | include/sysemu/sysemu.h | 1 + | 15 | 1 file changed, 25 insertions(+), 2 deletions(-) |
38 | cpus.c | 16 +++++++++++++--- | ||
39 | iothread.c | 31 ------------------------------- | ||
40 | vl.c | 13 +++---------- | ||
41 | 5 files changed, 17 insertions(+), 45 deletions(-) | ||
42 | 16 | ||
43 | diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h | 17 | diff --git a/block/file-posix.c b/block/file-posix.c |
44 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/include/sysemu/iothread.h | 19 | --- a/block/file-posix.c |
46 | +++ b/include/sysemu/iothread.h | 20 | +++ b/block/file-posix.c |
47 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 21 | @@ -XXX,XX +XXX,XX @@ |
48 | char *iothread_get_id(IOThread *iothread); | 22 | #if defined(HAVE_HOST_BLOCK_DEVICE) |
49 | IOThread *iothread_by_id(const char *id); | 23 | #include <paths.h> |
50 | AioContext *iothread_get_aio_context(IOThread *iothread); | 24 | #include <sys/param.h> |
51 | -void iothread_stop_all(void); | 25 | +#include <sys/mount.h> |
52 | GMainContext *iothread_get_g_main_context(IOThread *iothread); | 26 | #include <IOKit/IOKitLib.h> |
53 | 27 | #include <IOKit/IOBSD.h> | |
54 | /* | 28 | #include <IOKit/storage/IOMediaBSDClient.h> |
55 | diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h | 29 | @@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) |
56 | index XXXXXXX..XXXXXXX 100644 | 30 | return; |
57 | --- a/include/sysemu/sysemu.h | 31 | } |
58 | +++ b/include/sysemu/sysemu.h | 32 | |
59 | @@ -XXX,XX +XXX,XX @@ void vm_start(void); | 33 | +#if defined(__APPLE__) && (__MACH__) |
60 | int vm_prepare_start(void); | 34 | + struct statfs buf; |
61 | int vm_stop(RunState state); | 35 | + |
62 | int vm_stop_force_state(RunState state); | 36 | + if (!fstatfs(s->fd, &buf)) { |
63 | +int vm_shutdown(void); | 37 | + bs->bl.opt_transfer = buf.f_iosize; |
64 | 38 | + bs->bl.pdiscard_alignment = buf.f_bsize; | |
65 | typedef enum WakeupReason { | 39 | + } |
66 | /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */ | 40 | +#endif |
67 | diff --git a/cpus.c b/cpus.c | 41 | + |
68 | index XXXXXXX..XXXXXXX 100644 | 42 | if (bs->sg || S_ISBLK(st.st_mode)) { |
69 | --- a/cpus.c | 43 | int ret = hdev_get_max_hw_transfer(s->fd, &st); |
70 | +++ b/cpus.c | 44 | |
71 | @@ -XXX,XX +XXX,XX @@ void cpu_synchronize_all_pre_loadvm(void) | 45 | @@ -XXX,XX +XXX,XX @@ out: |
72 | } | 46 | } |
73 | } | 47 | } |
74 | 48 | ||
75 | -static int do_vm_stop(RunState state) | 49 | +#if defined(CONFIG_FALLOCATE) || defined(BLKZEROOUT) || defined(BLKDISCARD) |
76 | +static int do_vm_stop(RunState state, bool send_stop) | 50 | static int translate_err(int err) |
77 | { | 51 | { |
78 | int ret = 0; | 52 | if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || |
79 | 53 | @@ -XXX,XX +XXX,XX @@ static int translate_err(int err) | |
80 | @@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state) | 54 | } |
81 | pause_all_vcpus(); | 55 | return err; |
82 | runstate_set(state); | 56 | } |
83 | vm_state_notify(0, state); | 57 | +#endif |
84 | - qapi_event_send_stop(&error_abort); | 58 | |
85 | + if (send_stop) { | 59 | #ifdef CONFIG_FALLOCATE |
86 | + qapi_event_send_stop(&error_abort); | 60 | static int do_fallocate(int fd, int mode, off_t offset, off_t len) |
61 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_discard(void *opaque) | ||
62 | } | ||
63 | } while (errno == EINTR); | ||
64 | |||
65 | - ret = -errno; | ||
66 | + ret = translate_err(-errno); | ||
67 | #endif | ||
68 | } else { | ||
69 | #ifdef CONFIG_FALLOCATE_PUNCH_HOLE | ||
70 | ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, | ||
71 | aiocb->aio_offset, aiocb->aio_nbytes); | ||
72 | + ret = translate_err(-errno); | ||
73 | +#elif defined(__APPLE__) && (__MACH__) | ||
74 | + fpunchhole_t fpunchhole; | ||
75 | + fpunchhole.fp_flags = 0; | ||
76 | + fpunchhole.reserved = 0; | ||
77 | + fpunchhole.fp_offset = aiocb->aio_offset; | ||
78 | + fpunchhole.fp_length = aiocb->aio_nbytes; | ||
79 | + if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) { | ||
80 | + ret = errno == ENODEV ? -ENOTSUP : -errno; | ||
81 | + } else { | ||
82 | + ret = 0; | ||
87 | + } | 83 | + } |
84 | #endif | ||
88 | } | 85 | } |
89 | 86 | ||
90 | bdrv_drain_all(); | 87 | - ret = translate_err(ret); |
91 | @@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state) | 88 | if (ret == -ENOTSUP) { |
92 | return ret; | 89 | s->has_discard = false; |
93 | } | ||
94 | |||
95 | +/* Special vm_stop() variant for terminating the process. Historically clients | ||
96 | + * did not expect a QMP STOP event and so we need to retain compatibility. | ||
97 | + */ | ||
98 | +int vm_shutdown(void) | ||
99 | +{ | ||
100 | + return do_vm_stop(RUN_STATE_SHUTDOWN, false); | ||
101 | +} | ||
102 | + | ||
103 | static bool cpu_can_run(CPUState *cpu) | ||
104 | { | ||
105 | if (cpu->stop) { | ||
106 | @@ -XXX,XX +XXX,XX @@ int vm_stop(RunState state) | ||
107 | return 0; | ||
108 | } | 90 | } |
109 | |||
110 | - return do_vm_stop(state); | ||
111 | + return do_vm_stop(state, true); | ||
112 | } | ||
113 | |||
114 | /** | ||
115 | diff --git a/iothread.c b/iothread.c | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/iothread.c | ||
118 | +++ b/iothread.c | ||
119 | @@ -XXX,XX +XXX,XX @@ void iothread_stop(IOThread *iothread) | ||
120 | qemu_thread_join(&iothread->thread); | ||
121 | } | ||
122 | |||
123 | -static int iothread_stop_iter(Object *object, void *opaque) | ||
124 | -{ | ||
125 | - IOThread *iothread; | ||
126 | - | ||
127 | - iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); | ||
128 | - if (!iothread) { | ||
129 | - return 0; | ||
130 | - } | ||
131 | - iothread_stop(iothread); | ||
132 | - return 0; | ||
133 | -} | ||
134 | - | ||
135 | static void iothread_instance_init(Object *obj) | ||
136 | { | ||
137 | IOThread *iothread = IOTHREAD(obj); | ||
138 | @@ -XXX,XX +XXX,XX @@ IOThreadInfoList *qmp_query_iothreads(Error **errp) | ||
139 | return head; | ||
140 | } | ||
141 | |||
142 | -void iothread_stop_all(void) | ||
143 | -{ | ||
144 | - Object *container = object_get_objects_root(); | ||
145 | - BlockDriverState *bs; | ||
146 | - BdrvNextIterator it; | ||
147 | - | ||
148 | - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
149 | - AioContext *ctx = bdrv_get_aio_context(bs); | ||
150 | - if (ctx == qemu_get_aio_context()) { | ||
151 | - continue; | ||
152 | - } | ||
153 | - aio_context_acquire(ctx); | ||
154 | - bdrv_set_aio_context(bs, qemu_get_aio_context()); | ||
155 | - aio_context_release(ctx); | ||
156 | - } | ||
157 | - | ||
158 | - object_child_foreach(container, iothread_stop_iter, NULL); | ||
159 | -} | ||
160 | - | ||
161 | static gpointer iothread_g_main_context_init(gpointer opaque) | ||
162 | { | ||
163 | AioContext *ctx; | ||
164 | diff --git a/vl.c b/vl.c | ||
165 | index XXXXXXX..XXXXXXX 100644 | ||
166 | --- a/vl.c | ||
167 | +++ b/vl.c | ||
168 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
169 | os_setup_post(); | ||
170 | |||
171 | main_loop(); | ||
172 | - replay_disable_events(); | ||
173 | |||
174 | - /* The ordering of the following is delicate. Stop vcpus to prevent new | ||
175 | - * I/O requests being queued by the guest. Then stop IOThreads (this | ||
176 | - * includes a drain operation and completes all request processing). At | ||
177 | - * this point emulated devices are still associated with their IOThreads | ||
178 | - * (if any) but no longer have any work to do. Only then can we close | ||
179 | - * block devices safely because we know there is no more I/O coming. | ||
180 | - */ | ||
181 | - pause_all_vcpus(); | ||
182 | - iothread_stop_all(); | ||
183 | + /* No more vcpu or device emulation activity beyond this point */ | ||
184 | + vm_shutdown(); | ||
185 | + | ||
186 | bdrv_close_all(); | ||
187 | |||
188 | res_free(); | ||
189 | -- | 91 | -- |
190 | 2.14.3 | 92 | 2.31.1 |
191 | 93 | ||
192 | diff view generated by jsdifflib |
1 | If the main loop thread invokes .ioeventfd_stop() just as the vq handler | 1 | From: Akihiko Odaki <akihiko.odaki@gmail.com> |
---|---|---|---|
2 | function begins in the IOThread then the handler may lose the race for | 2 | |
3 | the AioContext lock. By the time the vq handler is able to acquire the | 3 | backend_defaults property allows users to control if default block |
4 | AioContext lock the ioeventfd has already been removed and the handler | 4 | properties should be decided with backend information. |
5 | isn't supposed to run anymore! | 5 | |
6 | 6 | If it is off, any backend information will be discarded, which is | |
7 | Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal | 7 | suitable if you plan to perform live migration to a different disk backend. |
8 | from within the IOThread. This way no races with the vq handler are | 8 | |
9 | possible. | 9 | If it is on, a block device may utilize backend information more |
10 | 10 | aggressively. | |
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 11 | |
12 | Reviewed-by: Fam Zheng <famz@redhat.com> | 12 | By default, it is auto, which uses backend information for block |
13 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | 13 | sizes and ignores the others, which is consistent with the older |
14 | Message-id: 20180307144205.20619-3-stefanha@redhat.com | 14 | versions. |
15 | |||
16 | Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com> | ||
17 | Message-id: 20210705130458.97642-2-akihiko.odaki@gmail.com | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 18 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
16 | --- | 19 | --- |
17 | hw/block/dataplane/virtio-blk.c | 24 +++++++++++++++++------- | 20 | include/hw/block/block.h | 3 +++ |
18 | 1 file changed, 17 insertions(+), 7 deletions(-) | 21 | hw/block/block.c | 42 ++++++++++++++++++++++++++++++++++---- |
19 | 22 | tests/qemu-iotests/172.out | 38 ++++++++++++++++++++++++++++++++++ | |
20 | diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c | 23 | 3 files changed, 79 insertions(+), 4 deletions(-) |
24 | |||
25 | diff --git a/include/hw/block/block.h b/include/hw/block/block.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/block/dataplane/virtio-blk.c | 27 | --- a/include/hw/block/block.h |
23 | +++ b/hw/block/dataplane/virtio-blk.c | 28 | +++ b/include/hw/block/block.h |
24 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) | 29 | @@ -XXX,XX +XXX,XX @@ |
25 | return -ENOSYS; | 30 | |
31 | typedef struct BlockConf { | ||
32 | BlockBackend *blk; | ||
33 | + OnOffAuto backend_defaults; | ||
34 | uint32_t physical_block_size; | ||
35 | uint32_t logical_block_size; | ||
36 | uint32_t min_io_size; | ||
37 | @@ -XXX,XX +XXX,XX @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) | ||
26 | } | 38 | } |
27 | 39 | ||
28 | +/* Stop notifications for new requests from guest. | 40 | #define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \ |
29 | + * | 41 | + DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state, \ |
30 | + * Context: BH in IOThread | 42 | + _conf.backend_defaults, ON_OFF_AUTO_AUTO), \ |
31 | + */ | 43 | DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ |
32 | +static void virtio_blk_data_plane_stop_bh(void *opaque) | 44 | _conf.logical_block_size), \ |
33 | +{ | 45 | DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ |
34 | + VirtIOBlockDataPlane *s = opaque; | 46 | diff --git a/hw/block/block.c b/hw/block/block.c |
35 | + unsigned i; | 47 | index XXXXXXX..XXXXXXX 100644 |
48 | --- a/hw/block/block.c | ||
49 | +++ b/hw/block/block.c | ||
50 | @@ -XXX,XX +XXX,XX @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp) | ||
51 | { | ||
52 | BlockBackend *blk = conf->blk; | ||
53 | BlockSizes blocksizes; | ||
54 | - int backend_ret; | ||
55 | + BlockDriverState *bs; | ||
56 | + bool use_blocksizes; | ||
57 | + bool use_bs; | ||
36 | + | 58 | + |
37 | + for (i = 0; i < s->conf->num_queues; i++) { | 59 | + switch (conf->backend_defaults) { |
38 | + VirtQueue *vq = virtio_get_queue(s->vdev, i); | 60 | + case ON_OFF_AUTO_AUTO: |
61 | + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); | ||
62 | + use_bs = false; | ||
63 | + break; | ||
39 | + | 64 | + |
40 | + virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL); | 65 | + case ON_OFF_AUTO_ON: |
66 | + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); | ||
67 | + bs = blk_bs(blk); | ||
68 | + use_bs = bs; | ||
69 | + break; | ||
70 | + | ||
71 | + case ON_OFF_AUTO_OFF: | ||
72 | + use_blocksizes = false; | ||
73 | + use_bs = false; | ||
74 | + break; | ||
75 | + | ||
76 | + default: | ||
77 | + abort(); | ||
41 | + } | 78 | + } |
42 | +} | 79 | |
43 | + | 80 | - backend_ret = blk_probe_blocksizes(blk, &blocksizes); |
44 | /* Context: QEMU global mutex held */ | 81 | /* fill in detected values if they are not defined via qemu command line */ |
45 | void virtio_blk_data_plane_stop(VirtIODevice *vdev) | 82 | if (!conf->physical_block_size) { |
46 | { | 83 | - if (!backend_ret) { |
47 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) | 84 | + if (use_blocksizes) { |
48 | trace_virtio_blk_data_plane_stop(s); | 85 | conf->physical_block_size = blocksizes.phys; |
49 | 86 | } else { | |
50 | aio_context_acquire(s->ctx); | 87 | conf->physical_block_size = BDRV_SECTOR_SIZE; |
51 | - | 88 | } |
52 | - /* Stop notifications for new requests from guest */ | 89 | } |
53 | - for (i = 0; i < nvqs; i++) { | 90 | if (!conf->logical_block_size) { |
54 | - VirtQueue *vq = virtio_get_queue(s->vdev, i); | 91 | - if (!backend_ret) { |
55 | - | 92 | + if (use_blocksizes) { |
56 | - virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL); | 93 | conf->logical_block_size = blocksizes.log; |
57 | - } | 94 | } else { |
58 | + aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); | 95 | conf->logical_block_size = BDRV_SECTOR_SIZE; |
59 | 96 | } | |
60 | /* Drain and switch bs back to the QEMU main loop */ | 97 | } |
61 | blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context()); | 98 | + if (use_bs) { |
99 | + if (!conf->opt_io_size) { | ||
100 | + conf->opt_io_size = bs->bl.opt_transfer; | ||
101 | + } | ||
102 | + if (conf->discard_granularity == -1) { | ||
103 | + if (bs->bl.pdiscard_alignment) { | ||
104 | + conf->discard_granularity = bs->bl.pdiscard_alignment; | ||
105 | + } else if (bs->bl.request_alignment != 1) { | ||
106 | + conf->discard_granularity = bs->bl.request_alignment; | ||
107 | + } | ||
108 | + } | ||
109 | + } | ||
110 | |||
111 | if (conf->logical_block_size > conf->physical_block_size) { | ||
112 | error_setg(errp, | ||
113 | diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out | ||
114 | index XXXXXXX..XXXXXXX 100644 | ||
115 | --- a/tests/qemu-iotests/172.out | ||
116 | +++ b/tests/qemu-iotests/172.out | ||
117 | @@ -XXX,XX +XXX,XX @@ Testing: | ||
118 | dev: floppy, id "" | ||
119 | unit = 0 (0x0) | ||
120 | drive = "floppy0" | ||
121 | + backend_defaults = "auto" | ||
122 | logical_block_size = 512 (512 B) | ||
123 | physical_block_size = 512 (512 B) | ||
124 | min_io_size = 0 (0 B) | ||
125 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 | ||
126 | dev: floppy, id "" | ||
127 | unit = 0 (0x0) | ||
128 | drive = "floppy0" | ||
129 | + backend_defaults = "auto" | ||
130 | logical_block_size = 512 (512 B) | ||
131 | physical_block_size = 512 (512 B) | ||
132 | min_io_size = 0 (0 B) | ||
133 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 | ||
134 | dev: floppy, id "" | ||
135 | unit = 1 (0x1) | ||
136 | drive = "floppy1" | ||
137 | + backend_defaults = "auto" | ||
138 | logical_block_size = 512 (512 B) | ||
139 | physical_block_size = 512 (512 B) | ||
140 | min_io_size = 0 (0 B) | ||
141 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 | ||
142 | dev: floppy, id "" | ||
143 | unit = 0 (0x0) | ||
144 | drive = "floppy0" | ||
145 | + backend_defaults = "auto" | ||
146 | logical_block_size = 512 (512 B) | ||
147 | physical_block_size = 512 (512 B) | ||
148 | min_io_size = 0 (0 B) | ||
149 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 | ||
150 | dev: floppy, id "" | ||
151 | unit = 1 (0x1) | ||
152 | drive = "floppy1" | ||
153 | + backend_defaults = "auto" | ||
154 | logical_block_size = 512 (512 B) | ||
155 | physical_block_size = 512 (512 B) | ||
156 | min_io_size = 0 (0 B) | ||
157 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 | ||
158 | dev: floppy, id "" | ||
159 | unit = 0 (0x0) | ||
160 | drive = "floppy0" | ||
161 | + backend_defaults = "auto" | ||
162 | logical_block_size = 512 (512 B) | ||
163 | physical_block_size = 512 (512 B) | ||
164 | min_io_size = 0 (0 B) | ||
165 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb | ||
166 | dev: floppy, id "" | ||
167 | unit = 1 (0x1) | ||
168 | drive = "floppy1" | ||
169 | + backend_defaults = "auto" | ||
170 | logical_block_size = 512 (512 B) | ||
171 | physical_block_size = 512 (512 B) | ||
172 | min_io_size = 0 (0 B) | ||
173 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb | ||
174 | dev: floppy, id "" | ||
175 | unit = 0 (0x0) | ||
176 | drive = "floppy0" | ||
177 | + backend_defaults = "auto" | ||
178 | logical_block_size = 512 (512 B) | ||
179 | physical_block_size = 512 (512 B) | ||
180 | min_io_size = 0 (0 B) | ||
181 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 | ||
182 | dev: floppy, id "" | ||
183 | unit = 0 (0x0) | ||
184 | drive = "floppy0" | ||
185 | + backend_defaults = "auto" | ||
186 | logical_block_size = 512 (512 B) | ||
187 | physical_block_size = 512 (512 B) | ||
188 | min_io_size = 0 (0 B) | ||
189 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 | ||
190 | dev: floppy, id "" | ||
191 | unit = 1 (0x1) | ||
192 | drive = "floppy1" | ||
193 | + backend_defaults = "auto" | ||
194 | logical_block_size = 512 (512 B) | ||
195 | physical_block_size = 512 (512 B) | ||
196 | min_io_size = 0 (0 B) | ||
197 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 | ||
198 | dev: floppy, id "" | ||
199 | unit = 0 (0x0) | ||
200 | drive = "floppy0" | ||
201 | + backend_defaults = "auto" | ||
202 | logical_block_size = 512 (512 B) | ||
203 | physical_block_size = 512 (512 B) | ||
204 | min_io_size = 0 (0 B) | ||
205 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t | ||
206 | dev: floppy, id "" | ||
207 | unit = 1 (0x1) | ||
208 | drive = "floppy1" | ||
209 | + backend_defaults = "auto" | ||
210 | logical_block_size = 512 (512 B) | ||
211 | physical_block_size = 512 (512 B) | ||
212 | min_io_size = 0 (0 B) | ||
213 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t | ||
214 | dev: floppy, id "" | ||
215 | unit = 0 (0x0) | ||
216 | drive = "floppy0" | ||
217 | + backend_defaults = "auto" | ||
218 | logical_block_size = 512 (512 B) | ||
219 | physical_block_size = 512 (512 B) | ||
220 | min_io_size = 0 (0 B) | ||
221 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 | ||
222 | dev: floppy, id "" | ||
223 | unit = 0 (0x0) | ||
224 | drive = "none0" | ||
225 | + backend_defaults = "auto" | ||
226 | logical_block_size = 512 (512 B) | ||
227 | physical_block_size = 512 (512 B) | ||
228 | min_io_size = 0 (0 B) | ||
229 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 | ||
230 | dev: floppy, id "" | ||
231 | unit = 1 (0x1) | ||
232 | drive = "none0" | ||
233 | + backend_defaults = "auto" | ||
234 | logical_block_size = 512 (512 B) | ||
235 | physical_block_size = 512 (512 B) | ||
236 | min_io_size = 0 (0 B) | ||
237 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco | ||
238 | dev: floppy, id "" | ||
239 | unit = 1 (0x1) | ||
240 | drive = "none1" | ||
241 | + backend_defaults = "auto" | ||
242 | logical_block_size = 512 (512 B) | ||
243 | physical_block_size = 512 (512 B) | ||
244 | min_io_size = 0 (0 B) | ||
245 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco | ||
246 | dev: floppy, id "" | ||
247 | unit = 0 (0x0) | ||
248 | drive = "none0" | ||
249 | + backend_defaults = "auto" | ||
250 | logical_block_size = 512 (512 B) | ||
251 | physical_block_size = 512 (512 B) | ||
252 | min_io_size = 0 (0 B) | ||
253 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
254 | dev: floppy, id "" | ||
255 | unit = 1 (0x1) | ||
256 | drive = "none0" | ||
257 | + backend_defaults = "auto" | ||
258 | logical_block_size = 512 (512 B) | ||
259 | physical_block_size = 512 (512 B) | ||
260 | min_io_size = 0 (0 B) | ||
261 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
262 | dev: floppy, id "" | ||
263 | unit = 0 (0x0) | ||
264 | drive = "floppy0" | ||
265 | + backend_defaults = "auto" | ||
266 | logical_block_size = 512 (512 B) | ||
267 | physical_block_size = 512 (512 B) | ||
268 | min_io_size = 0 (0 B) | ||
269 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
270 | dev: floppy, id "" | ||
271 | unit = 1 (0x1) | ||
272 | drive = "none0" | ||
273 | + backend_defaults = "auto" | ||
274 | logical_block_size = 512 (512 B) | ||
275 | physical_block_size = 512 (512 B) | ||
276 | min_io_size = 0 (0 B) | ||
277 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
278 | dev: floppy, id "" | ||
279 | unit = 0 (0x0) | ||
280 | drive = "floppy0" | ||
281 | + backend_defaults = "auto" | ||
282 | logical_block_size = 512 (512 B) | ||
283 | physical_block_size = 512 (512 B) | ||
284 | min_io_size = 0 (0 B) | ||
285 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
286 | dev: floppy, id "" | ||
287 | unit = 0 (0x0) | ||
288 | drive = "none0" | ||
289 | + backend_defaults = "auto" | ||
290 | logical_block_size = 512 (512 B) | ||
291 | physical_block_size = 512 (512 B) | ||
292 | min_io_size = 0 (0 B) | ||
293 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
294 | dev: floppy, id "" | ||
295 | unit = 1 (0x1) | ||
296 | drive = "floppy1" | ||
297 | + backend_defaults = "auto" | ||
298 | logical_block_size = 512 (512 B) | ||
299 | physical_block_size = 512 (512 B) | ||
300 | min_io_size = 0 (0 B) | ||
301 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
302 | dev: floppy, id "" | ||
303 | unit = 0 (0x0) | ||
304 | drive = "none0" | ||
305 | + backend_defaults = "auto" | ||
306 | logical_block_size = 512 (512 B) | ||
307 | physical_block_size = 512 (512 B) | ||
308 | min_io_size = 0 (0 B) | ||
309 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
310 | dev: floppy, id "" | ||
311 | unit = 1 (0x1) | ||
312 | drive = "floppy1" | ||
313 | + backend_defaults = "auto" | ||
314 | logical_block_size = 512 (512 B) | ||
315 | physical_block_size = 512 (512 B) | ||
316 | min_io_size = 0 (0 B) | ||
317 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q | ||
318 | dev: floppy, id "" | ||
319 | unit = 1 (0x1) | ||
320 | drive = "none0" | ||
321 | + backend_defaults = "auto" | ||
322 | logical_block_size = 512 (512 B) | ||
323 | physical_block_size = 512 (512 B) | ||
324 | min_io_size = 0 (0 B) | ||
325 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q | ||
326 | dev: floppy, id "" | ||
327 | unit = 0 (0x0) | ||
328 | drive = "floppy0" | ||
329 | + backend_defaults = "auto" | ||
330 | logical_block_size = 512 (512 B) | ||
331 | physical_block_size = 512 (512 B) | ||
332 | min_io_size = 0 (0 B) | ||
333 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q | ||
334 | dev: floppy, id "" | ||
335 | unit = 1 (0x1) | ||
336 | drive = "none0" | ||
337 | + backend_defaults = "auto" | ||
338 | logical_block_size = 512 (512 B) | ||
339 | physical_block_size = 512 (512 B) | ||
340 | min_io_size = 0 (0 B) | ||
341 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q | ||
342 | dev: floppy, id "" | ||
343 | unit = 0 (0x0) | ||
344 | drive = "floppy0" | ||
345 | + backend_defaults = "auto" | ||
346 | logical_block_size = 512 (512 B) | ||
347 | physical_block_size = 512 (512 B) | ||
348 | min_io_size = 0 (0 B) | ||
349 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global floppy.drive=none0 -device | ||
350 | dev: floppy, id "" | ||
351 | unit = 0 (0x0) | ||
352 | drive = "none0" | ||
353 | + backend_defaults = "auto" | ||
354 | logical_block_size = 512 (512 B) | ||
355 | physical_block_size = 512 (512 B) | ||
356 | min_io_size = 0 (0 B) | ||
357 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy | ||
358 | dev: floppy, id "" | ||
359 | unit = 0 (0x0) | ||
360 | drive = "" | ||
361 | + backend_defaults = "auto" | ||
362 | logical_block_size = 512 (512 B) | ||
363 | physical_block_size = 512 (512 B) | ||
364 | min_io_size = 0 (0 B) | ||
365 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=120 | ||
366 | dev: floppy, id "" | ||
367 | unit = 0 (0x0) | ||
368 | drive = "" | ||
369 | + backend_defaults = "auto" | ||
370 | logical_block_size = 512 (512 B) | ||
371 | physical_block_size = 512 (512 B) | ||
372 | min_io_size = 0 (0 B) | ||
373 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=144 | ||
374 | dev: floppy, id "" | ||
375 | unit = 0 (0x0) | ||
376 | drive = "" | ||
377 | + backend_defaults = "auto" | ||
378 | logical_block_size = 512 (512 B) | ||
379 | physical_block_size = 512 (512 B) | ||
380 | min_io_size = 0 (0 B) | ||
381 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=288 | ||
382 | dev: floppy, id "" | ||
383 | unit = 0 (0x0) | ||
384 | drive = "" | ||
385 | + backend_defaults = "auto" | ||
386 | logical_block_size = 512 (512 B) | ||
387 | physical_block_size = 512 (512 B) | ||
388 | min_io_size = 0 (0 B) | ||
389 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t | ||
390 | dev: floppy, id "" | ||
391 | unit = 0 (0x0) | ||
392 | drive = "none0" | ||
393 | + backend_defaults = "auto" | ||
394 | logical_block_size = 512 (512 B) | ||
395 | physical_block_size = 512 (512 B) | ||
396 | min_io_size = 0 (0 B) | ||
397 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t | ||
398 | dev: floppy, id "" | ||
399 | unit = 0 (0x0) | ||
400 | drive = "none0" | ||
401 | + backend_defaults = "auto" | ||
402 | logical_block_size = 512 (512 B) | ||
403 | physical_block_size = 512 (512 B) | ||
404 | min_io_size = 0 (0 B) | ||
405 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical | ||
406 | dev: floppy, id "" | ||
407 | unit = 0 (0x0) | ||
408 | drive = "none0" | ||
409 | + backend_defaults = "auto" | ||
410 | logical_block_size = 512 (512 B) | ||
411 | physical_block_size = 512 (512 B) | ||
412 | min_io_size = 0 (0 B) | ||
413 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica | ||
414 | dev: floppy, id "" | ||
415 | unit = 0 (0x0) | ||
416 | drive = "none0" | ||
417 | + backend_defaults = "auto" | ||
418 | logical_block_size = 512 (512 B) | ||
419 | physical_block_size = 512 (512 B) | ||
420 | min_io_size = 0 (0 B) | ||
62 | -- | 421 | -- |
63 | 2.14.3 | 422 | 2.31.1 |
64 | 423 | ||
65 | diff view generated by jsdifflib |
1 | From: Sergio Lopez <slp@redhat.com> | 1 | From: Akihiko Odaki <akihiko.odaki@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Commit 5b2ffbe4d99843fd8305c573a100047a8c962327 ("virtio-blk: dataplane: | 3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com> |
4 | notify guest as a batch") deferred guest notification to a BH in order to | 4 | Message-id: 20210705130458.97642-3-akihiko.odaki@gmail.com |
5 | batch notifications, with the purpose of avoiding flooding the guest with | ||
6 | interruptions. | ||
7 | |||
8 | This optimization came with a cost. The average latency perceived in the | ||
9 | guest is increased by a few microseconds, but also when multiple IO | ||
10 | operations finish at the same time, the guest won't be notified until | ||
11 | all completions from each operation have been run. On the contrary, | ||
12 | virtio-scsi issues the notification at the end of each completion. | ||
13 | |||
14 | On the other hand, nowadays we have the EVENT_IDX feature that allows a | ||
15 | better coordination between QEMU and the Guest OS to avoid sending | ||
16 | unnecessary interruptions. | ||
17 | |||
18 | With this change, virtio-blk/dataplane only batches notifications if the | ||
19 | EVENT_IDX feature is not present. | ||
20 | |||
21 | Some numbers obtained with fio (ioengine=sync, iodepth=1, direct=1): | ||
22 | - Test specs: | ||
23 | * fio-3.4 (ioengine=sync, iodepth=1, direct=1) | ||
24 | * qemu master | ||
25 | * virtio-blk with a dedicated iothread (default poll-max-ns) | ||
26 | * backend: null_blk nr_devices=1 irqmode=2 completion_nsec=280000 | ||
27 | * 8 vCPUs pinned to isolated physical cores | ||
28 | * Emulator and iothread also pinned to separate isolated cores | ||
29 | * variance between runs < 1% | ||
30 | |||
31 | - Not patched | ||
32 | * numjobs=1: lat_avg=327.32 irqs=29998 | ||
33 | * numjobs=4: lat_avg=337.89 irqs=29073 | ||
34 | * numjobs=8: lat_avg=342.98 irqs=28643 | ||
35 | |||
36 | - Patched: | ||
37 | * numjobs=1: lat_avg=323.92 irqs=30262 | ||
38 | * numjobs=4: lat_avg=332.65 irqs=29520 | ||
39 | * numjobs=8: lat_avg=335.54 irqs=29323 | ||
40 | |||
41 | Signed-off-by: Sergio Lopez <slp@redhat.com> | ||
42 | Message-id: 20180307114459.26636-1-slp@redhat.com | ||
43 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
44 | --- | 6 | --- |
45 | hw/block/dataplane/virtio-blk.c | 15 +++++++++++++-- | 7 | block/io.c | 2 ++ |
46 | 1 file changed, 13 insertions(+), 2 deletions(-) | 8 | 1 file changed, 2 insertions(+) |
47 | 9 | ||
48 | diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c | 10 | diff --git a/block/io.c b/block/io.c |
49 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
50 | --- a/hw/block/dataplane/virtio-blk.c | 12 | --- a/block/io.c |
51 | +++ b/hw/block/dataplane/virtio-blk.c | 13 | +++ b/block/io.c |
52 | @@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane { | 14 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) |
53 | VirtIODevice *vdev; | 15 | |
54 | QEMUBH *bh; /* bh for guest notification */ | 16 | static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) |
55 | unsigned long *batch_notify_vqs; | ||
56 | + bool batch_notifications; | ||
57 | |||
58 | /* Note that these EventNotifiers are assigned by value. This is | ||
59 | * fine as long as you do not call event_notifier_cleanup on them | ||
60 | @@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane { | ||
61 | /* Raise an interrupt to signal guest, if necessary */ | ||
62 | void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) | ||
63 | { | 17 | { |
64 | - set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs); | 18 | + dst->pdiscard_alignment = MAX(dst->pdiscard_alignment, |
65 | - qemu_bh_schedule(s->bh); | 19 | + src->pdiscard_alignment); |
66 | + if (s->batch_notifications) { | 20 | dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); |
67 | + set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs); | 21 | dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer); |
68 | + qemu_bh_schedule(s->bh); | 22 | dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer, |
69 | + } else { | ||
70 | + virtio_notify_irqfd(s->vdev, vq); | ||
71 | + } | ||
72 | } | ||
73 | |||
74 | static void notify_guest_bh(void *opaque) | ||
75 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) | ||
76 | |||
77 | s->starting = true; | ||
78 | |||
79 | + if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { | ||
80 | + s->batch_notifications = true; | ||
81 | + } else { | ||
82 | + s->batch_notifications = false; | ||
83 | + } | ||
84 | + | ||
85 | /* Set up guest notifier (irq) */ | ||
86 | r = k->set_guest_notifiers(qbus->parent, nvqs, true); | ||
87 | if (r != 0) { | ||
88 | -- | 23 | -- |
89 | 2.14.3 | 24 | 2.31.1 |
90 | 25 | ||
91 | diff view generated by jsdifflib |