The following changes since commit 0ab4537f08e09b13788db67efd760592fb7db769:

  Merge remote-tracking branch 'remotes/stefanberger/tags/pull-tpm-2018-03-07-1' into staging (2018-03-08 12:56:39 +0000)

are available in the Git repository at:

  git://github.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 4486e89c219c0d1b9bd8dfa0b1dd5b0d51ff2268:

  vl: introduce vm_shutdown() (2018-03-08 17:38:51 +0000)

----------------------------------------------------------------

----------------------------------------------------------------

Deepa Srinivasan (1):
  block: Fix qemu crash when using scsi-block

Fam Zheng (1):
  README: Fix typo 'git-publish'

Sergio Lopez (1):
  virtio-blk: dataplane: Don't batch notifications if EVENT_IDX is
    present

Stefan Hajnoczi (4):
  block: add aio_wait_bh_oneshot()
  virtio-blk: fix race between .ioeventfd_stop() and vq handler
  virtio-scsi: fix race between .ioeventfd_stop() and vq handler
  vl: introduce vm_shutdown()

 include/block/aio-wait.h        | 13 +++++++++++
 include/sysemu/iothread.h       |  1 -
 include/sysemu/sysemu.h         |  1 +
 block/block-backend.c           | 51 ++++++++++++++++++++---------------------
 cpus.c                          | 16 ++++++++++---
 hw/block/dataplane/virtio-blk.c | 39 +++++++++++++++++++++++--------
 hw/scsi/virtio-scsi-dataplane.c |  9 ++++----
 iothread.c                      | 31 -------------------------
 util/aio-wait.c                 | 31 +++++++++++++++++++++++++
 vl.c                            | 13 +++--------
 README                          |  2 +-
 11 files changed, 122 insertions(+), 85 deletions(-)

--
2.14.3
From: Deepa Srinivasan <deepa.srinivasan@oracle.com>

Starting qemu with the following arguments causes qemu to segfault:
... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name=
iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1

This patch fixes blk_aio_ioctl() so it does not pass stack addresses to
blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More
details about the bug follow.

blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the
coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter().

When blk_aio_ioctl() is executed from within a coroutine context (e.g.
iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to
the current coroutine's wakeup queue. blk_aio_ioctl() then returns.

When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer:
....
    BlkRwCo *rwco = &acb->rwco;

    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
                             rwco->qiov->iov[0].iov_base); <--- qiov is
                                                                invalid here
...

In the case when blk_aio_ioctl() is called from a non-coroutine context,
blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls
qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine
execution is complete, control returns to blk_aio_ioctl_entry() after the call
to blk_co_ioctl(). There is no invalid reference after this point, but the
function is still holding on to invalid pointers.

The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer
rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and
the coroutine function casts it to QEMUIOVector or uses the void pointer
directly.

Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/block-backend.c | 51 +++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
 typedef struct BlkRwCo {
     BlockBackend *blk;
     int64_t offset;
-    QEMUIOVector *qiov;
+    void *iobuf;
     int ret;
     BdrvRequestFlags flags;
 } BlkRwCo;
@@ -XXX,XX +XXX,XX @@ typedef struct BlkRwCo {
 static void blk_read_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
-                              rwco->qiov, rwco->flags);
+    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+                              qiov, rwco->flags);
 }
 
 static void blk_write_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
-                               rwco->qiov, rwco->flags);
+    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
+                               qiov, rwco->flags);
 }
 
 static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
     rwco = (BlkRwCo) {
         .blk    = blk,
         .offset = offset,
-        .qiov   = &qiov,
+        .iobuf  = &qiov,
         .flags  = flags,
         .ret    = NOT_DONE,
     };
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete_bh(void *opaque)
 }
 
 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
-                                QEMUIOVector *qiov, CoroutineEntry co_entry,
+                                void *iobuf, CoroutineEntry co_entry,
                                 BdrvRequestFlags flags,
                                 BlockCompletionFunc *cb, void *opaque)
 {
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
     acb->rwco = (BlkRwCo) {
         .blk    = blk,
         .offset = offset,
-        .qiov   = qiov,
+        .iobuf  = iobuf,
         .flags  = flags,
         .ret    = NOT_DONE,
     };
@@ -XXX,XX +XXX,XX @@ static void blk_aio_read_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    assert(rwco->qiov->size == acb->bytes);
+    assert(qiov->size == acb->bytes);
     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
-                              rwco->qiov, rwco->flags);
+                              qiov, rwco->flags);
     blk_aio_complete(acb);
 }
 
@@ -XXX,XX +XXX,XX @@ static void blk_aio_write_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+    assert(!qiov || qiov->size == acb->bytes);
     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
-                               rwco->qiov, rwco->flags);
+                               qiov, rwco->flags);
     blk_aio_complete(acb);
 }
 
@@ -XXX,XX +XXX,XX @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 static void blk_ioctl_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
+
     rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             rwco->qiov->iov[0].iov_base);
+                             qiov->iov[0].iov_base);
 }
 
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
@@ -XXX,XX +XXX,XX @@ static void blk_aio_ioctl_entry(void *opaque)
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
 
-    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             rwco->qiov->iov[0].iov_base);
+    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+
     blk_aio_complete(acb);
 }
 
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                           BlockCompletionFunc *cb, void *opaque)
 {
-    QEMUIOVector qiov;
-    struct iovec iov;
-
-    iov = (struct iovec) {
-        .iov_base = buf,
-        .iov_len = 0,
-    };
-    qemu_iovec_init_external(&qiov, &iov, 1);
-
-    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
+    return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
 }
 
 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
@@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
 static void blk_pdiscard_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
-    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
+    QEMUIOVector *qiov = rwco->iobuf;
+
+    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
 }
 
 int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
--
2.14.3
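To make the lifetime bug fixed by the patch above concrete, here is a minimal
standalone C sketch (illustrative only, not QEMU code; all names are
hypothetical). A context struct on the caller's stack is handed to a callback
that runs after the caller has returned, so the callback dereferences a dead
stack frame, which is the same shape as blk_aio_ioctl() handing a stack
QEMUIOVector to blk_aio_ioctl_entry():

    #include <stdio.h>

    typedef void DeferredFn(void *opaque);

    /* Stands in for scheduling a coroutine/BH that runs later. */
    static DeferredFn *pending_fn;
    static void *pending_opaque;

    static void schedule_deferred(DeferredFn *fn, void *opaque)
    {
        pending_fn = fn;
        pending_opaque = opaque;
    }

    struct io_ctx {
        int value;
    };

    static void deferred_cb(void *opaque)
    {
        struct io_ctx *ctx = opaque;        /* dangling: dead stack frame */
        printf("value = %d\n", ctx->value); /* undefined behavior */
    }

    static void start_request(void)
    {
        struct io_ctx ctx = { .value = 42 }; /* dies when we return */
        schedule_deferred(deferred_cb, &ctx);
    }   /* &ctx is now invalid, but the callback still holds it */

    int main(void)
    {
        start_request();
        pending_fn(pending_opaque); /* may print garbage or crash */
        return 0;
    }

The patch avoids this pattern by storing the ioctl buffer pointer by value in
the heap-allocated AIOCB (BlkRwCo.iobuf) instead of pointing into the caller's
stack.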
From: Fam Zheng <famz@redhat.com>

Reported-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20180306024328.19195-1-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 README | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README b/README
index XXXXXXX..XXXXXXX 100644
--- a/README
+++ b/README
@@ -XXX,XX +XXX,XX @@ The QEMU website is also maintained under source control.
   git clone git://git.qemu.org/qemu-web.git
   https://www.qemu.org/2017/02/04/the-new-qemu-website-is-up/
 
-A 'git-profile' utility was created to make above process less
+A 'git-publish' utility was created to make above process less
 cumbersome, and is highly recommended for making regular contributions,
 or even just for sending consecutive patch series revisions. It also
 requires a working 'git send-email' setup, and by default doesn't
--
2.14.3
From: Sergio Lopez <slp@redhat.com>

Commit 5b2ffbe4d99843fd8305c573a100047a8c962327 ("virtio-blk: dataplane:
notify guest as a batch") deferred guest notification to a BH in order
to batch notifications, with the purpose of avoiding flooding the guest
with interrupts.

This optimization came with a cost. The average latency perceived in the
guest is increased by a few microseconds, and when multiple IO
operations finish at the same time, the guest won't be notified until
all completions from each operation have been run. By contrast,
virtio-scsi issues the notification at the end of each completion.

On the other hand, we now have the EVENT_IDX feature, which allows
better coordination between QEMU and the guest OS to avoid sending
unnecessary interrupts.

With this change, virtio-blk/dataplane only batches notifications if the
EVENT_IDX feature is not present.

Some numbers obtained with fio (ioengine=sync, iodepth=1, direct=1):
- Test specs:
  * fio-3.4 (ioengine=sync, iodepth=1, direct=1)
  * qemu master
  * virtio-blk with a dedicated iothread (default poll-max-ns)
  * backend: null_blk nr_devices=1 irqmode=2 completion_nsec=280000
  * 8 vCPUs pinned to isolated physical cores
  * Emulator and iothread also pinned to separate isolated cores
  * variance between runs < 1%

- Not patched:
  * numjobs=1: lat_avg=327.32 irqs=29998
  * numjobs=4: lat_avg=337.89 irqs=29073
  * numjobs=8: lat_avg=342.98 irqs=28643

- Patched:
  * numjobs=1: lat_avg=323.92 irqs=30262
  * numjobs=4: lat_avg=332.65 irqs=29520
  * numjobs=8: lat_avg=335.54 irqs=29323

Signed-off-by: Sergio Lopez <slp@redhat.com>
Message-id: 20180307114459.26636-1-slp@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane {
     VirtIODevice *vdev;
     QEMUBH *bh;                     /* bh for guest notification */
     unsigned long *batch_notify_vqs;
+    bool batch_notifications;
 
     /* Note that these EventNotifiers are assigned by value.  This is
      * fine as long as you do not call event_notifier_cleanup on them
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane {
 /* Raise an interrupt to signal guest, if necessary */
 void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
 {
-    set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
-    qemu_bh_schedule(s->bh);
+    if (s->batch_notifications) {
+        set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
+        qemu_bh_schedule(s->bh);
+    } else {
+        virtio_notify_irqfd(s->vdev, vq);
+    }
 }
 
 static void notify_guest_bh(void *opaque)
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 
     s->starting = true;
 
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        s->batch_notifications = true;
+    } else {
+        s->batch_notifications = false;
+    }
+
     /* Set up guest notifier (irq) */
     r = k->set_guest_notifiers(qbus->parent, nvqs, true);
     if (r != 0) {
--
2.14.3
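For background on why EVENT_IDX makes host-side batching redundant: with this
feature the guest driver publishes the used-ring index after which it wants an
interrupt, and the device suppresses the rest. The check below mirrors
vring_need_event() from the VIRTIO specification; it is a simplified
standalone sketch, not QEMU's implementation:

    #include <stdint.h>
    #include <stdio.h>

    /* Notify only if event_idx falls in the window (old_idx, new_idx],
     * using unsigned 16-bit arithmetic so ring index wraparound works. */
    static int vring_need_event(uint16_t event_idx, uint16_t new_idx,
                                uint16_t old_idx)
    {
        return (uint16_t)(new_idx - event_idx - 1) <
               (uint16_t)(new_idx - old_idx);
    }

    int main(void)
    {
        /* The device advanced the used index from 10 to 13 in one batch. */
        uint16_t old_idx = 10, new_idx = 13;

        /* Guest asked for an interrupt after entry 11: notify (1). */
        printf("event_idx=11 -> %d\n", vring_need_event(11, new_idx, old_idx));
        /* Guest asked for entry 20, not reached yet: suppress (0). */
        printf("event_idx=20 -> %d\n", vring_need_event(20, new_idx, old_idx));
        return 0;
    }

Since the guest already filters interrupts per completion, notifying
immediately via virtio_notify_irqfd() does not flood it, which is why the
patch disables the BH batching when the feature is negotiated.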
Sometimes it's necessary for the main loop thread to run a BH in an
IOThread and wait for its completion. This primitive is useful during
startup/shutdown to synchronize and avoid race conditions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20180307144205.20619-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/aio-wait.h | 13 +++++++++++++
 util/aio-wait.c          | 31 +++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
  */
 void aio_wait_kick(AioWait *wait);
 
+/**
+ * aio_wait_bh_oneshot:
+ * @ctx: the aio context
+ * @cb: the BH callback function
+ * @opaque: user data for the BH callback function
+ *
+ * Run a BH in @ctx and wait for it to complete.
+ *
+ * Must be called from the main loop thread with @ctx acquired exactly once.
+ * Note that main loop event processing may occur.
+ */
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
 #endif /* QEMU_AIO_WAIT */
diff --git a/util/aio-wait.c b/util/aio-wait.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-wait.c
+++ b/util/aio-wait.c
@@ -XXX,XX +XXX,XX @@ void aio_wait_kick(AioWait *wait)
         aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
     }
 }
+
+typedef struct {
+    AioWait wait;
+    bool done;
+    QEMUBHFunc *cb;
+    void *opaque;
+} AioWaitBHData;
+
+/* Context: BH in IOThread */
+static void aio_wait_bh(void *opaque)
+{
+    AioWaitBHData *data = opaque;
+
+    data->cb(data->opaque);
+
+    data->done = true;
+    aio_wait_kick(&data->wait);
+}
+
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+{
+    AioWaitBHData data = {
+        .cb = cb,
+        .opaque = opaque,
+    };
+
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
+    aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
+    AIO_WAIT_WHILE(&data.wait, ctx, !data.done);
+}
--
2.14.3
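As a usage sketch for the new primitive (hypothetical device code, not part of
this series): a teardown step can be pushed into an IOThread's AioContext and
waited on synchronously. Note the contrast with the blk_aio_ioctl() bug fixed
earlier in this pull request: the stack-allocated AioWaitBHData above is safe
because aio_wait_bh_oneshot() does not return until the BH has run.

    #include "qemu/osdep.h"
    #include "block/aio-wait.h"

    typedef struct {
        AioContext *ctx;    /* IOThread context the device runs in */
        /* ... device state ... */
    } MyDevice;

    /* Context: BH in IOThread */
    static void my_device_stop_bh(void *opaque)
    {
        MyDevice *dev = opaque;

        /* Detach fd handlers, quiesce the device, etc. This runs in the
         * IOThread, so it cannot race with handlers in that context. */
        (void)dev;
    }

    /* Context: QEMU main loop thread, with dev->ctx acquired exactly once */
    static void my_device_stop(MyDevice *dev)
    {
        aio_wait_bh_oneshot(dev->ctx, my_device_stop_bh, dev);
        /* The BH has completed by the time we get here. */
    }

The next two patches in this series apply exactly this pattern to virtio-blk
and virtio-scsi dataplane teardown.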
If the main loop thread invokes .ioeventfd_stop() just as the vq handler
function begins in the IOThread then the handler may lose the race for
the AioContext lock. By the time the vq handler is able to acquire the
AioContext lock the ioeventfd has already been removed and the handler
isn't supposed to run anymore!

Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal
from within the IOThread. This way no races with the vq handler are
possible.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20180307144205.20619-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     return -ENOSYS;
 }
 
+/* Stop notifications for new requests from guest.
+ *
+ * Context: BH in IOThread
+ */
+static void virtio_blk_data_plane_stop_bh(void *opaque)
+{
+    VirtIOBlockDataPlane *s = opaque;
+    unsigned i;
+
+    for (i = 0; i < s->conf->num_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(s->vdev, i);
+
+        virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
+    }
+}
+
 /* Context: QEMU global mutex held */
 void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 {
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
     trace_virtio_blk_data_plane_stop(s);
 
     aio_context_acquire(s->ctx);
-
-    /* Stop notifications for new requests from guest */
-    for (i = 0; i < nvqs; i++) {
-        VirtQueue *vq = virtio_get_queue(s->vdev, i);
-
-        virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
-    }
+    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
 
     /* Drain and switch bs back to the QEMU main loop */
     blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
--
2.14.3
If the main loop thread invokes .ioeventfd_stop() just as the vq handler
function begins in the IOThread then the handler may lose the race for
the AioContext lock. By the time the vq handler is able to acquire the
AioContext lock the ioeventfd has already been removed and the handler
isn't supposed to run anymore!

Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal
from within the IOThread. This way no races with the vq handler are
possible.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20180307144205.20619-4-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/scsi/virtio-scsi-dataplane.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
     return 0;
 }
 
-/* assumes s->ctx held */
-static void virtio_scsi_clear_aio(VirtIOSCSI *s)
+/* Context: BH in IOThread */
+static void virtio_scsi_dataplane_stop_bh(void *opaque)
 {
+    VirtIOSCSI *s = opaque;
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
     int i;
 
@@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
     return 0;
 
 fail_vrings:
-    virtio_scsi_clear_aio(s);
+    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
     aio_context_release(s->ctx);
     for (i = 0; i < vs->conf.num_queues + 2; i++) {
         virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
     s->dataplane_stopping = true;
 
     aio_context_acquire(s->ctx);
-    virtio_scsi_clear_aio(s);
+    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
     aio_context_release(s->ctx);
 
     blk_drain_all(); /* ensure there are no in-flight requests */
314 | + return NULL; | ||
315 | + } | ||
316 | + } | ||
317 | + | ||
318 | + if (job_id) { | ||
319 | + if (flags & BLOCK_JOB_INTERNAL) { | ||
320 | + error_setg(errp, "Cannot specify job ID for internal block job"); | ||
321 | + return NULL; | ||
322 | + } | ||
323 | + | ||
324 | + if (!id_wellformed(job_id)) { | ||
325 | + error_setg(errp, "Invalid job ID '%s'", job_id); | ||
326 | + return NULL; | ||
327 | + } | ||
328 | + | ||
329 | + if (block_job_get(job_id)) { | ||
330 | + error_setg(errp, "Job ID '%s' already in use", job_id); | ||
331 | + return NULL; | ||
332 | + } | ||
333 | + } | ||
334 | + | ||
335 | + blk = blk_new(perm, shared_perm); | ||
336 | + ret = blk_insert_bs(blk, bs, errp); | ||
337 | + if (ret < 0) { | ||
338 | + blk_unref(blk); | ||
339 | + return NULL; | ||
340 | + } | ||
341 | + | ||
342 | + job = g_malloc0(driver->instance_size); | ||
343 | + job->driver = driver; | ||
344 | + job->id = g_strdup(job_id); | ||
345 | + job->blk = blk; | ||
346 | + job->cb = cb; | ||
347 | + job->opaque = opaque; | ||
348 | + job->busy = false; | ||
349 | + job->paused = true; | ||
350 | + job->pause_count = 1; | ||
351 | + job->refcnt = 1; | ||
352 | + | ||
353 | + error_setg(&job->blocker, "block device is in use by block job: %s", | ||
354 | + BlockJobType_lookup[driver->job_type]); | ||
355 | + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); | ||
356 | + bs->job = job; | ||
357 | + | ||
358 | + blk_set_dev_ops(blk, &block_job_dev_ops, job); | ||
359 | + bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); | ||
360 | + | ||
361 | + QLIST_INSERT_HEAD(&block_jobs, job, job_list); | ||
362 | + | ||
363 | + blk_add_aio_context_notifier(blk, block_job_attached_aio_context, | ||
364 | + block_job_detach_aio_context, job); | ||
365 | + | ||
366 | + /* Only set speed when necessary to avoid NotSupported error */ | ||
367 | + if (speed != 0) { | ||
368 | + Error *local_err = NULL; | ||
369 | + | ||
370 | + block_job_set_speed(job, speed, &local_err); | ||
371 | + if (local_err) { | ||
372 | + block_job_unref(job); | ||
373 | + error_propagate(errp, local_err); | ||
374 | + return NULL; | ||
375 | + } | ||
376 | + } | ||
377 | + return job; | ||
378 | +} | ||
379 | + | ||
380 | void block_job_pause_all(void) | ||
381 | { | ||
382 | BlockJob *job = NULL; | ||
383 | @@ -XXX,XX +XXX,XX @@ void block_job_pause_all(void) | ||
384 | } | ||
385 | } | ||
386 | |||
387 | +void block_job_early_fail(BlockJob *job) | ||
388 | +{ | ||
389 | + block_job_unref(job); | ||
390 | +} | ||
391 | + | ||
392 | +void block_job_completed(BlockJob *job, int ret) | ||
393 | +{ | ||
394 | + assert(blk_bs(job->blk)->job == job); | ||
395 | + assert(!job->completed); | ||
396 | + job->completed = true; | ||
397 | + job->ret = ret; | ||
398 | + if (!job->txn) { | ||
399 | + block_job_completed_single(job); | ||
400 | + } else if (ret < 0 || block_job_is_cancelled(job)) { | ||
401 | + block_job_completed_txn_abort(job); | ||
402 | + } else { | ||
403 | + block_job_completed_txn_success(job); | ||
404 | + } | ||
405 | +} | ||
406 | + | ||
407 | +static bool block_job_should_pause(BlockJob *job) | ||
408 | +{ | ||
409 | + return job->pause_count > 0; | ||
410 | +} | ||
411 | + | ||
412 | +void coroutine_fn block_job_pause_point(BlockJob *job) | ||
413 | +{ | ||
414 | + assert(job && block_job_started(job)); | ||
415 | + | ||
416 | + if (!block_job_should_pause(job)) { | ||
417 | + return; | ||
418 | + } | ||
419 | + if (block_job_is_cancelled(job)) { | ||
420 | + return; | ||
421 | + } | ||
422 | + | ||
423 | + if (job->driver->pause) { | ||
424 | + job->driver->pause(job); | ||
425 | + } | ||
426 | + | ||
427 | + if (block_job_should_pause(job) && !block_job_is_cancelled(job)) { | ||
428 | + job->paused = true; | ||
429 | + job->busy = false; | ||
430 | + qemu_coroutine_yield(); /* wait for block_job_resume() */ | ||
431 | + job->busy = true; | ||
432 | + job->paused = false; | ||
433 | + } | ||
434 | + | ||
435 | + if (job->driver->resume) { | ||
436 | + job->driver->resume(job); | ||
437 | + } | ||
438 | +} | ||
439 | + | ||
440 | void block_job_resume_all(void) | ||
441 | { | ||
442 | BlockJob *job = NULL; | ||
443 | @@ -XXX,XX +XXX,XX @@ void block_job_resume_all(void) | ||
444 | } | ||
445 | } | ||
446 | |||
447 | +void block_job_enter(BlockJob *job) | ||
448 | +{ | ||
449 | + if (job->co && !job->busy) { | ||
450 | + bdrv_coroutine_enter(blk_bs(job->blk), job->co); | ||
451 | + } | ||
452 | +} | ||
453 | + | ||
454 | +bool block_job_is_cancelled(BlockJob *job) | ||
455 | +{ | ||
456 | + return job->cancelled; | ||
457 | +} | ||
458 | + | ||
459 | +void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) | ||
460 | +{ | ||
461 | + assert(job->busy); | ||
462 | + | ||
463 | + /* Check cancellation *before* setting busy = false, too! */ | ||
464 | + if (block_job_is_cancelled(job)) { | ||
465 | + return; | ||
466 | + } | ||
467 | + | ||
468 | + job->busy = false; | ||
469 | + if (!block_job_should_pause(job)) { | ||
470 | + co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); | ||
471 | + } | ||
472 | + job->busy = true; | ||
473 | + | ||
474 | + block_job_pause_point(job); | ||
475 | +} | ||
476 | + | ||
477 | +void block_job_yield(BlockJob *job) | ||
478 | +{ | ||
479 | + assert(job->busy); | ||
480 | + | ||
481 | + /* Check cancellation *before* setting busy = false, too! */ | ||
482 | + if (block_job_is_cancelled(job)) { | ||
483 | + return; | ||
484 | + } | ||
485 | + | ||
486 | + job->busy = false; | ||
487 | + if (!block_job_should_pause(job)) { | ||
488 | + qemu_coroutine_yield(); | ||
489 | + } | ||
490 | + job->busy = true; | ||
491 | + | ||
492 | + block_job_pause_point(job); | ||
493 | +} | ||
494 | + | ||
495 | void block_job_event_ready(BlockJob *job) | ||
496 | { | ||
497 | job->ready = true; | ||
498 | -- | 55 | -- |
499 | 2.9.3 | 56 | 2.14.3 |
500 | 57 | ||
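For orientation, the driver-facing half of the API that the left-hand patch groups together is used roughly as follows. This is a minimal sketch, not code from either series: ExampleJob, example_job_run() and example_copy_chunk() are invented names, while block_job_is_cancelled(), block_job_sleep_ns() and the pause semantics are the ones shown above.

    typedef struct ExampleJob {
        BlockJob common;
        int64_t offset;
        int64_t end;
    } ExampleJob;

    static void coroutine_fn example_job_run(void *opaque)
    {
        ExampleJob *s = opaque;
        int ret = 0;

        while (s->offset < s->end && !block_job_is_cancelled(&s->common)) {
            /* Sleeping also services pause requests: block_job_sleep_ns()
             * ends with a call to block_job_pause_point(). */
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);

            /* Hypothetical I/O step; assumed to advance s->offset. */
            ret = example_copy_chunk(s);
            if (ret < 0) {
                break;
            }
        }

        /* Completion is reported with block_job_completed(); real drivers
         * defer it to the main loop first, which is out of scope here. */
    }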
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before |
---|---|---|---|
2 | stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa | ||
3 | ("iothread: Stop threads before main() quits") tried to work around the | ||
4 | fact that emulation was still active during termination by stopping | ||
5 | iothreads. They suffer from race conditions: | ||
6 | 1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the | ||
7 | virtio_scsi_ctx_check() assertion failure because the BDS AioContext | ||
8 | has been modified by iothread_stop_all(). | ||
9 | 2. Guest vq kick racing with main loop termination leaves a readable | ||
10 | ioeventfd that is handled by the next aio_poll() when external | ||
11 | clients are enabled again, resulting in unwanted emulation activity. | ||
2 | 12 | ||
3 | Outside blockjob.c, block_job_unref is only used when a block job fails | 13 | This patch obsoletes those commits by fully disabling emulation activity |
4 | to start, and block_job_ref is not used at all. The reference counting | 14 | when vcpus are stopped. |
5 | is thus well hidden. Introduce a separate function to be used | 15 | |
6 | by block jobs; because block_job_ref and block_job_unref now become | ||
7 | static, move them earlier in blockjob.c. | ||
8 | 15 | ||
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 16 | Use the new vm_shutdown() function instead of pause_all_vcpus() so that |
10 | Reviewed-by: John Snow <jsnow@redhat.com> | 17 | vm change state handlers are invoked too. Virtio devices will now stop |
11 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 18 | their ioeventfds, preventing further emulation activity after vm_stop(). |
12 | Reviewed-by: Jeff Cody <jcody@redhat.com> | 19 | |
13 | Message-id: 20170508141310.8674-4-pbonzini@redhat.com | 20 | Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a |
14 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 21 | QMP STOP event that may affect existing clients. |
22 | |||
23 | It is no longer necessary to call replay_disable_events() directly since | ||
24 | vm_shutdown() does so already. | ||
25 | |||
26 | Drop iothread_stop_all() since it is no longer used. | ||
27 | |||
28 | Cc: Fam Zheng <famz@redhat.com> | ||
29 | Cc: Kevin Wolf <kwolf@redhat.com> | ||
30 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
31 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
32 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | ||
33 | Message-id: 20180307144205.20619-5-stefanha@redhat.com | ||
34 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
15 | --- | 35 | --- |
16 | block/backup.c | 2 +- | 36 | include/sysemu/iothread.h | 1 - |
17 | block/commit.c | 2 +- | 37 | include/sysemu/sysemu.h | 1 + |
18 | block/mirror.c | 2 +- | 38 | cpus.c | 16 +++++++++++++--- |
19 | blockjob.c | 47 ++++++++++++++++++++++++++------------------ | 39 | iothread.c | 31 ------------------------------- |
20 | include/block/blockjob_int.h | 15 +++----------- | 40 | vl.c | 13 +++---------- |
21 | tests/test-blockjob.c | 10 +++++----- | 41 | 5 files changed, 17 insertions(+), 45 deletions(-) |
22 | 6 files changed, 39 insertions(+), 39 deletions(-) | ||
23 | 42 | ||
24 | diff --git a/block/backup.c b/block/backup.c | 43 | diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h |
25 | index XXXXXXX..XXXXXXX 100644 | 44 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/block/backup.c | 45 | --- a/include/sysemu/iothread.h |
27 | +++ b/block/backup.c | 46 | +++ b/include/sysemu/iothread.h |
28 | @@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, | 47 | @@ -XXX,XX +XXX,XX @@ typedef struct { |
48 | char *iothread_get_id(IOThread *iothread); | ||
49 | IOThread *iothread_by_id(const char *id); | ||
50 | AioContext *iothread_get_aio_context(IOThread *iothread); | ||
51 | -void iothread_stop_all(void); | ||
52 | GMainContext *iothread_get_g_main_context(IOThread *iothread); | ||
53 | |||
54 | /* | ||
55 | diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/include/sysemu/sysemu.h | ||
58 | +++ b/include/sysemu/sysemu.h | ||
59 | @@ -XXX,XX +XXX,XX @@ void vm_start(void); | ||
60 | int vm_prepare_start(void); | ||
61 | int vm_stop(RunState state); | ||
62 | int vm_stop_force_state(RunState state); | ||
63 | +int vm_shutdown(void); | ||
64 | |||
65 | typedef enum WakeupReason { | ||
66 | /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */ | ||
67 | diff --git a/cpus.c b/cpus.c | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/cpus.c | ||
70 | +++ b/cpus.c | ||
71 | @@ -XXX,XX +XXX,XX @@ void cpu_synchronize_all_pre_loadvm(void) | ||
29 | } | 72 | } |
30 | if (job) { | 73 | } |
31 | backup_clean(&job->common); | 74 | |
32 | - block_job_unref(&job->common); | 75 | -static int do_vm_stop(RunState state) |
33 | + block_job_early_fail(&job->common); | 76 | +static int do_vm_stop(RunState state, bool send_stop) |
77 | { | ||
78 | int ret = 0; | ||
79 | |||
80 | @@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state) | ||
81 | pause_all_vcpus(); | ||
82 | runstate_set(state); | ||
83 | vm_state_notify(0, state); | ||
84 | - qapi_event_send_stop(&error_abort); | ||
85 | + if (send_stop) { | ||
86 | + qapi_event_send_stop(&error_abort); | ||
87 | + } | ||
34 | } | 88 | } |
35 | 89 | ||
36 | return NULL; | 90 | bdrv_drain_all(); |
37 | diff --git a/block/commit.c b/block/commit.c | 91 | @@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state) |
38 | index XXXXXXX..XXXXXXX 100644 | 92 | return ret; |
39 | --- a/block/commit.c | ||
40 | +++ b/block/commit.c | ||
41 | @@ -XXX,XX +XXX,XX @@ fail: | ||
42 | if (commit_top_bs) { | ||
43 | bdrv_set_backing_hd(overlay_bs, top, &error_abort); | ||
44 | } | ||
45 | - block_job_unref(&s->common); | ||
46 | + block_job_early_fail(&s->common); | ||
47 | } | 93 | } |
48 | 94 | ||
49 | 95 | +/* Special vm_stop() variant for terminating the process. Historically clients | |
50 | diff --git a/block/mirror.c b/block/mirror.c | 96 | + * did not expect a QMP STOP event and so we need to retain compatibility. |
51 | index XXXXXXX..XXXXXXX 100644 | 97 | + */ |
52 | --- a/block/mirror.c | 98 | +int vm_shutdown(void) |
53 | +++ b/block/mirror.c | ||
54 | @@ -XXX,XX +XXX,XX @@ fail: | ||
55 | |||
56 | g_free(s->replaces); | ||
57 | blk_unref(s->target); | ||
58 | - block_job_unref(&s->common); | ||
59 | + block_job_early_fail(&s->common); | ||
60 | } | ||
61 | |||
62 | bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, | ||
63 | diff --git a/blockjob.c b/blockjob.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/blockjob.c | ||
66 | +++ b/blockjob.c | ||
67 | @@ -XXX,XX +XXX,XX @@ BlockJob *block_job_get(const char *id) | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
71 | +static void block_job_ref(BlockJob *job) | ||
72 | +{ | 99 | +{ |
73 | + ++job->refcnt; | 100 | + return do_vm_stop(RUN_STATE_SHUTDOWN, false); |
74 | +} | 101 | +} |
75 | + | 102 | + |
76 | +static void block_job_attached_aio_context(AioContext *new_context, | 103 | static bool cpu_can_run(CPUState *cpu) |
77 | + void *opaque); | ||
78 | +static void block_job_detach_aio_context(void *opaque); | ||
79 | + | ||
80 | +static void block_job_unref(BlockJob *job) | ||
81 | +{ | ||
82 | + if (--job->refcnt == 0) { | ||
83 | + BlockDriverState *bs = blk_bs(job->blk); | ||
84 | + bs->job = NULL; | ||
85 | + block_job_remove_all_bdrv(job); | ||
86 | + blk_remove_aio_context_notifier(job->blk, | ||
87 | + block_job_attached_aio_context, | ||
88 | + block_job_detach_aio_context, job); | ||
89 | + blk_unref(job->blk); | ||
90 | + error_free(job->blocker); | ||
91 | + g_free(job->id); | ||
92 | + QLIST_REMOVE(job, job_list); | ||
93 | + g_free(job); | ||
94 | + } | ||
95 | +} | ||
96 | + | ||
97 | static void block_job_attached_aio_context(AioContext *new_context, | ||
98 | void *opaque) | ||
99 | { | 104 | { |
100 | @@ -XXX,XX +XXX,XX @@ void block_job_start(BlockJob *job) | 105 | if (cpu->stop) { |
101 | bdrv_coroutine_enter(blk_bs(job->blk), job->co); | 106 | @@ -XXX,XX +XXX,XX @@ int vm_stop(RunState state) |
107 | return 0; | ||
108 | } | ||
109 | |||
110 | - return do_vm_stop(state); | ||
111 | + return do_vm_stop(state, true); | ||
102 | } | 112 | } |
103 | 113 | ||
104 | -void block_job_ref(BlockJob *job) | 114 | /** |
105 | +void block_job_early_fail(BlockJob *job) | 115 | diff --git a/iothread.c b/iothread.c |
106 | { | 116 | index XXXXXXX..XXXXXXX 100644 |
107 | - ++job->refcnt; | 117 | --- a/iothread.c |
118 | +++ b/iothread.c | ||
119 | @@ -XXX,XX +XXX,XX @@ void iothread_stop(IOThread *iothread) | ||
120 | qemu_thread_join(&iothread->thread); | ||
121 | } | ||
122 | |||
123 | -static int iothread_stop_iter(Object *object, void *opaque) | ||
124 | -{ | ||
125 | - IOThread *iothread; | ||
126 | - | ||
127 | - iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); | ||
128 | - if (!iothread) { | ||
129 | - return 0; | ||
130 | - } | ||
131 | - iothread_stop(iothread); | ||
132 | - return 0; | ||
108 | -} | 133 | -} |
109 | - | 134 | - |
110 | -void block_job_unref(BlockJob *job) | 135 | static void iothread_instance_init(Object *obj) |
136 | { | ||
137 | IOThread *iothread = IOTHREAD(obj); | ||
138 | @@ -XXX,XX +XXX,XX @@ IOThreadInfoList *qmp_query_iothreads(Error **errp) | ||
139 | return head; | ||
140 | } | ||
141 | |||
142 | -void iothread_stop_all(void) | ||
111 | -{ | 143 | -{ |
112 | - if (--job->refcnt == 0) { | 144 | - Object *container = object_get_objects_root(); |
113 | - BlockDriverState *bs = blk_bs(job->blk); | 145 | - BlockDriverState *bs; |
114 | - bs->job = NULL; | 146 | - BdrvNextIterator it; |
115 | - block_job_remove_all_bdrv(job); | 147 | - |
116 | - blk_remove_aio_context_notifier(job->blk, | 148 | - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
117 | - block_job_attached_aio_context, | 149 | - AioContext *ctx = bdrv_get_aio_context(bs); |
118 | - block_job_detach_aio_context, job); | 150 | - if (ctx == qemu_get_aio_context()) { |
119 | - blk_unref(job->blk); | 151 | - continue; |
120 | - error_free(job->blocker); | 152 | - } |
121 | - g_free(job->id); | 153 | - aio_context_acquire(ctx); |
122 | - QLIST_REMOVE(job, job_list); | 154 | - bdrv_set_aio_context(bs, qemu_get_aio_context()); |
123 | - g_free(job); | 155 | - aio_context_release(ctx); |
124 | - } | 156 | - } |
125 | + block_job_unref(job); | 157 | - |
126 | } | 158 | - object_child_foreach(container, iothread_stop_iter, NULL); |
127 | 159 | -} | |
128 | static void block_job_completed_single(BlockJob *job) | 160 | - |
129 | diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h | 161 | static gpointer iothread_g_main_context_init(gpointer opaque) |
162 | { | ||
163 | AioContext *ctx; | ||
164 | diff --git a/vl.c b/vl.c | ||
130 | index XXXXXXX..XXXXXXX 100644 | 165 | index XXXXXXX..XXXXXXX 100644 |
131 | --- a/include/block/blockjob_int.h | 166 | --- a/vl.c |
132 | +++ b/include/block/blockjob_int.h | 167 | +++ b/vl.c |
133 | @@ -XXX,XX +XXX,XX @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); | 168 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
134 | void block_job_yield(BlockJob *job); | 169 | os_setup_post(); |
135 | 170 | ||
136 | /** | 171 | main_loop(); |
137 | - * block_job_ref: | 172 | - replay_disable_events(); |
138 | + * block_job_early_fail: | 173 | |
139 | * @bs: The block device. | 174 | - /* The ordering of the following is delicate. Stop vcpus to prevent new |
140 | * | 175 | - * I/O requests being queued by the guest. Then stop IOThreads (this |
141 | - * Grab a reference to the block job. Should be paired with block_job_unref. | 176 | - * includes a drain operation and completes all request processing). At |
142 | + * The block job could not be started, free it. | 177 | - * this point emulated devices are still associated with their IOThreads |
143 | */ | 178 | - * (if any) but no longer have any work to do. Only then can we close |
144 | -void block_job_ref(BlockJob *job); | 179 | - * block devices safely because we know there is no more I/O coming. |
145 | - | 180 | - */ |
146 | -/** | 181 | - pause_all_vcpus(); |
147 | - * block_job_unref: | 182 | - iothread_stop_all(); |
148 | - * @bs: The block device. | 183 | + /* No more vcpu or device emulation activity beyond this point */ |
149 | - * | 184 | + vm_shutdown(); |
150 | - * Release reference to the block job and release resources if it is the last | 185 | + |
151 | - * reference. | 186 | bdrv_close_all(); |
152 | - */ | 187 | |
153 | -void block_job_unref(BlockJob *job); | 188 | res_free(); |
154 | +void block_job_early_fail(BlockJob *job); | ||
155 | |||
156 | /** | ||
157 | * block_job_completed: | ||
158 | diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c | ||
159 | index XXXXXXX..XXXXXXX 100644 | ||
160 | --- a/tests/test-blockjob.c | ||
161 | +++ b/tests/test-blockjob.c | ||
162 | @@ -XXX,XX +XXX,XX @@ static void test_job_ids(void) | ||
163 | job[1] = do_test_id(blk[1], "id0", false); | ||
164 | |||
165 | /* But once job[0] finishes we can reuse its ID */ | ||
166 | - block_job_unref(job[0]); | ||
167 | + block_job_early_fail(job[0]); | ||
168 | job[1] = do_test_id(blk[1], "id0", true); | ||
169 | |||
170 | /* No job ID specified, defaults to the backend name ('drive1') */ | ||
171 | - block_job_unref(job[1]); | ||
172 | + block_job_early_fail(job[1]); | ||
173 | job[1] = do_test_id(blk[1], NULL, true); | ||
174 | |||
175 | /* Duplicate job ID */ | ||
176 | @@ -XXX,XX +XXX,XX @@ static void test_job_ids(void) | ||
177 | /* This one is valid */ | ||
178 | job[2] = do_test_id(blk[2], "id_2", true); | ||
179 | |||
180 | - block_job_unref(job[0]); | ||
181 | - block_job_unref(job[1]); | ||
182 | - block_job_unref(job[2]); | ||
183 | + block_job_early_fail(job[0]); | ||
184 | + block_job_early_fail(job[1]); | ||
185 | + block_job_early_fail(job[2]); | ||
186 | |||
187 | destroy_blk(blk[0]); | ||
188 | destroy_blk(blk[1]); | ||
189 | -- | 189 | -- |
190 | 2.9.3 | 190 | 2.14.3 |
191 | 191 | ||
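To show where the renamed helper fits, here is a hypothetical creation path patterned after the backup/commit/mirror hunks above. Only block_job_create() and block_job_early_fail() are real entry points from the patch; ExampleJob, example_job_driver and example_job_setup() are assumptions for the sketch.

    static BlockJob *example_job_create(BlockDriverState *bs, Error **errp)
    {
        ExampleJob *s;

        s = block_job_create(NULL, &example_job_driver, bs,
                             BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL,
                             0 /* speed */, BLOCK_JOB_DEFAULT,
                             NULL, NULL, errp);
        if (!s) {
            return NULL;
        }

        if (example_job_setup(s, errp) < 0) {   /* hypothetical setup step */
            /* The job never ran, so undo creation with the dedicated
             * helper; block_job_unref() is no longer visible here. */
            block_job_early_fail(&s->common);
            return NULL;
        }

        return &s->common;
    }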
Deleted patch | |||
---|---|---|---|
1 | From: Paolo Bonzini <pbonzini@redhat.com> | ||
2 | 1 | ||
3 | The new function helps respect the invariant that the coroutine is | ||
4 | entered with user_paused false, a pause count of zero, and no error | ||
5 | recorded in the iostatus. | ||
6 | |||
7 | Resetting the iostatus is now common to all of block_job_cancel_async, | ||
8 | block_job_user_resume and block_job_iostatus_reset, albeit with slight | ||
9 | differences: | ||
10 | |||
11 | - block_job_cancel_async resets the iostatus, and resumes the job if | ||
12 | there was an error, but the coroutine is not restarted immediately. | ||
13 | For example, the caller may continue with a call to block_job_finish_sync. | ||
14 | |||
15 | - block_job_user_resume resets the iostatus. It wants to resume the job | ||
16 | unconditionally, even if there was no error. | ||
17 | |||
18 | - block_job_iostatus_reset doesn't resume the job at all. Maybe that's | ||
19 | a bug but it should be fixed separately. | ||
20 | |||
21 | block_job_iostatus_reset implements the least common denominator, so | ||
22 | add some checking but otherwise leave it as the entry point for | ||
23 | resetting the iostatus. | ||
24 | |||
25 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
26 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
27 | Message-id: 20170508141310.8674-8-pbonzini@redhat.com | ||
28 | Signed-off-by: Jeff Cody <jcody@redhat.com> | ||
29 | --- | ||
30 | blockjob.c | 24 ++++++++++++++++++++---- | ||
31 | 1 file changed, 20 insertions(+), 4 deletions(-) | ||
32 | |||
33 | diff --git a/blockjob.c b/blockjob.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/blockjob.c | ||
36 | +++ b/blockjob.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static void block_job_completed_single(BlockJob *job) | ||
38 | block_job_unref(job); | ||
39 | } | ||
40 | |||
41 | +static void block_job_cancel_async(BlockJob *job) | ||
42 | +{ | ||
43 | + if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) { | ||
44 | + block_job_iostatus_reset(job); | ||
45 | + } | ||
46 | + if (job->user_paused) { | ||
47 | + /* Do not call block_job_enter here, the caller will handle it. */ | ||
48 | + job->user_paused = false; | ||
49 | + job->pause_count--; | ||
50 | + } | ||
51 | + job->cancelled = true; | ||
52 | +} | ||
53 | + | ||
54 | static void block_job_completed_txn_abort(BlockJob *job) | ||
55 | { | ||
56 | AioContext *ctx; | ||
57 | @@ -XXX,XX +XXX,XX @@ static void block_job_completed_txn_abort(BlockJob *job) | ||
58 | * them; this job, however, may or may not be cancelled, depending | ||
59 | * on the caller, so leave it. */ | ||
60 | if (other_job != job) { | ||
61 | - other_job->cancelled = true; | ||
62 | + block_job_cancel_async(other_job); | ||
63 | } | ||
64 | continue; | ||
65 | } | ||
66 | @@ -XXX,XX +XXX,XX @@ bool block_job_user_paused(BlockJob *job) | ||
67 | void block_job_user_resume(BlockJob *job) | ||
68 | { | ||
69 | if (job && job->user_paused && job->pause_count > 0) { | ||
70 | - job->user_paused = false; | ||
71 | block_job_iostatus_reset(job); | ||
72 | + job->user_paused = false; | ||
73 | block_job_resume(job); | ||
74 | } | ||
75 | } | ||
76 | @@ -XXX,XX +XXX,XX @@ void block_job_user_resume(BlockJob *job) | ||
77 | void block_job_cancel(BlockJob *job) | ||
78 | { | ||
79 | if (block_job_started(job)) { | ||
80 | - job->cancelled = true; | ||
81 | - block_job_iostatus_reset(job); | ||
82 | + block_job_cancel_async(job); | ||
83 | block_job_enter(job); | ||
84 | } else { | ||
85 | block_job_completed(job, -ECANCELED); | ||
86 | @@ -XXX,XX +XXX,XX @@ void block_job_yield(BlockJob *job) | ||
87 | |||
88 | void block_job_iostatus_reset(BlockJob *job) | ||
89 | { | ||
90 | + if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { | ||
91 | + return; | ||
92 | + } | ||
93 | + assert(job->user_paused && job->pause_count > 0); | ||
94 | job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; | ||
95 | } | ||
96 | |||
97 | -- | ||
98 | 2.9.3 | ||
99 | |||
100 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Paolo Bonzini <pbonzini@redhat.com> | ||
2 | 1 | ||
3 | Unlike test-blockjob-txn, QMP releases the reference to the transaction | ||
4 | before the jobs finish. Thus, qemu-iotest 124 showed a failure while | ||
5 | working on the next patch that the unit tests did not have. Make | ||
6 | the test a little nastier. | ||
7 | |||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Reviewed-by: John Snow <jsnow@redhat.com> | ||
10 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
11 | Message-id: 20170508141310.8674-10-pbonzini@redhat.com | ||
12 | Signed-off-by: Jeff Cody <jcody@redhat.com> | ||
13 | --- | ||
14 | tests/test-blockjob-txn.c | 7 +++++-- | ||
15 | 1 file changed, 5 insertions(+), 2 deletions(-) | ||
16 | |||
17 | diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/tests/test-blockjob-txn.c | ||
20 | +++ b/tests/test-blockjob-txn.c | ||
21 | @@ -XXX,XX +XXX,XX @@ static void test_pair_jobs(int expected1, int expected2) | ||
22 | block_job_start(job1); | ||
23 | block_job_start(job2); | ||
24 | |||
25 | + /* Release our reference now to trigger as many nice | ||
26 | + * use-after-free bugs as possible. | ||
27 | + */ | ||
28 | + block_job_txn_unref(txn); | ||
29 | + | ||
30 | if (expected1 == -ECANCELED) { | ||
31 | block_job_cancel(job1); | ||
32 | } | ||
33 | @@ -XXX,XX +XXX,XX @@ static void test_pair_jobs(int expected1, int expected2) | ||
34 | |||
35 | g_assert_cmpint(result1, ==, expected1); | ||
36 | g_assert_cmpint(result2, ==, expected2); | ||
37 | - | ||
38 | - block_job_txn_unref(txn); | ||
39 | } | ||
40 | |||
41 | static void test_pair_jobs_success(void) | ||
42 | -- | ||
43 | 2.9.3 | ||
44 | |||
45 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Paolo Bonzini <pbonzini@redhat.com> | ||
2 | 1 | ||
3 | This splits the part that touches job states from the part that invokes | ||
4 | callbacks. It will make the code simpler to understand once job states will | ||
5 | be protected by a different mutex than the AioContext lock. | ||
6 | |||
7 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
9 | Message-id: 20170508141310.8674-11-pbonzini@redhat.com | ||
10 | Signed-off-by: Jeff Cody <jcody@redhat.com> | ||
11 | --- | ||
12 | blockjob.c | 35 ++++++++++++++++++++++------------- | ||
13 | 1 file changed, 22 insertions(+), 13 deletions(-) | ||
14 | |||
15 | diff --git a/blockjob.c b/blockjob.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/blockjob.c | ||
18 | +++ b/blockjob.c | ||
19 | @@ -XXX,XX +XXX,XX @@ void block_job_start(BlockJob *job) | ||
20 | |||
21 | static void block_job_completed_single(BlockJob *job) | ||
22 | { | ||
23 | + assert(job->completed); | ||
24 | + | ||
25 | if (!job->ret) { | ||
26 | if (job->driver->commit) { | ||
27 | job->driver->commit(job); | ||
28 | @@ -XXX,XX +XXX,XX @@ static int block_job_finish_sync(BlockJob *job, | ||
29 | |||
30 | block_job_ref(job); | ||
31 | |||
32 | - finish(job, &local_err); | ||
33 | + if (finish) { | ||
34 | + finish(job, &local_err); | ||
35 | + } | ||
36 | if (local_err) { | ||
37 | error_propagate(errp, local_err); | ||
38 | block_job_unref(job); | ||
39 | @@ -XXX,XX +XXX,XX @@ static void block_job_completed_txn_abort(BlockJob *job) | ||
40 | { | ||
41 | AioContext *ctx; | ||
42 | BlockJobTxn *txn = job->txn; | ||
43 | - BlockJob *other_job, *next; | ||
44 | + BlockJob *other_job; | ||
45 | |||
46 | if (txn->aborting) { | ||
47 | /* | ||
48 | @@ -XXX,XX +XXX,XX @@ static void block_job_completed_txn_abort(BlockJob *job) | ||
49 | return; | ||
50 | } | ||
51 | txn->aborting = true; | ||
52 | + block_job_txn_ref(txn); | ||
53 | + | ||
54 | /* We are the first failed job. Cancel other jobs. */ | ||
55 | QLIST_FOREACH(other_job, &txn->jobs, txn_list) { | ||
56 | ctx = blk_get_aio_context(other_job->blk); | ||
57 | aio_context_acquire(ctx); | ||
58 | } | ||
59 | + | ||
60 | + /* Other jobs are effectively cancelled by us, set the status for | ||
61 | + * them; this job, however, may or may not be cancelled, depending | ||
62 | + * on the caller, so leave it. */ | ||
63 | QLIST_FOREACH(other_job, &txn->jobs, txn_list) { | ||
64 | - if (other_job == job || other_job->completed) { | ||
65 | - /* Other jobs are "effectively" cancelled by us, set the status for | ||
66 | - * them; this job, however, may or may not be cancelled, depending | ||
67 | - * on the caller, so leave it. */ | ||
68 | - if (other_job != job) { | ||
69 | - block_job_cancel_async(other_job); | ||
70 | - } | ||
71 | - continue; | ||
72 | + if (other_job != job) { | ||
73 | + block_job_cancel_async(other_job); | ||
74 | } | ||
75 | - block_job_cancel_sync(other_job); | ||
76 | - assert(other_job->completed); | ||
77 | } | ||
78 | - QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { | ||
79 | + while (!QLIST_EMPTY(&txn->jobs)) { | ||
80 | + other_job = QLIST_FIRST(&txn->jobs); | ||
81 | ctx = blk_get_aio_context(other_job->blk); | ||
82 | + if (!other_job->completed) { | ||
83 | + assert(other_job->cancelled); | ||
84 | + block_job_finish_sync(other_job, NULL, NULL); | ||
85 | + } | ||
86 | block_job_completed_single(other_job); | ||
87 | aio_context_release(ctx); | ||
88 | } | ||
89 | + | ||
90 | + block_job_txn_unref(txn); | ||
91 | } | ||
92 | |||
93 | static void block_job_completed_txn_success(BlockJob *job) | ||
94 | -- | ||
95 | 2.9.3 | ||
96 | |||
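Condensed, the reorganized abort path now reads as two phases. This is a paraphrase of the hunks above rather than a verbatim copy, with the AioContext acquire/release pairs elided for brevity.

    txn->aborting = true;
    block_job_txn_ref(txn);

    /* Phase 1: touch job state only; no callbacks run yet. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (other_job != job) {
            block_job_cancel_async(other_job);
        }
    }

    /* Phase 2: wait for each job, then invoke its callbacks. */
    while (!QLIST_EMPTY(&txn->jobs)) {
        other_job = QLIST_FIRST(&txn->jobs);
        if (!other_job->completed) {
            assert(other_job->cancelled);
            block_job_finish_sync(other_job, NULL, NULL);
        }
        block_job_completed_single(other_job);
    }

    block_job_txn_unref(txn);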
Deleted patch | |||
---|---|---|---|
1 | On currently released versions of glusterfs, glfs_lseek() will sometimes | ||
2 | return invalid values for SEEK_DATA or SEEK_HOLE. For SEEK_DATA and | ||
3 | SEEK_HOLE, the returned value should be >= the passed offset, or < 0 in | ||
4 | the case of error: | ||
5 | 1 | ||
6 | LSEEK(2): | ||
7 | |||
8 | off_t lseek(int fd, off_t offset, int whence); | ||
9 | |||
10 | [...] | ||
11 | |||
12 | SEEK_HOLE | ||
13 | Adjust the file offset to the next hole in the file greater | ||
14 | than or equal to offset. If offset points into the middle of | ||
15 | a hole, then the file offset is set to offset. If there is no | ||
16 | hole past offset, then the file offset is adjusted to the end | ||
17 | of the file (i.e., there is an implicit hole at the end of | ||
18 | any file). | ||
19 | |||
20 | [...] | ||
21 | |||
22 | RETURN VALUE | ||
23 | Upon successful completion, lseek() returns the resulting | ||
24 | offset location as measured in bytes from the beginning of the | ||
25 | file. On error, the value (off_t) -1 is returned and errno is | ||
26 | set to indicate the error | ||
27 | |||
28 | However, occasionally glfs_lseek() for SEEK_HOLE/DATA will return a | ||
29 | value less than the passed offset, yet greater than zero. | ||
30 | |||
31 | For instance, here are example values observed from this call: | ||
32 | |||
33 | offs = glfs_lseek(s->fd, start, SEEK_HOLE); | ||
34 | if (offs < 0) { | ||
35 | return -errno; /* D1 and (H3 or H4) */ | ||
36 | } | ||
37 | |||
38 | start == 7608336384 | ||
39 | offs == 7607877632 | ||
40 | |||
41 | This causes QEMU to abort on the assertion. When this value is | ||
42 | returned, errno is also 0. | ||
43 | |||
44 | This is a known, reported bug in glusterfs: | ||
45 | https://bugzilla.redhat.com/show_bug.cgi?id=1425293 | ||
46 | |||
47 | Although this is being fixed in gluster, we should still work around it | ||
48 | in QEMU, given that multiple released versions of gluster behave this | ||
49 | way. | ||
50 | |||
51 | This patch treats the return case of (offs < start) the same as if an | ||
52 | error value other than ENXIO is returned; we will assume we learned | ||
53 | nothing, and there are no holes in the file. | ||
54 | |||
55 | Signed-off-by: Jeff Cody <jcody@redhat.com> | ||
56 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
57 | Reviewed-by: Niels de Vos <ndevos@redhat.com> | ||
58 | Message-id: 87c0140e9407c08f6e74b04131b610f2e27c014c.1495560397.git.jcody@redhat.com | ||
59 | Signed-off-by: Jeff Cody <jcody@redhat.com> | ||
60 | --- | ||
61 | block/gluster.c | 18 ++++++++++++++++-- | ||
62 | 1 file changed, 16 insertions(+), 2 deletions(-) | ||
63 | |||
64 | diff --git a/block/gluster.c b/block/gluster.c | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/block/gluster.c | ||
67 | +++ b/block/gluster.c | ||
68 | @@ -XXX,XX +XXX,XX @@ static int find_allocation(BlockDriverState *bs, off_t start, | ||
69 | if (offs < 0) { | ||
70 | return -errno; /* D3 or D4 */ | ||
71 | } | ||
72 | - assert(offs >= start); | ||
73 | + | ||
74 | + if (offs < start) { | ||
75 | + /* This is not a valid return by lseek(). We are safe to just return | ||
76 | + * -EIO in this case, and we'll treat it like D4. Unfortunately some | ||
77 | + * versions of gluster server will return offs < start, so an assert | ||
78 | + * here will unnecessarily abort QEMU. */ | ||
79 | + return -EIO; | ||
80 | + } | ||
81 | |||
82 | if (offs > start) { | ||
83 | /* D2: in hole, next data at offs */ | ||
84 | @@ -XXX,XX +XXX,XX @@ static int find_allocation(BlockDriverState *bs, off_t start, | ||
85 | if (offs < 0) { | ||
86 | return -errno; /* D1 and (H3 or H4) */ | ||
87 | } | ||
88 | - assert(offs >= start); | ||
89 | + | ||
90 | + if (offs < start) { | ||
91 | + /* This is not a valid return by lseek(). We are safe to just return | ||
92 | + * -EIO in this case, and we'll treat it like H4. Unfortunately some | ||
93 | + * versions of gluster server will return offs < start, so an assert | ||
94 | + * here will unnecessarily abort QEMU. */ | ||
95 | + return -EIO; | ||
96 | + } | ||
97 | |||
98 | if (offs > start) { | ||
99 | /* | ||
100 | -- | ||
101 | 2.9.3 | ||
102 | |||
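The same defensive pattern applies to any SEEK_DATA/SEEK_HOLE caller that cannot fully trust the underlying implementation. A generic sketch with plain lseek(), not the gluster wrapper, assumed to live in a function that returns a negative errno value:

    off_t offs = lseek(fd, start, SEEK_HOLE);
    if (offs < 0) {
        return -errno;   /* a real error: errno is meaningful here */
    }
    if (offs < start) {
        /* Out-of-spec result; errno may even be 0, as in the report
         * above.  Assume nothing was learned and report an I/O error
         * rather than asserting. */
        return -EIO;
    }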