The following changes since commit 9cf289af47bcfae5c75de37d8e5d6fd23705322c:

  Merge tag 'qga-pull-request' of gitlab.com:marcandre.lureau/qemu into staging (2022-05-04 03:42:49 -0700)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to bef2e050d6a7feb865854c65570c496ac5a8cf53:

  util/event-loop-base: Introduce options to set the thread pool size (2022-05-04 17:02:19 +0100)

----------------------------------------------------------------
Pull request

Add new thread-pool-min/thread-pool-max parameters to control the thread pool
used for async I/O.
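
For example, with the series applied, the thread pool serving an event loop
can be pinned to a fixed size from the command line (the object ids and
values below are purely illustrative):

  qemu-system-x86_64 \
      -object main-loop,id=main-loop,thread-pool-min=8,thread-pool-max=8 \
      -object iothread,id=iothread0,thread-pool-min=16,thread-pool-max=16 \
      ...
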
----------------------------------------------------------------

Nicolas Saenz Julienne (3):
      Introduce event-loop-base abstract class
      util/main-loop: Introduce the main loop into QOM
      util/event-loop-base: Introduce options to set the thread pool size

 qapi/qom.json                    |  43 ++++++++---
 meson.build                      |  26 ++++---
 include/block/aio.h              |  10 +++
 include/block/thread-pool.h      |   3 +
 include/qemu/main-loop.h         |  10 +++
 include/sysemu/event-loop-base.h |  41 ++++++++++
 include/sysemu/iothread.h        |   6 +-
 event-loop-base.c                | 140 +++++++++++++++++++++++++++++++
 iothread.c                       |  68 ++++++----------
 util/aio-posix.c                 |   1 +
 util/async.c                     |  20 +++++
 util/main-loop.c                 |  65 +++++++++++++++
 util/thread-pool.c               |  55 ++++++++++++-
 13 files changed, 419 insertions(+), 69 deletions(-)
 create mode 100644 include/sysemu/event-loop-base.h
 create mode 100644 event-loop-base.c

-- 
2.35.1
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>

Introduce the 'event-loop-base' abstract class, it'll hold the
properties common to all event loops and provide the necessary hooks for
their creation and maintenance. Then have iothread inherit from it.

EventLoopBaseClass is defined as user creatable and provides a hook for
its children to attach themselves to the user creatable class 'complete'
function. It also provides an update_params() callback to propagate
property changes onto its children.

The new 'event-loop-base' class will live in the root directory. It is
built on its own using the 'link_whole' option (there are no direct
function dependencies between the class and its children, it all happens
through 'constructor' magic). And also imposes new compilation
dependencies:

    qom <- event-loop-base <- blockdev (iothread.c)

And in subsequent patches:

    qom <- event-loop-base <- qemuutil (util/main-loop.c)

All this forced some amount of reordering in meson.build:

 - Moved qom build definition before qemuutil. Doing it the other way
   around (i.e. moving qemuutil after qom) isn't possible as a lot of
   core libraries that live in between the two depend on it.

 - Process the 'hw' subdir earlier, as it introduces files into the
   'qom' source set.

No functional changes intended.

Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20220425075723.20019-2-nsaenzju@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/qom.json                    |  22 ++++++--
 meson.build                      |  23 ++++---
 include/sysemu/event-loop-base.h |  36 +++++++++++
 include/sysemu/iothread.h        |   6 +-
 event-loop-base.c                | 104 +++++++++++++++++++++++++++++++
 iothread.c                       |  65 ++++++-------------
 6 files changed, 192 insertions(+), 64 deletions(-)
 create mode 100644 include/sysemu/event-loop-base.h
 create mode 100644 event-loop-base.c
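
As an illustration of the new hooks (a hypothetical sketch, not part of this
patch: the 'my-loop' type name is made up), a child object only needs to
subclass event-loop-base and fill in init()/update_params(); the
user-creatable 'complete' handling and the common properties are inherited:

/* Hypothetical child of the event-loop-base class introduced below. */
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/event-loop-base.h"

#define TYPE_MY_LOOP "my-loop"
OBJECT_DECLARE_SIMPLE_TYPE(MyLoop, MY_LOOP)

struct MyLoop {
    EventLoopBase parent_obj;
    /* state for the event loop itself would live here */
};

static void my_loop_init(EventLoopBase *base, Error **errp)
{
    /* invoked through the user-creatable 'complete' hook: set up the loop */
}

static void my_loop_update_params(EventLoopBase *base, Error **errp)
{
    /* invoked whenever a common property (e.g. aio-max-batch) is changed */
}

static void my_loop_class_init(ObjectClass *oc, void *class_data)
{
    EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc);

    bc->init = my_loop_init;
    bc->update_params = my_loop_update_params;
}

static const TypeInfo my_loop_info = {
    .name = TYPE_MY_LOOP,
    .parent = TYPE_EVENT_LOOP_BASE,
    .instance_size = sizeof(MyLoop),
    .class_init = my_loop_class_init,
};

static void my_loop_register_types(void)
{
    type_register_static(&my_loop_info);
}
type_init(my_loop_register_types);
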
50 | |||
51 | diff --git a/qapi/qom.json b/qapi/qom.json | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/qapi/qom.json | ||
54 | +++ b/qapi/qom.json | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | '*repeat': 'bool', | ||
57 | '*grab-toggle': 'GrabToggleKeys' } } | ||
58 | |||
59 | +## | ||
60 | +# @EventLoopBaseProperties: | ||
61 | +# | ||
62 | +# Common properties for event loops | ||
63 | +# | ||
64 | +# @aio-max-batch: maximum number of requests in a batch for the AIO engine, | ||
65 | +# 0 means that the engine will use its default. | ||
66 | +# (default: 0) | ||
67 | +# | ||
68 | +# Since: 7.1 | ||
69 | +## | ||
70 | +{ 'struct': 'EventLoopBaseProperties', | ||
71 | + 'data': { '*aio-max-batch': 'int' } } | ||
72 | + | ||
73 | ## | ||
74 | # @IothreadProperties: | ||
75 | # | ||
76 | @@ -XXX,XX +XXX,XX @@ | ||
77 | # algorithm detects it is spending too long polling without | ||
78 | # encountering events. 0 selects a default behaviour (default: 0) | ||
79 | # | ||
80 | -# @aio-max-batch: maximum number of requests in a batch for the AIO engine, | ||
81 | -# 0 means that the engine will use its default | ||
82 | -# (default:0, since 6.1) | ||
83 | +# The @aio-max-batch option is available since 6.1. | ||
84 | # | ||
85 | # Since: 2.0 | ||
86 | ## | ||
87 | { 'struct': 'IothreadProperties', | ||
88 | + 'base': 'EventLoopBaseProperties', | ||
89 | 'data': { '*poll-max-ns': 'int', | ||
90 | '*poll-grow': 'int', | ||
91 | - '*poll-shrink': 'int', | ||
92 | - '*aio-max-batch': 'int' } } | ||
93 | + '*poll-shrink': 'int' } } | ||
94 | |||
95 | ## | ||
96 | # @MemoryBackendProperties: | ||
97 | diff --git a/meson.build b/meson.build | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/meson.build | ||
100 | +++ b/meson.build | ||
101 | @@ -XXX,XX +XXX,XX @@ subdir('qom') | ||
102 | subdir('authz') | ||
103 | subdir('crypto') | ||
104 | subdir('ui') | ||
105 | +subdir('hw') | ||
106 | |||
107 | |||
108 | if enable_modules | ||
109 | @@ -XXX,XX +XXX,XX @@ if enable_modules | ||
110 | modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO') | ||
111 | endif | ||
112 | |||
113 | +qom_ss = qom_ss.apply(config_host, strict: false) | ||
114 | +libqom = static_library('qom', qom_ss.sources() + genh, | ||
115 | + dependencies: [qom_ss.dependencies()], | ||
116 | + name_suffix: 'fa') | ||
117 | +qom = declare_dependency(link_whole: libqom) | ||
118 | + | ||
119 | +event_loop_base = files('event-loop-base.c') | ||
120 | +event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh, | ||
121 | + build_by_default: true) | ||
122 | +event_loop_base = declare_dependency(link_whole: event_loop_base, | ||
123 | + dependencies: [qom]) | ||
124 | + | ||
125 | stub_ss = stub_ss.apply(config_all, strict: false) | ||
126 | |||
127 | util_ss.add_all(trace_ss) | ||
128 | @@ -XXX,XX +XXX,XX @@ subdir('monitor') | ||
129 | subdir('net') | ||
130 | subdir('replay') | ||
131 | subdir('semihosting') | ||
132 | -subdir('hw') | ||
133 | subdir('tcg') | ||
134 | subdir('fpu') | ||
135 | subdir('accel') | ||
136 | @@ -XXX,XX +XXX,XX @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms', | ||
137 | capture: true, | ||
138 | command: [undefsym, nm, '@INPUT@']) | ||
139 | |||
140 | -qom_ss = qom_ss.apply(config_host, strict: false) | ||
141 | -libqom = static_library('qom', qom_ss.sources() + genh, | ||
142 | - dependencies: [qom_ss.dependencies()], | ||
143 | - name_suffix: 'fa') | ||
144 | - | ||
145 | -qom = declare_dependency(link_whole: libqom) | ||
146 | - | ||
147 | authz_ss = authz_ss.apply(config_host, strict: false) | ||
148 | libauthz = static_library('authz', authz_ss.sources() + genh, | ||
149 | dependencies: [authz_ss.dependencies()], | ||
150 | @@ -XXX,XX +XXX,XX @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, | ||
151 | build_by_default: false) | ||
152 | |||
153 | blockdev = declare_dependency(link_whole: [libblockdev], | ||
154 | - dependencies: [block]) | ||
155 | + dependencies: [block, event_loop_base]) | ||
156 | |||
157 | qmp_ss = qmp_ss.apply(config_host, strict: false) | ||
158 | libqmp = static_library('qmp', qmp_ss.sources() + genh, | ||
159 | diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h | ||
160 | new file mode 100644 | ||
161 | index XXXXXXX..XXXXXXX | ||
162 | --- /dev/null | ||
163 | +++ b/include/sysemu/event-loop-base.h | ||
164 | @@ -XXX,XX +XXX,XX @@ | ||
165 | +/* | ||
166 | + * QEMU event-loop backend | ||
167 | + * | ||
168 | + * Copyright (C) 2022 Red Hat Inc | ||
169 | + * | ||
170 | + * Authors: | ||
171 | + * Nicolas Saenz Julienne <nsaenzju@redhat.com> | ||
172 | + * | ||
173 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
174 | + * See the COPYING file in the top-level directory. | ||
175 | + */ | ||
176 | +#ifndef QEMU_EVENT_LOOP_BASE_H | ||
177 | +#define QEMU_EVENT_LOOP_BASE_H | ||
178 | + | ||
179 | +#include "qom/object.h" | ||
180 | +#include "block/aio.h" | ||
181 | +#include "qemu/typedefs.h" | ||
182 | + | ||
183 | +#define TYPE_EVENT_LOOP_BASE "event-loop-base" | ||
184 | +OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass, | ||
185 | + EVENT_LOOP_BASE) | ||
186 | + | ||
187 | +struct EventLoopBaseClass { | ||
188 | + ObjectClass parent_class; | ||
189 | + | ||
190 | + void (*init)(EventLoopBase *base, Error **errp); | ||
191 | + void (*update_params)(EventLoopBase *base, Error **errp); | ||
192 | +}; | ||
193 | + | ||
194 | +struct EventLoopBase { | ||
195 | + Object parent; | ||
196 | + | ||
197 | + /* AioContext AIO engine parameters */ | ||
198 | + int64_t aio_max_batch; | ||
199 | +}; | ||
200 | +#endif | ||
43 | diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h | 201 | diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h |
44 | index XXXXXXX..XXXXXXX 100644 | 202 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/include/sysemu/iothread.h | 203 | --- a/include/sysemu/iothread.h |
46 | +++ b/include/sysemu/iothread.h | 204 | +++ b/include/sysemu/iothread.h |
47 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 205 | @@ -XXX,XX +XXX,XX @@ |
48 | char *iothread_get_id(IOThread *iothread); | 206 | #include "block/aio.h" |
49 | IOThread *iothread_by_id(const char *id); | 207 | #include "qemu/thread.h" |
50 | AioContext *iothread_get_aio_context(IOThread *iothread); | 208 | #include "qom/object.h" |
51 | -void iothread_stop_all(void); | 209 | +#include "sysemu/event-loop-base.h" |
52 | GMainContext *iothread_get_g_main_context(IOThread *iothread); | 210 | |
53 | 211 | #define TYPE_IOTHREAD "iothread" | |
54 | /* | 212 | |
55 | diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h | 213 | struct IOThread { |
56 | index XXXXXXX..XXXXXXX 100644 | 214 | - Object parent_obj; |
57 | --- a/include/sysemu/sysemu.h | 215 | + EventLoopBase parent_obj; |
58 | +++ b/include/sysemu/sysemu.h | 216 | |
59 | @@ -XXX,XX +XXX,XX @@ void vm_start(void); | 217 | QemuThread thread; |
60 | int vm_prepare_start(void); | 218 | AioContext *ctx; |
61 | int vm_stop(RunState state); | 219 | @@ -XXX,XX +XXX,XX @@ struct IOThread { |
62 | int vm_stop_force_state(RunState state); | 220 | int64_t poll_max_ns; |
63 | +int vm_shutdown(void); | 221 | int64_t poll_grow; |
64 | 222 | int64_t poll_shrink; | |
65 | typedef enum WakeupReason { | 223 | - |
66 | /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */ | 224 | - /* AioContext AIO engine parameters */ |
67 | diff --git a/cpus.c b/cpus.c | 225 | - int64_t aio_max_batch; |
68 | index XXXXXXX..XXXXXXX 100644 | 226 | }; |
69 | --- a/cpus.c | 227 | typedef struct IOThread IOThread; |
70 | +++ b/cpus.c | 228 | |
71 | @@ -XXX,XX +XXX,XX @@ void cpu_synchronize_all_pre_loadvm(void) | 229 | diff --git a/event-loop-base.c b/event-loop-base.c |
72 | } | 230 | new file mode 100644 |
73 | } | 231 | index XXXXXXX..XXXXXXX |
74 | 232 | --- /dev/null | |
75 | -static int do_vm_stop(RunState state) | 233 | +++ b/event-loop-base.c |
76 | +static int do_vm_stop(RunState state, bool send_stop) | 234 | @@ -XXX,XX +XXX,XX @@ |
77 | { | 235 | +/* |
78 | int ret = 0; | 236 | + * QEMU event-loop base |
79 | 237 | + * | |
80 | @@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state) | 238 | + * Copyright (C) 2022 Red Hat Inc |
81 | pause_all_vcpus(); | 239 | + * |
82 | runstate_set(state); | 240 | + * Authors: |
83 | vm_state_notify(0, state); | 241 | + * Stefan Hajnoczi <stefanha@redhat.com> |
84 | - qapi_event_send_stop(&error_abort); | 242 | + * Nicolas Saenz Julienne <nsaenzju@redhat.com> |
85 | + if (send_stop) { | 243 | + * |
86 | + qapi_event_send_stop(&error_abort); | 244 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. |
87 | + } | 245 | + * See the COPYING file in the top-level directory. |
88 | } | ||
89 | |||
90 | bdrv_drain_all(); | ||
91 | @@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state) | ||
92 | return ret; | ||
93 | } | ||
94 | |||
95 | +/* Special vm_stop() variant for terminating the process. Historically clients | ||
96 | + * did not expect a QMP STOP event and so we need to retain compatibility. | ||
97 | + */ | 246 | + */ |
98 | +int vm_shutdown(void) | 247 | + |
248 | +#include "qemu/osdep.h" | ||
249 | +#include "qom/object_interfaces.h" | ||
250 | +#include "qapi/error.h" | ||
251 | +#include "sysemu/event-loop-base.h" | ||
252 | + | ||
253 | +typedef struct { | ||
254 | + const char *name; | ||
255 | + ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ | ||
256 | +} EventLoopBaseParamInfo; | ||
257 | + | ||
258 | +static EventLoopBaseParamInfo aio_max_batch_info = { | ||
259 | + "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), | ||
260 | +}; | ||
261 | + | ||
262 | +static void event_loop_base_get_param(Object *obj, Visitor *v, | ||
263 | + const char *name, void *opaque, Error **errp) | ||
99 | +{ | 264 | +{ |
100 | + return do_vm_stop(RUN_STATE_SHUTDOWN, false); | 265 | + EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj); |
266 | + EventLoopBaseParamInfo *info = opaque; | ||
267 | + int64_t *field = (void *)event_loop_base + info->offset; | ||
268 | + | ||
269 | + visit_type_int64(v, name, field, errp); | ||
101 | +} | 270 | +} |
102 | + | 271 | + |
103 | static bool cpu_can_run(CPUState *cpu) | 272 | +static void event_loop_base_set_param(Object *obj, Visitor *v, |
104 | { | 273 | + const char *name, void *opaque, Error **errp) |
105 | if (cpu->stop) { | 274 | +{ |
106 | @@ -XXX,XX +XXX,XX @@ int vm_stop(RunState state) | 275 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj); |
107 | return 0; | 276 | + EventLoopBase *base = EVENT_LOOP_BASE(obj); |
108 | } | 277 | + EventLoopBaseParamInfo *info = opaque; |
109 | 278 | + int64_t *field = (void *)base + info->offset; | |
110 | - return do_vm_stop(state); | 279 | + int64_t value; |
111 | + return do_vm_stop(state, true); | 280 | + |
112 | } | 281 | + if (!visit_type_int64(v, name, &value, errp)) { |
113 | 282 | + return; | |
114 | /** | 283 | + } |
284 | + | ||
285 | + if (value < 0) { | ||
286 | + error_setg(errp, "%s value must be in range [0, %" PRId64 "]", | ||
287 | + info->name, INT64_MAX); | ||
288 | + return; | ||
289 | + } | ||
290 | + | ||
291 | + *field = value; | ||
292 | + | ||
293 | + if (bc->update_params) { | ||
294 | + bc->update_params(base, errp); | ||
295 | + } | ||
296 | + | ||
297 | + return; | ||
298 | +} | ||
299 | + | ||
300 | +static void event_loop_base_complete(UserCreatable *uc, Error **errp) | ||
301 | +{ | ||
302 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); | ||
303 | + EventLoopBase *base = EVENT_LOOP_BASE(uc); | ||
304 | + | ||
305 | + if (bc->init) { | ||
306 | + bc->init(base, errp); | ||
307 | + } | ||
308 | +} | ||
309 | + | ||
310 | +static void event_loop_base_class_init(ObjectClass *klass, void *class_data) | ||
311 | +{ | ||
312 | + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); | ||
313 | + ucc->complete = event_loop_base_complete; | ||
314 | + | ||
315 | + object_class_property_add(klass, "aio-max-batch", "int", | ||
316 | + event_loop_base_get_param, | ||
317 | + event_loop_base_set_param, | ||
318 | + NULL, &aio_max_batch_info); | ||
319 | +} | ||
320 | + | ||
321 | +static const TypeInfo event_loop_base_info = { | ||
322 | + .name = TYPE_EVENT_LOOP_BASE, | ||
323 | + .parent = TYPE_OBJECT, | ||
324 | + .instance_size = sizeof(EventLoopBase), | ||
325 | + .class_size = sizeof(EventLoopBaseClass), | ||
326 | + .class_init = event_loop_base_class_init, | ||
327 | + .abstract = true, | ||
328 | + .interfaces = (InterfaceInfo[]) { | ||
329 | + { TYPE_USER_CREATABLE }, | ||
330 | + { } | ||
331 | + } | ||
332 | +}; | ||
333 | + | ||
334 | +static void register_types(void) | ||
335 | +{ | ||
336 | + type_register_static(&event_loop_base_info); | ||
337 | +} | ||
338 | +type_init(register_types); | ||
115 | diff --git a/iothread.c b/iothread.c | 339 | diff --git a/iothread.c b/iothread.c |
116 | index XXXXXXX..XXXXXXX 100644 | 340 | index XXXXXXX..XXXXXXX 100644 |
117 | --- a/iothread.c | 341 | --- a/iothread.c |
118 | +++ b/iothread.c | 342 | +++ b/iothread.c |
119 | @@ -XXX,XX +XXX,XX @@ void iothread_stop(IOThread *iothread) | 343 | @@ -XXX,XX +XXX,XX @@ |
120 | qemu_thread_join(&iothread->thread); | 344 | #include "qemu/module.h" |
345 | #include "block/aio.h" | ||
346 | #include "block/block.h" | ||
347 | +#include "sysemu/event-loop-base.h" | ||
348 | #include "sysemu/iothread.h" | ||
349 | #include "qapi/error.h" | ||
350 | #include "qapi/qapi-commands-misc.h" | ||
351 | @@ -XXX,XX +XXX,XX @@ static void iothread_init_gcontext(IOThread *iothread) | ||
352 | iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); | ||
121 | } | 353 | } |
122 | 354 | ||
123 | -static int iothread_stop_iter(Object *object, void *opaque) | 355 | -static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) |
356 | +static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) | ||
357 | { | ||
358 | + IOThread *iothread = IOTHREAD(base); | ||
359 | ERRP_GUARD(); | ||
360 | |||
361 | + if (!iothread->ctx) { | ||
362 | + return; | ||
363 | + } | ||
364 | + | ||
365 | aio_context_set_poll_params(iothread->ctx, | ||
366 | iothread->poll_max_ns, | ||
367 | iothread->poll_grow, | ||
368 | @@ -XXX,XX +XXX,XX @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) | ||
369 | } | ||
370 | |||
371 | aio_context_set_aio_params(iothread->ctx, | ||
372 | - iothread->aio_max_batch, | ||
373 | + iothread->parent_obj.aio_max_batch, | ||
374 | errp); | ||
375 | } | ||
376 | |||
377 | -static void iothread_complete(UserCreatable *obj, Error **errp) | ||
378 | + | ||
379 | +static void iothread_init(EventLoopBase *base, Error **errp) | ||
380 | { | ||
381 | Error *local_error = NULL; | ||
382 | - IOThread *iothread = IOTHREAD(obj); | ||
383 | + IOThread *iothread = IOTHREAD(base); | ||
384 | char *thread_name; | ||
385 | |||
386 | iothread->stopping = false; | ||
387 | @@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp) | ||
388 | */ | ||
389 | iothread_init_gcontext(iothread); | ||
390 | |||
391 | - iothread_set_aio_context_params(iothread, &local_error); | ||
392 | + iothread_set_aio_context_params(base, &local_error); | ||
393 | if (local_error) { | ||
394 | error_propagate(errp, local_error); | ||
395 | aio_context_unref(iothread->ctx); | ||
396 | @@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp) | ||
397 | * to inherit. | ||
398 | */ | ||
399 | thread_name = g_strdup_printf("IO %s", | ||
400 | - object_get_canonical_path_component(OBJECT(obj))); | ||
401 | + object_get_canonical_path_component(OBJECT(base))); | ||
402 | qemu_thread_create(&iothread->thread, thread_name, iothread_run, | ||
403 | iothread, QEMU_THREAD_JOINABLE); | ||
404 | g_free(thread_name); | ||
405 | @@ -XXX,XX +XXX,XX @@ static IOThreadParamInfo poll_grow_info = { | ||
406 | static IOThreadParamInfo poll_shrink_info = { | ||
407 | "poll-shrink", offsetof(IOThread, poll_shrink), | ||
408 | }; | ||
409 | -static IOThreadParamInfo aio_max_batch_info = { | ||
410 | - "aio-max-batch", offsetof(IOThread, aio_max_batch), | ||
411 | -}; | ||
412 | |||
413 | static void iothread_get_param(Object *obj, Visitor *v, | ||
414 | const char *name, IOThreadParamInfo *info, Error **errp) | ||
415 | @@ -XXX,XX +XXX,XX @@ static void iothread_set_poll_param(Object *obj, Visitor *v, | ||
416 | } | ||
417 | } | ||
418 | |||
419 | -static void iothread_get_aio_param(Object *obj, Visitor *v, | ||
420 | - const char *name, void *opaque, Error **errp) | ||
124 | -{ | 421 | -{ |
125 | - IOThread *iothread; | 422 | - IOThreadParamInfo *info = opaque; |
126 | - | 423 | - |
127 | - iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); | 424 | - iothread_get_param(obj, v, name, info, errp); |
128 | - if (!iothread) { | 425 | -} |
129 | - return 0; | 426 | - |
427 | -static void iothread_set_aio_param(Object *obj, Visitor *v, | ||
428 | - const char *name, void *opaque, Error **errp) | ||
429 | -{ | ||
430 | - IOThread *iothread = IOTHREAD(obj); | ||
431 | - IOThreadParamInfo *info = opaque; | ||
432 | - | ||
433 | - if (!iothread_set_param(obj, v, name, info, errp)) { | ||
434 | - return; | ||
130 | - } | 435 | - } |
131 | - iothread_stop(iothread); | 436 | - |
132 | - return 0; | 437 | - if (iothread->ctx) { |
438 | - aio_context_set_aio_params(iothread->ctx, | ||
439 | - iothread->aio_max_batch, | ||
440 | - errp); | ||
441 | - } | ||
133 | -} | 442 | -} |
134 | - | 443 | - |
135 | static void iothread_instance_init(Object *obj) | 444 | static void iothread_class_init(ObjectClass *klass, void *class_data) |
136 | { | 445 | { |
137 | IOThread *iothread = IOTHREAD(obj); | 446 | - UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); |
138 | @@ -XXX,XX +XXX,XX @@ IOThreadInfoList *qmp_query_iothreads(Error **errp) | 447 | - ucc->complete = iothread_complete; |
139 | return head; | 448 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass); |
449 | + | ||
450 | + bc->init = iothread_init; | ||
451 | + bc->update_params = iothread_set_aio_context_params; | ||
452 | |||
453 | object_class_property_add(klass, "poll-max-ns", "int", | ||
454 | iothread_get_poll_param, | ||
455 | @@ -XXX,XX +XXX,XX @@ static void iothread_class_init(ObjectClass *klass, void *class_data) | ||
456 | iothread_get_poll_param, | ||
457 | iothread_set_poll_param, | ||
458 | NULL, &poll_shrink_info); | ||
459 | - object_class_property_add(klass, "aio-max-batch", "int", | ||
460 | - iothread_get_aio_param, | ||
461 | - iothread_set_aio_param, | ||
462 | - NULL, &aio_max_batch_info); | ||
140 | } | 463 | } |
141 | 464 | ||
142 | -void iothread_stop_all(void) | 465 | static const TypeInfo iothread_info = { |
143 | -{ | 466 | .name = TYPE_IOTHREAD, |
144 | - Object *container = object_get_objects_root(); | 467 | - .parent = TYPE_OBJECT, |
145 | - BlockDriverState *bs; | 468 | + .parent = TYPE_EVENT_LOOP_BASE, |
146 | - BdrvNextIterator it; | 469 | .class_init = iothread_class_init, |
147 | - | 470 | .instance_size = sizeof(IOThread), |
148 | - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | 471 | .instance_init = iothread_instance_init, |
149 | - AioContext *ctx = bdrv_get_aio_context(bs); | 472 | .instance_finalize = iothread_instance_finalize, |
150 | - if (ctx == qemu_get_aio_context()) { | 473 | - .interfaces = (InterfaceInfo[]) { |
151 | - continue; | 474 | - {TYPE_USER_CREATABLE}, |
152 | - } | 475 | - {} |
153 | - aio_context_acquire(ctx); | 476 | - }, |
154 | - bdrv_set_aio_context(bs, qemu_get_aio_context()); | 477 | }; |
155 | - aio_context_release(ctx); | 478 | |
156 | - } | 479 | static void iothread_register_types(void) |
157 | - | 480 | @@ -XXX,XX +XXX,XX @@ static int query_one_iothread(Object *object, void *opaque) |
158 | - object_child_foreach(container, iothread_stop_iter, NULL); | 481 | info->poll_max_ns = iothread->poll_max_ns; |
159 | -} | 482 | info->poll_grow = iothread->poll_grow; |
160 | - | 483 | info->poll_shrink = iothread->poll_shrink; |
161 | static gpointer iothread_g_main_context_init(gpointer opaque) | 484 | - info->aio_max_batch = iothread->aio_max_batch; |
162 | { | 485 | + info->aio_max_batch = iothread->parent_obj.aio_max_batch; |
163 | AioContext *ctx; | 486 | |
164 | diff --git a/vl.c b/vl.c | 487 | QAPI_LIST_APPEND(*tail, info); |
165 | index XXXXXXX..XXXXXXX 100644 | 488 | return 0; |
166 | --- a/vl.c | ||
167 | +++ b/vl.c | ||
168 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
169 | os_setup_post(); | ||
170 | |||
171 | main_loop(); | ||
172 | - replay_disable_events(); | ||
173 | |||
174 | - /* The ordering of the following is delicate. Stop vcpus to prevent new | ||
175 | - * I/O requests being queued by the guest. Then stop IOThreads (this | ||
176 | - * includes a drain operation and completes all request processing). At | ||
177 | - * this point emulated devices are still associated with their IOThreads | ||
178 | - * (if any) but no longer have any work to do. Only then can we close | ||
179 | - * block devices safely because we know there is no more I/O coming. | ||
180 | - */ | ||
181 | - pause_all_vcpus(); | ||
182 | - iothread_stop_all(); | ||
183 | + /* No more vcpu or device emulation activity beyond this point */ | ||
184 | + vm_shutdown(); | ||
185 | + | ||
186 | bdrv_close_all(); | ||
187 | |||
188 | res_free(); | ||
189 | -- | 489 | -- |
190 | 2.14.3 | 490 | 2.35.1 |
191 | |||
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>

'event-loop-base' provides basic property handling for all 'AioContext'
based event loops. So let's define a new 'MainLoopClass' that inherits
from it. This will permit tweaking the main loop's properties through
qapi as well as through the command line using the '-object' keyword[1].
Only one instance of 'MainLoopClass' might be created at any time.

'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to
mark 'MainLoop' as non-deletable.

[1] For example:
      -object main-loop,id=main-loop,aio-max-batch=<value>

Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20220425075723.20019-3-nsaenzju@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/qom.json                    | 13 ++++--
 meson.build                      |  3 +-
 include/qemu/main-loop.h         | 10 ++++++
 include/sysemu/event-loop-base.h |  1 +
 event-loop-base.c                | 13 ++++++
 util/main-loop.c                 | 56 ++++++++++++++++++++++++++++
 6 files changed, 95 insertions(+), 1 deletion(-)
24 | @@ -XXX,XX +XXX,XX @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, | 28 | |
29 | diff --git a/qapi/qom.json b/qapi/qom.json | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/qapi/qom.json | ||
32 | +++ b/qapi/qom.json | ||
33 | @@ -XXX,XX +XXX,XX @@ | ||
34 | '*poll-grow': 'int', | ||
35 | '*poll-shrink': 'int' } } | ||
36 | |||
37 | +## | ||
38 | +# @MainLoopProperties: | ||
39 | +# | ||
40 | +# Properties for the main-loop object. | ||
41 | +# | ||
42 | +# Since: 7.1 | ||
43 | +## | ||
44 | +{ 'struct': 'MainLoopProperties', | ||
45 | + 'base': 'EventLoopBaseProperties', | ||
46 | + 'data': {} } | ||
47 | + | ||
48 | ## | ||
49 | # @MemoryBackendProperties: | ||
50 | # | ||
51 | @@ -XXX,XX +XXX,XX @@ | ||
52 | { 'name': 'input-linux', | ||
53 | 'if': 'CONFIG_LINUX' }, | ||
54 | 'iothread', | ||
55 | + 'main-loop', | ||
56 | { 'name': 'memory-backend-epc', | ||
57 | 'if': 'CONFIG_LINUX' }, | ||
58 | 'memory-backend-file', | ||
59 | @@ -XXX,XX +XXX,XX @@ | ||
60 | 'input-linux': { 'type': 'InputLinuxProperties', | ||
61 | 'if': 'CONFIG_LINUX' }, | ||
62 | 'iothread': 'IothreadProperties', | ||
63 | + 'main-loop': 'MainLoopProperties', | ||
64 | 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', | ||
65 | 'if': 'CONFIG_LINUX' }, | ||
66 | 'memory-backend-file': 'MemoryBackendFileProperties', | ||
67 | diff --git a/meson.build b/meson.build | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/meson.build | ||
70 | +++ b/meson.build | ||
71 | @@ -XXX,XX +XXX,XX @@ libqemuutil = static_library('qemuutil', | ||
72 | sources: util_ss.sources() + stub_ss.sources() + genh, | ||
73 | dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman]) | ||
74 | qemuutil = declare_dependency(link_with: libqemuutil, | ||
75 | - sources: genh + version_res) | ||
76 | + sources: genh + version_res, | ||
77 | + dependencies: [event_loop_base]) | ||
78 | |||
79 | if have_system or have_user | ||
80 | decodetree = generator(find_program('scripts/decodetree.py'), | ||
81 | diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/include/qemu/main-loop.h | ||
84 | +++ b/include/qemu/main-loop.h | ||
85 | @@ -XXX,XX +XXX,XX @@ | ||
86 | #define QEMU_MAIN_LOOP_H | ||
87 | |||
88 | #include "block/aio.h" | ||
89 | +#include "qom/object.h" | ||
90 | +#include "sysemu/event-loop-base.h" | ||
91 | |||
92 | #define SIG_IPI SIGUSR1 | ||
93 | |||
94 | +#define TYPE_MAIN_LOOP "main-loop" | ||
95 | +OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP) | ||
96 | + | ||
97 | +struct MainLoop { | ||
98 | + EventLoopBase parent_obj; | ||
99 | +}; | ||
100 | +typedef struct MainLoop MainLoop; | ||
101 | + | ||
102 | /** | ||
103 | * qemu_init_main_loop: Set up the process so that it can run the main loop. | ||
104 | * | ||
105 | diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/include/sysemu/event-loop-base.h | ||
108 | +++ b/include/sysemu/event-loop-base.h | ||
109 | @@ -XXX,XX +XXX,XX @@ struct EventLoopBaseClass { | ||
110 | |||
111 | void (*init)(EventLoopBase *base, Error **errp); | ||
112 | void (*update_params)(EventLoopBase *base, Error **errp); | ||
113 | + bool (*can_be_deleted)(EventLoopBase *base); | ||
114 | }; | ||
115 | |||
116 | struct EventLoopBase { | ||
117 | diff --git a/event-loop-base.c b/event-loop-base.c | ||
118 | index XXXXXXX..XXXXXXX 100644 | ||
119 | --- a/event-loop-base.c | ||
120 | +++ b/event-loop-base.c | ||
121 | @@ -XXX,XX +XXX,XX @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp) | ||
122 | } | ||
123 | } | ||
124 | |||
125 | +static bool event_loop_base_can_be_deleted(UserCreatable *uc) | ||
126 | +{ | ||
127 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); | ||
128 | + EventLoopBase *backend = EVENT_LOOP_BASE(uc); | ||
129 | + | ||
130 | + if (bc->can_be_deleted) { | ||
131 | + return bc->can_be_deleted(backend); | ||
132 | + } | ||
133 | + | ||
134 | + return true; | ||
135 | +} | ||
136 | + | ||
137 | static void event_loop_base_class_init(ObjectClass *klass, void *class_data) | ||
138 | { | ||
139 | UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); | ||
140 | ucc->complete = event_loop_base_complete; | ||
141 | + ucc->can_be_deleted = event_loop_base_can_be_deleted; | ||
142 | |||
143 | object_class_property_add(klass, "aio-max-batch", "int", | ||
144 | event_loop_base_get_param, | ||
145 | diff --git a/util/main-loop.c b/util/main-loop.c | ||
146 | index XXXXXXX..XXXXXXX 100644 | ||
147 | --- a/util/main-loop.c | ||
148 | +++ b/util/main-loop.c | ||
149 | @@ -XXX,XX +XXX,XX @@ | ||
150 | #include "qemu/error-report.h" | ||
151 | #include "qemu/queue.h" | ||
152 | #include "qemu/compiler.h" | ||
153 | +#include "qom/object.h" | ||
154 | |||
155 | #ifndef _WIN32 | ||
156 | #include <sys/wait.h> | ||
157 | @@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp) | ||
25 | return 0; | 158 | return 0; |
26 | } | 159 | } |
27 | 160 | ||
28 | -/* assumes s->ctx held */ | 161 | +static void main_loop_update_params(EventLoopBase *base, Error **errp) |
29 | -static void virtio_scsi_clear_aio(VirtIOSCSI *s) | 162 | +{ |
30 | +/* Context: BH in IOThread */ | 163 | + if (!qemu_aio_context) { |
31 | +static void virtio_scsi_dataplane_stop_bh(void *opaque) | 164 | + error_setg(errp, "qemu aio context not ready"); |
32 | { | 165 | + return; |
33 | + VirtIOSCSI *s = opaque; | 166 | + } |
34 | VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); | 167 | + |
35 | int i; | 168 | + aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); |
36 | 169 | +} | |
37 | @@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) | 170 | + |
38 | return 0; | 171 | +MainLoop *mloop; |
39 | 172 | + | |
40 | fail_vrings: | 173 | +static void main_loop_init(EventLoopBase *base, Error **errp) |
41 | - virtio_scsi_clear_aio(s); | 174 | +{ |
42 | + aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); | 175 | + MainLoop *m = MAIN_LOOP(base); |
43 | aio_context_release(s->ctx); | 176 | + |
44 | for (i = 0; i < vs->conf.num_queues + 2; i++) { | 177 | + if (mloop) { |
45 | virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | 178 | + error_setg(errp, "only one main-loop instance allowed"); |
46 | @@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) | 179 | + return; |
47 | s->dataplane_stopping = true; | 180 | + } |
48 | 181 | + | |
49 | aio_context_acquire(s->ctx); | 182 | + main_loop_update_params(base, errp); |
50 | - virtio_scsi_clear_aio(s); | 183 | + |
51 | + aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); | 184 | + mloop = m; |
52 | aio_context_release(s->ctx); | 185 | + return; |
53 | 186 | +} | |
54 | blk_drain_all(); /* ensure there are no in-flight requests */ | 187 | + |
188 | +static bool main_loop_can_be_deleted(EventLoopBase *base) | ||
189 | +{ | ||
190 | + return false; | ||
191 | +} | ||
192 | + | ||
193 | +static void main_loop_class_init(ObjectClass *oc, void *class_data) | ||
194 | +{ | ||
195 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc); | ||
196 | + | ||
197 | + bc->init = main_loop_init; | ||
198 | + bc->update_params = main_loop_update_params; | ||
199 | + bc->can_be_deleted = main_loop_can_be_deleted; | ||
200 | +} | ||
201 | + | ||
202 | +static const TypeInfo main_loop_info = { | ||
203 | + .name = TYPE_MAIN_LOOP, | ||
204 | + .parent = TYPE_EVENT_LOOP_BASE, | ||
205 | + .class_init = main_loop_class_init, | ||
206 | + .instance_size = sizeof(MainLoop), | ||
207 | +}; | ||
208 | + | ||
209 | +static void main_loop_register_types(void) | ||
210 | +{ | ||
211 | + type_register_static(&main_loop_info); | ||
212 | +} | ||
213 | + | ||
214 | +type_init(main_loop_register_types) | ||
215 | + | ||
216 | static int max_priority; | ||
217 | |||
218 | #ifndef _WIN32 | ||
55 | -- | 219 | -- |
56 | 2.14.3 | 220 | 2.35.1 |
57 | |||
1 | If the main loop thread invokes .ioeventfd_stop() just as the vq handler | 1 | From: Nicolas Saenz Julienne <nsaenzju@redhat.com> |
---|---|---|---|
2 | function begins in the IOThread then the handler may lose the race for | ||
3 | the AioContext lock. By the time the vq handler is able to acquire the | ||
4 | AioContext lock the ioeventfd has already been removed and the handler | ||
5 | isn't supposed to run anymore! | ||
6 | 2 | ||
7 | Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal | 3 | The thread pool regulates itself: when idle, it kills threads until |
8 | from within the IOThread. This way no races with the vq handler are | 4 | empty, when in demand, it creates new threads until full. This behaviour |
9 | possible. | 5 | doesn't play well with latency-sensitive workloads where the price of |
6 | creating a new thread is too high. For example, when paired with qemu's | ||
7 | '-mlock', or using safety features like SafeStack, creating a new thread | ||
8 | has been measured to take multiple milliseconds. | ||
10 | 9 | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | In order to mitigate this, let's introduce a new 'EventLoopBase' |
12 | Reviewed-by: Fam Zheng <famz@redhat.com> | 11 | property to set the thread pool size. The threads will be created during |
13 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | 12 | the pool's initialization or upon updating the property's value, remain |
14 | Message-id: 20180307144205.20619-3-stefanha@redhat.com | 13 | available during its lifetime regardless of demand, and be destroyed upon |
14 | freeing it. A properly characterized workload will then be able to | ||
15 | configure the pool to avoid any latency spikes. | ||
16 | |||
17 | Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com> | ||
18 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | Acked-by: Markus Armbruster <armbru@redhat.com> | ||
20 | Message-id: 20220425075723.20019-4-nsaenzju@redhat.com | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 21 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
16 | --- | 22 | --- |
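A quick orientation before the diff: the two limits surface as optional thread-pool-min/thread-pool-max properties on event-loop objects (see the qapi/qom.json hunk below) and funnel into one new AioContext helper. A minimal sketch of that helper's intended use, with invented numbers and an illustrative caller name:

    /* Illustrative only, not part of the patch: pin the pool of an
     * AioContext to exactly 8 workers so no thread is created or torn
     * down on the I/O hot path. */
    static void example_pin_thread_pool(AioContext *ctx, Error **errp)
    {
        aio_context_set_thread_pool_params(ctx, 8, 8, errp);
    }

On the command line the equivalent would look something like
-object iothread,id=io0,thread-pool-min=8,thread-pool-max=8 (exact spelling
per the QAPI schema below).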
17 | hw/block/dataplane/virtio-blk.c | 24 +++++++++++++++++------- | 23 | qapi/qom.json | 10 +++++- |
18 | 1 file changed, 17 insertions(+), 7 deletions(-) | 24 | include/block/aio.h | 10 ++++++ |
25 | include/block/thread-pool.h | 3 ++ | ||
26 | include/sysemu/event-loop-base.h | 4 +++ | ||
27 | event-loop-base.c | 23 +++++++++++++ | ||
28 | iothread.c | 3 ++ | ||
29 | util/aio-posix.c | 1 + | ||
30 | util/async.c | 20 ++++++++++++ | ||
31 | util/main-loop.c | 9 ++++++ | ||
32 | util/thread-pool.c | 55 +++++++++++++++++++++++++++++--- | ||
33 | 10 files changed, 133 insertions(+), 5 deletions(-) | ||
19 | 34 | ||
20 | diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c | 35 | diff --git a/qapi/qom.json b/qapi/qom.json |
21 | index XXXXXXX..XXXXXXX 100644 | 36 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/block/dataplane/virtio-blk.c | 37 | --- a/qapi/qom.json |
23 | +++ b/hw/block/dataplane/virtio-blk.c | 38 | +++ b/qapi/qom.json |
24 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) | 39 | @@ -XXX,XX +XXX,XX @@ |
25 | return -ENOSYS; | 40 | # 0 means that the engine will use its default. |
26 | } | 41 | # (default: 0) |
27 | 42 | # | |
28 | +/* Stop notifications for new requests from guest. | 43 | +# @thread-pool-min: minimum number of threads reserved in the thread pool |
29 | + * | 44 | +# (default: 0) |
30 | + * Context: BH in IOThread | 45 | +# |
46 | +# @thread-pool-max: maximum number of threads the thread pool can contain | ||
47 | +# (default: 64) | ||
48 | +# | ||
49 | # Since: 7.1 | ||
50 | ## | ||
51 | { 'struct': 'EventLoopBaseProperties', | ||
52 | - 'data': { '*aio-max-batch': 'int' } } | ||
53 | + 'data': { '*aio-max-batch': 'int', | ||
54 | + '*thread-pool-min': 'int', | ||
55 | + '*thread-pool-max': 'int' } } | ||
56 | |||
57 | ## | ||
58 | # @IothreadProperties: | ||
59 | diff --git a/include/block/aio.h b/include/block/aio.h | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/include/block/aio.h | ||
62 | +++ b/include/block/aio.h | ||
63 | @@ -XXX,XX +XXX,XX @@ struct AioContext { | ||
64 | QSLIST_HEAD(, Coroutine) scheduled_coroutines; | ||
65 | QEMUBH *co_schedule_bh; | ||
66 | |||
67 | + int thread_pool_min; | ||
68 | + int thread_pool_max; | ||
69 | /* Thread pool for performing work and receiving completion callbacks. | ||
70 | * Has its own locking. | ||
71 | */ | ||
72 | @@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, | ||
73 | void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, | ||
74 | Error **errp); | ||
75 | |||
76 | +/** | ||
77 | + * aio_context_set_thread_pool_params: | ||
78 | + * @ctx: the aio context | ||
79 | + * @min: min number of threads to have readily available in the thread pool | ||
80 | + * @max: max number of threads the thread pool can contain | ||
31 | + */ | 81 | + */ |
32 | +static void virtio_blk_data_plane_stop_bh(void *opaque) | 82 | +void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, |
83 | + int64_t max, Error **errp); | ||
84 | #endif | ||
85 | diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/include/block/thread-pool.h | ||
88 | +++ b/include/block/thread-pool.h | ||
89 | @@ -XXX,XX +XXX,XX @@ | ||
90 | |||
91 | #include "block/block.h" | ||
92 | |||
93 | +#define THREAD_POOL_MAX_THREADS_DEFAULT 64 | ||
94 | + | ||
95 | typedef int ThreadPoolFunc(void *opaque); | ||
96 | |||
97 | typedef struct ThreadPool ThreadPool; | ||
98 | @@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, | ||
99 | int coroutine_fn thread_pool_submit_co(ThreadPool *pool, | ||
100 | ThreadPoolFunc *func, void *arg); | ||
101 | void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); | ||
102 | +void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); | ||
103 | |||
104 | #endif | ||
105 | diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/include/sysemu/event-loop-base.h | ||
108 | +++ b/include/sysemu/event-loop-base.h | ||
109 | @@ -XXX,XX +XXX,XX @@ struct EventLoopBase { | ||
110 | |||
111 | /* AioContext AIO engine parameters */ | ||
112 | int64_t aio_max_batch; | ||
113 | + | ||
114 | + /* AioContext thread pool parameters */ | ||
115 | + int64_t thread_pool_min; | ||
116 | + int64_t thread_pool_max; | ||
117 | }; | ||
118 | #endif | ||
119 | diff --git a/event-loop-base.c b/event-loop-base.c | ||
120 | index XXXXXXX..XXXXXXX 100644 | ||
121 | --- a/event-loop-base.c | ||
122 | +++ b/event-loop-base.c | ||
123 | @@ -XXX,XX +XXX,XX @@ | ||
124 | #include "qemu/osdep.h" | ||
125 | #include "qom/object_interfaces.h" | ||
126 | #include "qapi/error.h" | ||
127 | +#include "block/thread-pool.h" | ||
128 | #include "sysemu/event-loop-base.h" | ||
129 | |||
130 | typedef struct { | ||
131 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
132 | ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ | ||
133 | } EventLoopBaseParamInfo; | ||
134 | |||
135 | +static void event_loop_base_instance_init(Object *obj) | ||
33 | +{ | 136 | +{ |
34 | + VirtIOBlockDataPlane *s = opaque; | 137 | + EventLoopBase *base = EVENT_LOOP_BASE(obj); |
35 | + unsigned i; | 138 | + |
36 | + | 139 | + base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; |
37 | + for (i = 0; i < s->conf->num_queues; i++) { | ||
38 | + VirtQueue *vq = virtio_get_queue(s->vdev, i); | ||
39 | + | ||
40 | + virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL); | ||
41 | + } | ||
42 | +} | 140 | +} |
43 | + | 141 | + |
44 | /* Context: QEMU global mutex held */ | 142 | static EventLoopBaseParamInfo aio_max_batch_info = { |
45 | void virtio_blk_data_plane_stop(VirtIODevice *vdev) | 143 | "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), |
144 | }; | ||
145 | +static EventLoopBaseParamInfo thread_pool_min_info = { | ||
146 | + "thread-pool-min", offsetof(EventLoopBase, thread_pool_min), | ||
147 | +}; | ||
148 | +static EventLoopBaseParamInfo thread_pool_max_info = { | ||
149 | + "thread-pool-max", offsetof(EventLoopBase, thread_pool_max), | ||
150 | +}; | ||
151 | |||
152 | static void event_loop_base_get_param(Object *obj, Visitor *v, | ||
153 | const char *name, void *opaque, Error **errp) | ||
154 | @@ -XXX,XX +XXX,XX @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data) | ||
155 | event_loop_base_get_param, | ||
156 | event_loop_base_set_param, | ||
157 | NULL, &aio_max_batch_info); | ||
158 | + object_class_property_add(klass, "thread-pool-min", "int", | ||
159 | + event_loop_base_get_param, | ||
160 | + event_loop_base_set_param, | ||
161 | + NULL, &thread_pool_min_info); | ||
162 | + object_class_property_add(klass, "thread-pool-max", "int", | ||
163 | + event_loop_base_get_param, | ||
164 | + event_loop_base_set_param, | ||
165 | + NULL, &thread_pool_max_info); | ||
166 | } | ||
167 | |||
168 | static const TypeInfo event_loop_base_info = { | ||
169 | .name = TYPE_EVENT_LOOP_BASE, | ||
170 | .parent = TYPE_OBJECT, | ||
171 | .instance_size = sizeof(EventLoopBase), | ||
172 | + .instance_init = event_loop_base_instance_init, | ||
173 | .class_size = sizeof(EventLoopBaseClass), | ||
174 | .class_init = event_loop_base_class_init, | ||
175 | .abstract = true, | ||
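The per-parameter boilerplate above is also the extension point for future event-loop tunables. Purely as an illustration (the parameter name and field below are invented, not part of this series), one more knob would follow the same pattern:

    /* Hypothetical example only: a further tunable added the same way as
     * thread-pool-min/thread-pool-max above.  It would also need a new
     * int64_t field in struct EventLoopBase. */
    static EventLoopBaseParamInfo thread_pool_idle_timeout_info = {
        "thread-pool-idle-timeout",
        offsetof(EventLoopBase, thread_pool_idle_timeout),
    };

    /* ...and one more registration call in event_loop_base_class_init(): */
    object_class_property_add(klass, "thread-pool-idle-timeout", "int",
                              event_loop_base_get_param,
                              event_loop_base_set_param,
                              NULL, &thread_pool_idle_timeout_info);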
176 | diff --git a/iothread.c b/iothread.c | ||
177 | index XXXXXXX..XXXXXXX 100644 | ||
178 | --- a/iothread.c | ||
179 | +++ b/iothread.c | ||
180 | @@ -XXX,XX +XXX,XX @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) | ||
181 | aio_context_set_aio_params(iothread->ctx, | ||
182 | iothread->parent_obj.aio_max_batch, | ||
183 | errp); | ||
184 | + | ||
185 | + aio_context_set_thread_pool_params(iothread->ctx, base->thread_pool_min, | ||
186 | + base->thread_pool_max, errp); | ||
187 | } | ||
188 | |||
189 | |||
190 | diff --git a/util/aio-posix.c b/util/aio-posix.c | ||
191 | index XXXXXXX..XXXXXXX 100644 | ||
192 | --- a/util/aio-posix.c | ||
193 | +++ b/util/aio-posix.c | ||
194 | @@ -XXX,XX +XXX,XX @@ | ||
195 | |||
196 | #include "qemu/osdep.h" | ||
197 | #include "block/block.h" | ||
198 | +#include "block/thread-pool.h" | ||
199 | #include "qemu/main-loop.h" | ||
200 | #include "qemu/rcu.h" | ||
201 | #include "qemu/rcu_queue.h" | ||
202 | diff --git a/util/async.c b/util/async.c | ||
203 | index XXXXXXX..XXXXXXX 100644 | ||
204 | --- a/util/async.c | ||
205 | +++ b/util/async.c | ||
206 | @@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp) | ||
207 | |||
208 | ctx->aio_max_batch = 0; | ||
209 | |||
210 | + ctx->thread_pool_min = 0; | ||
211 | + ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; | ||
212 | + | ||
213 | return ctx; | ||
214 | fail: | ||
215 | g_source_destroy(&ctx->source); | ||
216 | @@ -XXX,XX +XXX,XX @@ void qemu_set_current_aio_context(AioContext *ctx) | ||
217 | assert(!get_my_aiocontext()); | ||
218 | set_my_aiocontext(ctx); | ||
219 | } | ||
220 | + | ||
221 | +void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, | ||
222 | + int64_t max, Error **errp) | ||
223 | +{ | ||
224 | + | ||
225 | + if (min > max || !max || min > INT_MAX || max > INT_MAX) { | ||
226 | + error_setg(errp, "bad thread-pool-min/thread-pool-max values"); | ||
227 | + return; | ||
228 | + } | ||
229 | + | ||
230 | + ctx->thread_pool_min = min; | ||
231 | + ctx->thread_pool_max = max; | ||
232 | + | ||
233 | + if (ctx->thread_pool) { | ||
234 | + thread_pool_update_params(ctx->thread_pool, ctx); | ||
235 | + } | ||
236 | +} | ||
237 | diff --git a/util/main-loop.c b/util/main-loop.c | ||
238 | index XXXXXXX..XXXXXXX 100644 | ||
239 | --- a/util/main-loop.c | ||
240 | +++ b/util/main-loop.c | ||
241 | @@ -XXX,XX +XXX,XX @@ | ||
242 | #include "sysemu/replay.h" | ||
243 | #include "qemu/main-loop.h" | ||
244 | #include "block/aio.h" | ||
245 | +#include "block/thread-pool.h" | ||
246 | #include "qemu/error-report.h" | ||
247 | #include "qemu/queue.h" | ||
248 | #include "qemu/compiler.h" | ||
249 | @@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp) | ||
250 | |||
251 | static void main_loop_update_params(EventLoopBase *base, Error **errp) | ||
46 | { | 252 | { |
47 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) | 253 | + ERRP_GUARD(); |
48 | trace_virtio_blk_data_plane_stop(s); | 254 | + |
49 | 255 | if (!qemu_aio_context) { | |
50 | aio_context_acquire(s->ctx); | 256 | error_setg(errp, "qemu aio context not ready"); |
51 | - | 257 | return; |
52 | - /* Stop notifications for new requests from guest */ | 258 | } |
53 | - for (i = 0; i < nvqs; i++) { | 259 | |
54 | - VirtQueue *vq = virtio_get_queue(s->vdev, i); | 260 | aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); |
55 | - | 261 | + if (*errp) { |
56 | - virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL); | 262 | + return; |
57 | - } | 263 | + } |
58 | + aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); | 264 | + |
59 | 265 | + aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min, | |
60 | /* Drain and switch bs back to the QEMU main loop */ | 266 | + base->thread_pool_max, errp); |
61 | blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context()); | 267 | } |
268 | |||
269 | MainLoop *mloop; | ||
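One detail worth noting in the main-loop hunk above: the ERRP_GUARD() added at the top of main_loop_update_params() is what makes the *errp check after aio_context_set_aio_params() safe, since the guard substitutes a local Error pointer when the caller passed NULL (or &error_fatal/&error_abort). A minimal sketch of the idiom, with invented helper names:

    /* Sketch of the ERRP_GUARD() pattern only; first_step()/second_step()
     * are placeholders, not functions from this series. */
    static void example_two_step_setup(Error **errp)
    {
        ERRP_GUARD();               /* from qapi/error.h */

        first_step(errp);
        if (*errp) {                /* safe to test thanks to the guard */
            return;
        }
        second_step(errp);
    }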
270 | diff --git a/util/thread-pool.c b/util/thread-pool.c | ||
271 | index XXXXXXX..XXXXXXX 100644 | ||
272 | --- a/util/thread-pool.c | ||
273 | +++ b/util/thread-pool.c | ||
274 | @@ -XXX,XX +XXX,XX @@ struct ThreadPool { | ||
275 | QemuMutex lock; | ||
276 | QemuCond worker_stopped; | ||
277 | QemuSemaphore sem; | ||
278 | - int max_threads; | ||
279 | QEMUBH *new_thread_bh; | ||
280 | |||
281 | /* The following variables are only accessed from one AioContext. */ | ||
282 | @@ -XXX,XX +XXX,XX @@ struct ThreadPool { | ||
283 | int new_threads; /* backlog of threads we need to create */ | ||
284 | int pending_threads; /* threads created but not running yet */ | ||
285 | bool stopping; | ||
286 | + int min_threads; | ||
287 | + int max_threads; | ||
288 | }; | ||
289 | |||
290 | +static inline bool back_to_sleep(ThreadPool *pool, int ret) | ||
291 | +{ | ||
292 | + /* | ||
293 | + * The semaphore timed out, we should exit the loop except when: | ||
294 | + * - There is work to do, we raced with the signal. | ||
295 | + * - The max threads threshold just changed, we raced with the signal. | ||
296 | + * - The thread pool forces a minimum number of readily available threads. | ||
297 | + */ | ||
298 | + if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) || | ||
299 | + pool->cur_threads > pool->max_threads || | ||
300 | + pool->cur_threads <= pool->min_threads)) { | ||
301 | + return true; | ||
302 | + } | ||
303 | + | ||
304 | + return false; | ||
305 | +} | ||
306 | + | ||
307 | static void *worker_thread(void *opaque) | ||
308 | { | ||
309 | ThreadPool *pool = opaque; | ||
310 | @@ -XXX,XX +XXX,XX @@ static void *worker_thread(void *opaque) | ||
311 | ret = qemu_sem_timedwait(&pool->sem, 10000); | ||
312 | qemu_mutex_lock(&pool->lock); | ||
313 | pool->idle_threads--; | ||
314 | - } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list)); | ||
315 | - if (ret == -1 || pool->stopping) { | ||
316 | + } while (back_to_sleep(pool, ret)); | ||
317 | + if (ret == -1 || pool->stopping || | ||
318 | + pool->cur_threads > pool->max_threads) { | ||
319 | break; | ||
320 | } | ||
321 | |||
322 | @@ -XXX,XX +XXX,XX @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg) | ||
323 | thread_pool_submit_aio(pool, func, arg, NULL, NULL); | ||
324 | } | ||
325 | |||
326 | +void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) | ||
327 | +{ | ||
328 | + qemu_mutex_lock(&pool->lock); | ||
329 | + | ||
330 | + pool->min_threads = ctx->thread_pool_min; | ||
331 | + pool->max_threads = ctx->thread_pool_max; | ||
332 | + | ||
333 | + /* | ||
334 | + * We either have to: | ||
335 | + * - Increase the number of available threads until over the min_threads | ||
336 | + * threshold. | ||
337 | + * - Decrease the number of available threads until under the max_threads | ||
338 | + * threshold. | ||
339 | + * - Do nothing. The current number of threads falls between the min and | ||
340 | + * max thresholds. We'll let the pool manage itself. | ||
341 | + */ | ||
342 | + for (int i = pool->cur_threads; i < pool->min_threads; i++) { | ||
343 | + spawn_thread(pool); | ||
344 | + } | ||
345 | + | ||
346 | + for (int i = pool->cur_threads; i > pool->max_threads; i--) { | ||
347 | + qemu_sem_post(&pool->sem); | ||
348 | + } | ||
349 | + | ||
350 | + qemu_mutex_unlock(&pool->lock); | ||
351 | +} | ||
352 | + | ||
353 | static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) | ||
354 | { | ||
355 | if (!ctx) { | ||
356 | @@ -XXX,XX +XXX,XX @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) | ||
357 | qemu_mutex_init(&pool->lock); | ||
358 | qemu_cond_init(&pool->worker_stopped); | ||
359 | qemu_sem_init(&pool->sem, 0); | ||
360 | - pool->max_threads = 64; | ||
361 | pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool); | ||
362 | |||
363 | QLIST_INIT(&pool->head); | ||
364 | QTAILQ_INIT(&pool->request_list); | ||
365 | + | ||
366 | + thread_pool_update_params(pool, ctx); | ||
367 | } | ||
368 | |||
369 | ThreadPool *thread_pool_new(AioContext *ctx) | ||
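To make the resizing rule in thread_pool_update_params() concrete, a worked example with invented numbers (thread-pool-min=4, thread-pool-max=8):

    2 workers alive:  the first loop calls spawn_thread() twice, topping the
                      pool up to the minimum of 4.
    12 workers alive: the second loop posts the semaphore 4 times; each woken
                      idle worker sees cur_threads > max_threads in
                      worker_thread() and exits, converging on 8.
    6 workers alive:  neither loop runs and the pool keeps regulating itself
                      between the two bounds, as before this patch.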
62 | -- | 370 | -- |
63 | 2.14.3 | 371 | 2.35.1 |
64 | |||