1 | The following changes since commit 0ab4537f08e09b13788db67efd760592fb7db769: | 1 | The following changes since commit 6c769690ac845fa62642a5f93b4e4bd906adab95: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/stefanberger/tags/pull-tpm-2018-03-07-1' into staging (2018-03-08 12:56:39 +0000) | 3 | Merge remote-tracking branch 'remotes/vsementsov/tags/pull-simplebench-2021-05-04' into staging (2021-05-21 12:02:34 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://github.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 4486e89c219c0d1b9bd8dfa0b1dd5b0d51ff2268: | 9 | for you to fetch changes up to 0a6f0c76a030710780ce10d6347a70f098024d21: |
10 | 10 | ||
11 | vl: introduce vm_shutdown() (2018-03-08 17:38:51 +0000) | 11 | coroutine-sleep: introduce qemu_co_sleep (2021-05-21 18:22:33 +0100) |
12 | |||
13 | ---------------------------------------------------------------- | ||
14 | Pull request | ||
15 | |||
16 | (Resent due to an email preparation mistake.) | ||
12 | 17 | ||
13 | ---------------------------------------------------------------- | 18 | ---------------------------------------------------------------- |
14 | 19 | ||
15 | ---------------------------------------------------------------- | 20 | Paolo Bonzini (6): |
21 | coroutine-sleep: use a stack-allocated timer | ||
22 | coroutine-sleep: disallow NULL QemuCoSleepState** argument | ||
23 | coroutine-sleep: allow qemu_co_sleep_wake that wakes nothing | ||
24 | coroutine-sleep: move timer out of QemuCoSleepState | ||
25 | coroutine-sleep: replace QemuCoSleepState pointer with struct in the | ||
26 | API | ||
27 | coroutine-sleep: introduce qemu_co_sleep | ||
16 | 28 | ||
17 | Deepa Srinivasan (1): | 29 | Philippe Mathieu-Daudé (1): |
18 | block: Fix qemu crash when using scsi-block | 30 | bitops.h: Improve find_xxx_bit() documentation |
19 | 31 | ||
20 | Fam Zheng (1): | 32 | Zenghui Yu (1): |
21 | README: Fix typo 'git-publish' | 33 | multi-process: Initialize variables declared with g_auto* |
22 | 34 | ||
23 | Sergio Lopez (1): | 35 | include/qemu/bitops.h | 15 ++++++-- |
24 | virtio-blk: dataplane: Don't batch notifications if EVENT_IDX is | 36 | include/qemu/coroutine.h | 27 ++++++++----- |
25 | present | 37 | block/block-copy.c | 10 ++--- |
26 | 38 | block/nbd.c | 14 +++---- | |
27 | Stefan Hajnoczi (4): | 39 | hw/remote/memory.c | 5 +-- |
28 | block: add aio_wait_bh_oneshot() | 40 | hw/remote/proxy.c | 3 +- |
29 | virtio-blk: fix race between .ioeventfd_stop() and vq handler | 41 | util/qemu-coroutine-sleep.c | 75 +++++++++++++++++++------------------ |
30 | virtio-scsi: fix race between .ioeventfd_stop() and vq handler | 42 | 7 files changed, 79 insertions(+), 70 deletions(-) |
31 | vl: introduce vm_shutdown() | ||
32 | |||
33 | include/block/aio-wait.h | 13 +++++++++++ | ||
34 | include/sysemu/iothread.h | 1 - | ||
35 | include/sysemu/sysemu.h | 1 + | ||
36 | block/block-backend.c | 51 ++++++++++++++++++++--------------------- | ||
37 | cpus.c | 16 ++++++++++--- | ||
38 | hw/block/dataplane/virtio-blk.c | 39 +++++++++++++++++++++++-------- | ||
39 | hw/scsi/virtio-scsi-dataplane.c | 9 ++++---- | ||
40 | iothread.c | 31 ------------------------- | ||
41 | util/aio-wait.c | 31 +++++++++++++++++++++++++ | ||
42 | vl.c | 13 +++-------- | ||
43 | README | 2 +- | ||
44 | 11 files changed, 122 insertions(+), 85 deletions(-) | ||
45 | 43 | ||
46 | -- | 44 | -- |
47 | 2.14.3 | 45 | 2.31.1 |
48 | 46 | ||
49 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Zenghui Yu <yuzenghui@huawei.com> | ||
1 | 2 | ||
3 | Quote docs/devel/style.rst (section "Automatic memory deallocation"): | ||
4 | |||
5 | * Variables declared with g_auto* MUST always be initialized, | ||
6 | otherwise the cleanup function will use uninitialized stack memory | ||
7 | |||
8 | Initialize @name properly to get rid of the compilation error (using | ||
9 | gcc-7.3.0 on CentOS): | ||
10 | |||
11 | ../hw/remote/proxy.c: In function 'pci_proxy_dev_realize': | ||
12 | /usr/include/glib-2.0/glib/glib-autocleanups.h:28:3: error: 'name' may be used uninitialized in this function [-Werror=maybe-uninitialized] | ||
13 | g_free (*pp); | ||
14 | ^~~~~~~~~~~~ | ||
15 | ../hw/remote/proxy.c:350:30: note: 'name' was declared here | ||
16 | g_autofree char *name; | ||
17 | ^~~~ | ||
18 | |||
19 | Signed-off-by: Zenghui Yu <yuzenghui@huawei.com> | ||
20 | Reviewed-by: Jagannathan Raman <jag.raman@oracle.com> | ||
21 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
22 | Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com> | ||
23 | Message-id: 20210312112143.1369-1-yuzenghui@huawei.com | ||
24 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
25 | --- | ||
26 | hw/remote/memory.c | 5 ++--- | ||
27 | hw/remote/proxy.c | 3 +-- | ||
28 | 2 files changed, 3 insertions(+), 5 deletions(-) | ||
29 | |||
30 | diff --git a/hw/remote/memory.c b/hw/remote/memory.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/hw/remote/memory.c | ||
33 | +++ b/hw/remote/memory.c | ||
34 | @@ -XXX,XX +XXX,XX @@ void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp) | ||
35 | |||
36 | remote_sysmem_reset(); | ||
37 | |||
38 | - for (region = 0; region < msg->num_fds; region++) { | ||
39 | - g_autofree char *name; | ||
40 | + for (region = 0; region < msg->num_fds; region++, suffix++) { | ||
41 | + g_autofree char *name = g_strdup_printf("remote-mem-%u", suffix); | ||
42 | subregion = g_new(MemoryRegion, 1); | ||
43 | - name = g_strdup_printf("remote-mem-%u", suffix++); | ||
44 | memory_region_init_ram_from_fd(subregion, NULL, | ||
45 | name, sysmem_info->sizes[region], | ||
46 | true, msg->fds[region], | ||
47 | diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/hw/remote/proxy.c | ||
50 | +++ b/hw/remote/proxy.c | ||
51 | @@ -XXX,XX +XXX,XX @@ static void probe_pci_info(PCIDevice *dev, Error **errp) | ||
52 | PCI_BASE_ADDRESS_SPACE_IO : PCI_BASE_ADDRESS_SPACE_MEMORY; | ||
53 | |||
54 | if (size) { | ||
55 | - g_autofree char *name; | ||
56 | + g_autofree char *name = g_strdup_printf("bar-region-%d", i); | ||
57 | pdev->region[i].dev = pdev; | ||
58 | pdev->region[i].present = true; | ||
59 | if (type == PCI_BASE_ADDRESS_SPACE_MEMORY) { | ||
60 | pdev->region[i].memory = true; | ||
61 | } | ||
62 | - name = g_strdup_printf("bar-region-%d", i); | ||
63 | memory_region_init_io(&pdev->region[i].mr, OBJECT(pdev), | ||
64 | &proxy_mr_ops, &pdev->region[i], | ||
65 | name, size); | ||
66 | -- | ||
67 | 2.31.1 | ||
68 | diff view generated by jsdifflib |
1 | If the main loop thread invokes .ioeventfd_stop() just as the vq handler | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | function begins in the IOThread then the handler may lose the race for | ||
3 | the AioContext lock. By the time the vq handler is able to acquire the | ||
4 | AioContext lock the ioeventfd has already been removed and the handler | ||
5 | isn't supposed to run anymore! | ||
6 | 2 | ||
7 | Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal | 3 | Document that the following functions return the bitmap size |
8 | from within the IOThread. This way no races with the vq handler are | 4 | if no matching bit is found: |
9 | possible. | ||
10 | 5 | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 6 | - find_first_bit |
12 | Reviewed-by: Fam Zheng <famz@redhat.com> | 7 | - find_next_bit |
13 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | 8 | - find_last_bit |
14 | Message-id: 20180307144205.20619-4-stefanha@redhat.com | 9 | - find_first_zero_bit |
10 | - find_next_zero_bit | ||
11 | |||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
14 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
15 | Message-id: 20210510200758.2623154-2-philmd@redhat.com | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 16 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
16 | --- | 17 | --- |
17 | hw/scsi/virtio-scsi-dataplane.c | 9 +++++---- | 18 | include/qemu/bitops.h | 15 ++++++++++++--- |
18 | 1 file changed, 5 insertions(+), 4 deletions(-) | 19 | 1 file changed, 12 insertions(+), 3 deletions(-) |
19 | 20 | ||
20 | diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c | 21 | diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h |
21 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/scsi/virtio-scsi-dataplane.c | 23 | --- a/include/qemu/bitops.h |
23 | +++ b/hw/scsi/virtio-scsi-dataplane.c | 24 | +++ b/include/qemu/bitops.h |
24 | @@ -XXX,XX +XXX,XX @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, | 25 | @@ -XXX,XX +XXX,XX @@ static inline int test_bit(long nr, const unsigned long *addr) |
25 | return 0; | 26 | * @addr: The address to start the search at |
26 | } | 27 | * @size: The maximum size to search |
27 | 28 | * | |
28 | -/* assumes s->ctx held */ | 29 | - * Returns the bit number of the first set bit, or size. |
29 | -static void virtio_scsi_clear_aio(VirtIOSCSI *s) | 30 | + * Returns the bit number of the last set bit, |
30 | +/* Context: BH in IOThread */ | 31 | + * or @size if there is no set bit in the bitmap. |
31 | +static void virtio_scsi_dataplane_stop_bh(void *opaque) | 32 | */ |
32 | { | 33 | unsigned long find_last_bit(const unsigned long *addr, |
33 | + VirtIOSCSI *s = opaque; | 34 | unsigned long size); |
34 | VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); | 35 | @@ -XXX,XX +XXX,XX @@ unsigned long find_last_bit(const unsigned long *addr, |
35 | int i; | 36 | * @addr: The address to base the search on |
36 | 37 | * @offset: The bitnumber to start searching at | |
37 | @@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) | 38 | * @size: The bitmap size in bits |
38 | return 0; | 39 | + * |
39 | 40 | + * Returns the bit number of the next set bit, | |
40 | fail_vrings: | 41 | + * or @size if there are no further set bits in the bitmap. |
41 | - virtio_scsi_clear_aio(s); | 42 | */ |
42 | + aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); | 43 | unsigned long find_next_bit(const unsigned long *addr, |
43 | aio_context_release(s->ctx); | 44 | unsigned long size, |
44 | for (i = 0; i < vs->conf.num_queues + 2; i++) { | 45 | @@ -XXX,XX +XXX,XX @@ unsigned long find_next_bit(const unsigned long *addr, |
45 | virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | 46 | * @addr: The address to base the search on |
46 | @@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) | 47 | * @offset: The bitnumber to start searching at |
47 | s->dataplane_stopping = true; | 48 | * @size: The bitmap size in bits |
48 | 49 | + * | |
49 | aio_context_acquire(s->ctx); | 50 | + * Returns the bit number of the next cleared bit, |
50 | - virtio_scsi_clear_aio(s); | 51 | + * or @size if there are no further clear bits in the bitmap. |
51 | + aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); | 52 | */ |
52 | aio_context_release(s->ctx); | 53 | |
53 | 54 | unsigned long find_next_zero_bit(const unsigned long *addr, | |
54 | blk_drain_all(); /* ensure there are no in-flight requests */ | 55 | @@ -XXX,XX +XXX,XX @@ unsigned long find_next_zero_bit(const unsigned long *addr, |
56 | * @addr: The address to start the search at | ||
57 | * @size: The maximum size to search | ||
58 | * | ||
59 | - * Returns the bit number of the first set bit. | ||
60 | + * Returns the bit number of the first set bit, | ||
61 | + * or @size if there is no set bit in the bitmap. | ||
62 | */ | ||
63 | static inline unsigned long find_first_bit(const unsigned long *addr, | ||
64 | unsigned long size) | ||
65 | @@ -XXX,XX +XXX,XX @@ static inline unsigned long find_first_bit(const unsigned long *addr, | ||
66 | * @addr: The address to start the search at | ||
67 | * @size: The maximum size to search | ||
68 | * | ||
69 | - * Returns the bit number of the first cleared bit. | ||
70 | + * Returns the bit number of the first cleared bit, | ||
71 | + * or @size if there is no clear bit in the bitmap. | ||
72 | */ | ||
73 | static inline unsigned long find_first_zero_bit(const unsigned long *addr, | ||
74 | unsigned long size) | ||
55 | -- | 75 | -- |
56 | 2.14.3 | 76 | 2.31.1 |
57 | 77 | ||
58 | diff view generated by jsdifflib |
1 | If the main loop thread invokes .ioeventfd_stop() just as the vq handler | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | function begins in the IOThread then the handler may lose the race for | ||
3 | the AioContext lock. By the time the vq handler is able to acquire the | ||
4 | AioContext lock the ioeventfd has already been removed and the handler | ||
5 | isn't supposed to run anymore! | ||
6 | 2 | ||
7 | Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal | 3 | The lifetime of the timer is well-known (it cannot outlive |
8 | from within the IOThread. This way no races with the vq handler are | 4 | qemu_co_sleep_ns_wakeable, because it's deleted by the time the |
9 | possible. | 5 | coroutine resumes), so it is not necessary to place it on the heap. |
10 | 6 | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
12 | Reviewed-by: Fam Zheng <famz@redhat.com> | 8 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
13 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | 9 | Message-id: 20210517100548.28806-2-pbonzini@redhat.com |
14 | Message-id: 20180307144205.20619-3-stefanha@redhat.com | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
16 | --- | 11 | --- |
17 | hw/block/dataplane/virtio-blk.c | 24 +++++++++++++++++------- | 12 | util/qemu-coroutine-sleep.c | 9 ++++----- |
18 | 1 file changed, 17 insertions(+), 7 deletions(-) | 13 | 1 file changed, 4 insertions(+), 5 deletions(-) |
19 | 14 | ||
20 | diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c | 15 | diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c |
21 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/block/dataplane/virtio-blk.c | 17 | --- a/util/qemu-coroutine-sleep.c |
23 | +++ b/hw/block/dataplane/virtio-blk.c | 18 | +++ b/util/qemu-coroutine-sleep.c |
24 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) | 19 | @@ -XXX,XX +XXX,XX @@ static const char *qemu_co_sleep_ns__scheduled = "qemu_co_sleep_ns"; |
25 | return -ENOSYS; | 20 | |
21 | struct QemuCoSleepState { | ||
22 | Coroutine *co; | ||
23 | - QEMUTimer *ts; | ||
24 | + QEMUTimer ts; | ||
25 | QemuCoSleepState **user_state_pointer; | ||
26 | }; | ||
27 | |||
28 | @@ -XXX,XX +XXX,XX @@ void qemu_co_sleep_wake(QemuCoSleepState *sleep_state) | ||
29 | if (sleep_state->user_state_pointer) { | ||
30 | *sleep_state->user_state_pointer = NULL; | ||
31 | } | ||
32 | - timer_del(sleep_state->ts); | ||
33 | + timer_del(&sleep_state->ts); | ||
34 | aio_co_wake(sleep_state->co); | ||
26 | } | 35 | } |
27 | 36 | ||
28 | +/* Stop notifications for new requests from guest. | 37 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, |
29 | + * | 38 | AioContext *ctx = qemu_get_current_aio_context(); |
30 | + * Context: BH in IOThread | 39 | QemuCoSleepState state = { |
31 | + */ | 40 | .co = qemu_coroutine_self(), |
32 | +static void virtio_blk_data_plane_stop_bh(void *opaque) | 41 | - .ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &state), |
33 | +{ | 42 | .user_state_pointer = sleep_state, |
34 | + VirtIOBlockDataPlane *s = opaque; | 43 | }; |
35 | + unsigned i; | 44 | |
36 | + | 45 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, |
37 | + for (i = 0; i < s->conf->num_queues; i++) { | 46 | abort(); |
38 | + VirtQueue *vq = virtio_get_queue(s->vdev, i); | 47 | } |
39 | + | 48 | |
40 | + virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL); | 49 | + aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, &state); |
41 | + } | 50 | if (sleep_state) { |
42 | +} | 51 | *sleep_state = &state; |
43 | + | 52 | } |
44 | /* Context: QEMU global mutex held */ | 53 | - timer_mod(state.ts, qemu_clock_get_ns(type) + ns); |
45 | void virtio_blk_data_plane_stop(VirtIODevice *vdev) | 54 | + timer_mod(&state.ts, qemu_clock_get_ns(type) + ns); |
46 | { | 55 | qemu_coroutine_yield(); |
47 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) | 56 | if (sleep_state) { |
48 | trace_virtio_blk_data_plane_stop(s); | 57 | /* |
49 | 58 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, | |
50 | aio_context_acquire(s->ctx); | 59 | */ |
51 | - | 60 | assert(*sleep_state == NULL); |
52 | - /* Stop notifications for new requests from guest */ | 61 | } |
53 | - for (i = 0; i < nvqs; i++) { | 62 | - timer_free(state.ts); |
54 | - VirtQueue *vq = virtio_get_queue(s->vdev, i); | 63 | } |
55 | - | ||
56 | - virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL); | ||
57 | - } | ||
58 | + aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); | ||
59 | |||
60 | /* Drain and switch bs back to the QEMU main loop */ | ||
61 | blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context()); | ||
62 | -- | 64 | -- |
63 | 2.14.3 | 65 | 2.31.1 |
64 | 66 | ||
65 | diff view generated by jsdifflib |
1 | From: Sergio Lopez <slp@redhat.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Commit 5b2ffbe4d99843fd8305c573a100047a8c962327 ("virtio-blk: dataplane: | 3 | Simplify the code by removing conditionals. qemu_co_sleep_ns |
4 | notify guest as a batch") deferred guest notification to a BH in order to | 4 | can simply point the argument to an on-stack temporary. |
5 | batch notifications, with the purpose of avoiding flooding the guest with | ||
6 | interruptions. | ||
7 | 5 | ||
8 | This optimization came with a cost. The average latency perceived in the | 6 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
9 | guest is increased by a few microseconds, but also when multiple IO | 7 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
10 | operations finish at the same time, the guest won't be notified until | 8 | Message-id: 20210517100548.28806-3-pbonzini@redhat.com |
11 | all completions from each operation have been run. In contrast, | ||
12 | virtio-scsi issues the notification at the end of each completion. | ||
13 | |||
14 | On the other hand, nowadays we have the EVENT_IDX feature that allows a | ||
15 | better coordination between QEMU and the Guest OS to avoid sending | ||
16 | unnecessary interruptions. | ||
17 | |||
18 | With this change, virtio-blk/dataplane only batches notifications if the | ||
19 | EVENT_IDX feature is not present. | ||
20 | |||
21 | Some numbers obtained with fio (ioengine=sync, iodepth=1, direct=1): | ||
22 | - Test specs: | ||
23 | * fio-3.4 (ioengine=sync, iodepth=1, direct=1) | ||
24 | * qemu master | ||
25 | * virtio-blk with a dedicated iothread (default poll-max-ns) | ||
26 | * backend: null_blk nr_devices=1 irqmode=2 completion_nsec=280000 | ||
27 | * 8 vCPUs pinned to isolated physical cores | ||
28 | * Emulator and iothread also pinned to separate isolated cores | ||
29 | * variance between runs < 1% | ||
30 | |||
31 | - Not patched | ||
32 | * numjobs=1: lat_avg=327.32 irqs=29998 | ||
33 | * numjobs=4: lat_avg=337.89 irqs=29073 | ||
34 | * numjobs=8: lat_avg=342.98 irqs=28643 | ||
35 | |||
36 | - Patched: | ||
37 | * numjobs=1: lat_avg=323.92 irqs=30262 | ||
38 | * numjobs=4: lat_avg=332.65 irqs=29520 | ||
39 | * numjobs=8: lat_avg=335.54 irqs=29323 | ||
40 | |||
41 | Signed-off-by: Sergio Lopez <slp@redhat.com> | ||
42 | Message-id: 20180307114459.26636-1-slp@redhat.com | ||
43 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
44 | --- | 10 | --- |
45 | hw/block/dataplane/virtio-blk.c | 15 +++++++++++++-- | 11 | include/qemu/coroutine.h | 5 +++-- |
46 | 1 file changed, 13 insertions(+), 2 deletions(-) | 12 | util/qemu-coroutine-sleep.c | 18 +++++------------- |
13 | 2 files changed, 8 insertions(+), 15 deletions(-) | ||
47 | 14 | ||
48 | diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c | 15 | diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h |
49 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
50 | --- a/hw/block/dataplane/virtio-blk.c | 17 | --- a/include/qemu/coroutine.h |
51 | +++ b/hw/block/dataplane/virtio-blk.c | 18 | +++ b/include/qemu/coroutine.h |
52 | @@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane { | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct QemuCoSleepState QemuCoSleepState; |
53 | VirtIODevice *vdev; | 20 | |
54 | QEMUBH *bh; /* bh for guest notification */ | 21 | /** |
55 | unsigned long *batch_notify_vqs; | 22 | * Yield the coroutine for a given duration. During this yield, @sleep_state |
56 | + bool batch_notifications; | 23 | - * (if not NULL) is set to an opaque pointer, which may be used for |
57 | 24 | + * is set to an opaque pointer, which may be used for | |
58 | /* Note that these EventNotifiers are assigned by value. This is | 25 | * qemu_co_sleep_wake(). Be careful, the pointer is set back to zero when the |
59 | * fine as long as you do not call event_notifier_cleanup on them | 26 | * timer fires. Don't save the obtained value to other variables and don't call |
60 | @@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane { | 27 | * qemu_co_sleep_wake from another aio context. |
61 | /* Raise an interrupt to signal guest, if necessary */ | 28 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, |
62 | void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) | 29 | QemuCoSleepState **sleep_state); |
30 | static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) | ||
63 | { | 31 | { |
64 | - set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs); | 32 | - qemu_co_sleep_ns_wakeable(type, ns, NULL); |
65 | - qemu_bh_schedule(s->bh); | 33 | + QemuCoSleepState *unused = NULL; |
66 | + if (s->batch_notifications) { | 34 | + qemu_co_sleep_ns_wakeable(type, ns, &unused); |
67 | + set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs); | ||
68 | + qemu_bh_schedule(s->bh); | ||
69 | + } else { | ||
70 | + virtio_notify_irqfd(s->vdev, vq); | ||
71 | + } | ||
72 | } | 35 | } |
73 | 36 | ||
74 | static void notify_guest_bh(void *opaque) | 37 | /** |
75 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) | 38 | diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c |
76 | 39 | index XXXXXXX..XXXXXXX 100644 | |
77 | s->starting = true; | 40 | --- a/util/qemu-coroutine-sleep.c |
78 | 41 | +++ b/util/qemu-coroutine-sleep.c | |
79 | + if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { | 42 | @@ -XXX,XX +XXX,XX @@ void qemu_co_sleep_wake(QemuCoSleepState *sleep_state) |
80 | + s->batch_notifications = true; | 43 | qemu_co_sleep_ns__scheduled, NULL); |
81 | + } else { | 44 | |
82 | + s->batch_notifications = false; | 45 | assert(scheduled == qemu_co_sleep_ns__scheduled); |
83 | + } | 46 | - if (sleep_state->user_state_pointer) { |
47 | - *sleep_state->user_state_pointer = NULL; | ||
48 | - } | ||
49 | + *sleep_state->user_state_pointer = NULL; | ||
50 | timer_del(&sleep_state->ts); | ||
51 | aio_co_wake(sleep_state->co); | ||
52 | } | ||
53 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, | ||
54 | } | ||
55 | |||
56 | aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, &state); | ||
57 | - if (sleep_state) { | ||
58 | - *sleep_state = &state; | ||
59 | - } | ||
60 | + *sleep_state = &state; | ||
61 | timer_mod(&state.ts, qemu_clock_get_ns(type) + ns); | ||
62 | qemu_coroutine_yield(); | ||
63 | - if (sleep_state) { | ||
64 | - /* | ||
65 | - * Note that *sleep_state is cleared during qemu_co_sleep_wake | ||
66 | - * before resuming this coroutine. | ||
67 | - */ | ||
68 | - assert(*sleep_state == NULL); | ||
69 | - } | ||
84 | + | 70 | + |
85 | /* Set up guest notifier (irq) */ | 71 | + /* qemu_co_sleep_wake clears *sleep_state before resuming this coroutine. */ |
86 | r = k->set_guest_notifiers(qbus->parent, nvqs, true); | 72 | + assert(*sleep_state == NULL); |
87 | if (r != 0) { | 73 | } |
88 | -- | 74 | -- |
89 | 2.14.3 | 75 | 2.31.1 |
90 | 76 | ||
91 | diff view generated by jsdifflib |
1 | From: Deepa Srinivasan <deepa.srinivasan@oracle.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Starting qemu with the following arguments causes qemu to segfault: | 3 | All callers of qemu_co_sleep_wake are checking whether they are passing |
4 | ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name= | 4 | a NULL argument inside the pointer-to-pointer: do the check in |
5 | iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1 | 5 | qemu_co_sleep_wake itself. |
6 | 6 | ||
7 | This patch fixes blk_aio_ioctl() so it does not pass stack addresses to | 7 | As a side effect, qemu_co_sleep_wake can be called more than once and |
8 | blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More | 8 | it will only wake the coroutine once; after the first time, the argument |
9 | details about the bug follow. | 9 | will be set to NULL via *sleep_state->user_state_pointer. However, this |
10 | would not be safe if co_sleep_cb kept using the QemuCoSleepState* | ||
11 | directly, so make it go through the pointer-to-pointer instead. | ||
10 | 12 | ||
11 | blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the | 13 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
12 | coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter(). | 14 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
13 | 15 | Message-id: 20210517100548.28806-4-pbonzini@redhat.com | |
14 | When blk_aio_ioctl() is executed from within a coroutine context (e.g. | ||
15 | iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to | ||
16 | the current coroutine's wakeup queue. blk_aio_ioctl() then returns. | ||
17 | |||
18 | When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer: | ||
19 | .... | ||
20 | BlkRwCo *rwco = &acb->rwco; | ||
21 | |||
22 | rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, | ||
23 | rwco->qiov->iov[0].iov_base); <--- qiov is | ||
24 | invalid here | ||
25 | ... | ||
26 | |||
27 | In the case when blk_aio_ioctl() is called from a non-coroutine context, | ||
28 | blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls | ||
29 | qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine | ||
30 | execution is complete, control returns to blk_aio_ioctl_entry() after the call | ||
31 | to blk_co_ioctl(). There is no invalid reference after this point, but the | ||
32 | function is still holding on to invalid pointers. | ||
33 | |||
34 | The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer | ||
35 | rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the | ||
36 | coroutine function casts it to QEMUIOVector or uses the void pointer directly. | ||
37 | |||
38 | Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com> | ||
39 | Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
40 | Reviewed-by: Mark Kanda <mark.kanda@oracle.com> | ||
41 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
42 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 16 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
43 | --- | 17 | --- |
44 | block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- | 18 | block/block-copy.c | 4 +--- |
45 | 1 file changed, 25 insertions(+), 26 deletions(-) | 19 | block/nbd.c | 8 ++------ |
20 | util/qemu-coroutine-sleep.c | 21 ++++++++++++--------- | ||
21 | 3 files changed, 15 insertions(+), 18 deletions(-) | ||
46 | 22 | ||
47 | diff --git a/block/block-backend.c b/block/block-backend.c | 23 | diff --git a/block/block-copy.c b/block/block-copy.c |
48 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
49 | --- a/block/block-backend.c | 25 | --- a/block/block-copy.c |
50 | +++ b/block/block-backend.c | 26 | +++ b/block/block-copy.c |
51 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, | 27 | @@ -XXX,XX +XXX,XX @@ out: |
52 | typedef struct BlkRwCo { | 28 | |
53 | BlockBackend *blk; | 29 | void block_copy_kick(BlockCopyCallState *call_state) |
54 | int64_t offset; | ||
55 | - QEMUIOVector *qiov; | ||
56 | + void *iobuf; | ||
57 | int ret; | ||
58 | BdrvRequestFlags flags; | ||
59 | } BlkRwCo; | ||
60 | @@ -XXX,XX +XXX,XX @@ typedef struct BlkRwCo { | ||
61 | static void blk_read_entry(void *opaque) | ||
62 | { | 30 | { |
63 | BlkRwCo *rwco = opaque; | 31 | - if (call_state->sleep_state) { |
64 | + QEMUIOVector *qiov = rwco->iobuf; | 32 | - qemu_co_sleep_wake(call_state->sleep_state); |
65 | 33 | - } | |
66 | - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, | 34 | + qemu_co_sleep_wake(call_state->sleep_state); |
67 | - rwco->qiov, rwco->flags); | ||
68 | + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, | ||
69 | + qiov, rwco->flags); | ||
70 | } | 35 | } |
71 | 36 | ||
72 | static void blk_write_entry(void *opaque) | 37 | /* |
38 | diff --git a/block/nbd.c b/block/nbd.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/block/nbd.c | ||
41 | +++ b/block/nbd.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs) | ||
43 | BDRVNBDState *s = (BDRVNBDState *)bs->opaque; | ||
44 | |||
45 | s->drained = true; | ||
46 | - if (s->connection_co_sleep_ns_state) { | ||
47 | - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); | ||
48 | - } | ||
49 | + qemu_co_sleep_wake(s->connection_co_sleep_ns_state); | ||
50 | |||
51 | nbd_co_establish_connection_cancel(bs, false); | ||
52 | |||
53 | @@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs) | ||
54 | |||
55 | s->state = NBD_CLIENT_QUIT; | ||
56 | if (s->connection_co) { | ||
57 | - if (s->connection_co_sleep_ns_state) { | ||
58 | - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); | ||
59 | - } | ||
60 | + qemu_co_sleep_wake(s->connection_co_sleep_ns_state); | ||
61 | nbd_co_establish_connection_cancel(bs, true); | ||
62 | } | ||
63 | if (qemu_in_coroutine()) { | ||
64 | diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/util/qemu-coroutine-sleep.c | ||
67 | +++ b/util/qemu-coroutine-sleep.c | ||
68 | @@ -XXX,XX +XXX,XX @@ struct QemuCoSleepState { | ||
69 | |||
70 | void qemu_co_sleep_wake(QemuCoSleepState *sleep_state) | ||
73 | { | 71 | { |
74 | BlkRwCo *rwco = opaque; | 72 | - /* Write of schedule protected by barrier write in aio_co_schedule */ |
75 | + QEMUIOVector *qiov = rwco->iobuf; | 73 | - const char *scheduled = qatomic_cmpxchg(&sleep_state->co->scheduled, |
76 | 74 | - qemu_co_sleep_ns__scheduled, NULL); | |
77 | - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, | 75 | + if (sleep_state) { |
78 | - rwco->qiov, rwco->flags); | 76 | + /* Write of schedule protected by barrier write in aio_co_schedule */ |
79 | + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, | 77 | + const char *scheduled = qatomic_cmpxchg(&sleep_state->co->scheduled, |
80 | + qiov, rwco->flags); | 78 | + qemu_co_sleep_ns__scheduled, NULL); |
79 | |||
80 | - assert(scheduled == qemu_co_sleep_ns__scheduled); | ||
81 | - *sleep_state->user_state_pointer = NULL; | ||
82 | - timer_del(&sleep_state->ts); | ||
83 | - aio_co_wake(sleep_state->co); | ||
84 | + assert(scheduled == qemu_co_sleep_ns__scheduled); | ||
85 | + *sleep_state->user_state_pointer = NULL; | ||
86 | + timer_del(&sleep_state->ts); | ||
87 | + aio_co_wake(sleep_state->co); | ||
88 | + } | ||
81 | } | 89 | } |
82 | 90 | ||
83 | static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, | 91 | static void co_sleep_cb(void *opaque) |
84 | @@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, | 92 | { |
85 | rwco = (BlkRwCo) { | 93 | - qemu_co_sleep_wake(opaque); |
86 | .blk = blk, | 94 | + QemuCoSleepState **sleep_state = opaque; |
87 | .offset = offset, | 95 | + qemu_co_sleep_wake(*sleep_state); |
88 | - .qiov = &qiov, | ||
89 | + .iobuf = &qiov, | ||
90 | .flags = flags, | ||
91 | .ret = NOT_DONE, | ||
92 | }; | ||
93 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_complete_bh(void *opaque) | ||
94 | } | 96 | } |
95 | 97 | ||
96 | static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, | 98 | void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, |
97 | - QEMUIOVector *qiov, CoroutineEntry co_entry, | 99 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, |
98 | + void *iobuf, CoroutineEntry co_entry, | 100 | abort(); |
99 | BdrvRequestFlags flags, | 101 | } |
100 | BlockCompletionFunc *cb, void *opaque) | 102 | |
101 | { | 103 | - aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, &state); |
102 | @@ -XXX,XX +XXX,XX @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, | 104 | + aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, sleep_state); |
103 | acb->rwco = (BlkRwCo) { | 105 | *sleep_state = &state; |
104 | .blk = blk, | 106 | timer_mod(&state.ts, qemu_clock_get_ns(type) + ns); |
105 | .offset = offset, | 107 | qemu_coroutine_yield(); |
106 | - .qiov = qiov, | ||
107 | + .iobuf = iobuf, | ||
108 | .flags = flags, | ||
109 | .ret = NOT_DONE, | ||
110 | }; | ||
111 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_read_entry(void *opaque) | ||
112 | { | ||
113 | BlkAioEmAIOCB *acb = opaque; | ||
114 | BlkRwCo *rwco = &acb->rwco; | ||
115 | + QEMUIOVector *qiov = rwco->iobuf; | ||
116 | |||
117 | - assert(rwco->qiov->size == acb->bytes); | ||
118 | + assert(qiov->size == acb->bytes); | ||
119 | rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, | ||
120 | - rwco->qiov, rwco->flags); | ||
121 | + qiov, rwco->flags); | ||
122 | blk_aio_complete(acb); | ||
123 | } | ||
124 | |||
125 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_write_entry(void *opaque) | ||
126 | { | ||
127 | BlkAioEmAIOCB *acb = opaque; | ||
128 | BlkRwCo *rwco = &acb->rwco; | ||
129 | + QEMUIOVector *qiov = rwco->iobuf; | ||
130 | |||
131 | - assert(!rwco->qiov || rwco->qiov->size == acb->bytes); | ||
132 | + assert(!qiov || qiov->size == acb->bytes); | ||
133 | rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, | ||
134 | - rwco->qiov, rwco->flags); | ||
135 | + qiov, rwco->flags); | ||
136 | blk_aio_complete(acb); | ||
137 | } | ||
138 | |||
139 | @@ -XXX,XX +XXX,XX @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) | ||
140 | static void blk_ioctl_entry(void *opaque) | ||
141 | { | ||
142 | BlkRwCo *rwco = opaque; | ||
143 | + QEMUIOVector *qiov = rwco->iobuf; | ||
144 | + | ||
145 | rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, | ||
146 | - rwco->qiov->iov[0].iov_base); | ||
147 | + qiov->iov[0].iov_base); | ||
148 | } | ||
149 | |||
150 | int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) | ||
151 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_ioctl_entry(void *opaque) | ||
152 | BlkAioEmAIOCB *acb = opaque; | ||
153 | BlkRwCo *rwco = &acb->rwco; | ||
154 | |||
155 | - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, | ||
156 | - rwco->qiov->iov[0].iov_base); | ||
157 | + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); | ||
158 | + | ||
159 | blk_aio_complete(acb); | ||
160 | } | ||
161 | |||
162 | BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, | ||
163 | BlockCompletionFunc *cb, void *opaque) | ||
164 | { | ||
165 | - QEMUIOVector qiov; | ||
166 | - struct iovec iov; | ||
167 | - | ||
168 | - iov = (struct iovec) { | ||
169 | - .iov_base = buf, | ||
170 | - .iov_len = 0, | ||
171 | - }; | ||
172 | - qemu_iovec_init_external(&qiov, &iov, 1); | ||
173 | - | ||
174 | - return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); | ||
175 | + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); | ||
176 | } | ||
177 | |||
178 | int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) | ||
179 | @@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, | ||
180 | static void blk_pdiscard_entry(void *opaque) | ||
181 | { | ||
182 | BlkRwCo *rwco = opaque; | ||
183 | - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); | ||
184 | + QEMUIOVector *qiov = rwco->iobuf; | ||
185 | + | ||
186 | + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); | ||
187 | } | ||
188 | |||
189 | int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) | ||
190 | -- | 108 | -- |
191 | 2.14.3 | 109 | 2.31.1 |
192 | 110 | ||
193 | diff view generated by jsdifflib |
1 | Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa | ||
3 | ("iothread: Stop threads before main() quits") tried to work around the | ||
4 | fact that emulation was still active during termination by stopping | ||
5 | iothreads. They suffer from race conditions: | ||
6 | 1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the | ||
7 | virtio_scsi_ctx_check() assertion failure because the BDS AioContext | ||
8 | has been modified by iothread_stop_all(). | ||
9 | 2. Guest vq kick racing with main loop termination leaves a readable | ||
10 | ioeventfd that is handled by the next aio_poll() when external | ||
11 | clients are enabled again, resulting in unwanted emulation activity. | ||
12 | 2 | ||
13 | This patch obsoletes those commits by fully disabling emulation activity | 3 | This simplification is enabled by the previous patch. Now aio_co_wake |
14 | when vcpus are stopped. | 4 | will only be called once, therefore we do not care about a spurious |
5 | firing of the timer after a qemu_co_sleep_wake. | ||
15 | 6 | ||
16 | Use the new vm_shutdown() function instead of pause_all_vcpus() so that | 7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
17 | vm change state handlers are invoked too. Virtio devices will now stop | 8 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
18 | their ioeventfds, preventing further emulation activity after vm_stop(). | 9 | Message-id: 20210517100548.28806-5-pbonzini@redhat.com |
19 | |||
20 | Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a | ||
21 | QMP STOP event that may affect existing clients. | ||
22 | |||
23 | It is no longer necessary to call replay_disable_events() directly since | ||
24 | vm_shutdown() does so already. | ||
25 | |||
26 | Drop iothread_stop_all() since it is no longer used. | ||
27 | |||
28 | Cc: Fam Zheng <famz@redhat.com> | ||
29 | Cc: Kevin Wolf <kwolf@redhat.com> | ||
30 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
31 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
32 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | ||
33 | Message-id: 20180307144205.20619-5-stefanha@redhat.com | ||
34 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
35 | --- | 11 | --- |
36 | include/sysemu/iothread.h | 1 - | 12 | util/qemu-coroutine-sleep.c | 8 ++++---- |
37 | include/sysemu/sysemu.h | 1 + | 13 | 1 file changed, 4 insertions(+), 4 deletions(-) |
38 | cpus.c | 16 +++++++++++++--- | ||
39 | iothread.c | 31 ------------------------------- | ||
40 | vl.c | 13 +++---------- | ||
41 | 5 files changed, 17 insertions(+), 45 deletions(-) | ||
42 | 14 | ||
43 | diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h | 15 | diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c |
44 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/include/sysemu/iothread.h | 17 | --- a/util/qemu-coroutine-sleep.c |
46 | +++ b/include/sysemu/iothread.h | 18 | +++ b/util/qemu-coroutine-sleep.c |
47 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 19 | @@ -XXX,XX +XXX,XX @@ static const char *qemu_co_sleep_ns__scheduled = "qemu_co_sleep_ns"; |
48 | char *iothread_get_id(IOThread *iothread); | 20 | |
49 | IOThread *iothread_by_id(const char *id); | 21 | struct QemuCoSleepState { |
50 | AioContext *iothread_get_aio_context(IOThread *iothread); | 22 | Coroutine *co; |
51 | -void iothread_stop_all(void); | 23 | - QEMUTimer ts; |
52 | GMainContext *iothread_get_g_main_context(IOThread *iothread); | 24 | QemuCoSleepState **user_state_pointer; |
53 | 25 | }; | |
54 | /* | 26 | |
55 | diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h | 27 | @@ -XXX,XX +XXX,XX @@ void qemu_co_sleep_wake(QemuCoSleepState *sleep_state) |
56 | index XXXXXXX..XXXXXXX 100644 | 28 | |
57 | --- a/include/sysemu/sysemu.h | 29 | assert(scheduled == qemu_co_sleep_ns__scheduled); |
58 | +++ b/include/sysemu/sysemu.h | 30 | *sleep_state->user_state_pointer = NULL; |
59 | @@ -XXX,XX +XXX,XX @@ void vm_start(void); | 31 | - timer_del(&sleep_state->ts); |
60 | int vm_prepare_start(void); | 32 | aio_co_wake(sleep_state->co); |
61 | int vm_stop(RunState state); | ||
62 | int vm_stop_force_state(RunState state); | ||
63 | +int vm_shutdown(void); | ||
64 | |||
65 | typedef enum WakeupReason { | ||
66 | /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */ | ||
67 | diff --git a/cpus.c b/cpus.c | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/cpus.c | ||
70 | +++ b/cpus.c | ||
71 | @@ -XXX,XX +XXX,XX @@ void cpu_synchronize_all_pre_loadvm(void) | ||
72 | } | 33 | } |
73 | } | 34 | } |
74 | 35 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, | |
75 | -static int do_vm_stop(RunState state) | 36 | QemuCoSleepState **sleep_state) |
76 | +static int do_vm_stop(RunState state, bool send_stop) | ||
77 | { | 37 | { |
78 | int ret = 0; | 38 | AioContext *ctx = qemu_get_current_aio_context(); |
79 | 39 | + QEMUTimer ts; | |
80 | @@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state) | 40 | QemuCoSleepState state = { |
81 | pause_all_vcpus(); | 41 | .co = qemu_coroutine_self(), |
82 | runstate_set(state); | 42 | .user_state_pointer = sleep_state, |
83 | vm_state_notify(0, state); | 43 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, |
84 | - qapi_event_send_stop(&error_abort); | 44 | abort(); |
85 | + if (send_stop) { | ||
86 | + qapi_event_send_stop(&error_abort); | ||
87 | + } | ||
88 | } | 45 | } |
89 | 46 | ||
90 | bdrv_drain_all(); | 47 | - aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, sleep_state); |
91 | @@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state) | 48 | + aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, sleep_state); |
92 | return ret; | 49 | *sleep_state = &state; |
93 | } | 50 | - timer_mod(&state.ts, qemu_clock_get_ns(type) + ns); |
94 | 51 | + timer_mod(&ts, qemu_clock_get_ns(type) + ns); | |
95 | +/* Special vm_stop() variant for terminating the process. Historically clients | 52 | qemu_coroutine_yield(); |
96 | + * did not expect a QMP STOP event and so we need to retain compatibility. | 53 | + timer_del(&ts); |
97 | + */ | 54 | |
98 | +int vm_shutdown(void) | 55 | /* qemu_co_sleep_wake clears *sleep_state before resuming this coroutine. */ |
99 | +{ | 56 | assert(*sleep_state == NULL); |
100 | + return do_vm_stop(RUN_STATE_SHUTDOWN, false); | ||
101 | +} | ||
102 | + | ||
103 | static bool cpu_can_run(CPUState *cpu) | ||
104 | { | ||
105 | if (cpu->stop) { | ||
106 | @@ -XXX,XX +XXX,XX @@ int vm_stop(RunState state) | ||
107 | return 0; | ||
108 | } | ||
109 | |||
110 | - return do_vm_stop(state); | ||
111 | + return do_vm_stop(state, true); | ||
112 | } | ||
113 | |||
114 | /** | ||
115 | diff --git a/iothread.c b/iothread.c | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/iothread.c | ||
118 | +++ b/iothread.c | ||
119 | @@ -XXX,XX +XXX,XX @@ void iothread_stop(IOThread *iothread) | ||
120 | qemu_thread_join(&iothread->thread); | ||
121 | } | ||
122 | |||
123 | -static int iothread_stop_iter(Object *object, void *opaque) | ||
124 | -{ | ||
125 | - IOThread *iothread; | ||
126 | - | ||
127 | - iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); | ||
128 | - if (!iothread) { | ||
129 | - return 0; | ||
130 | - } | ||
131 | - iothread_stop(iothread); | ||
132 | - return 0; | ||
133 | -} | ||
134 | - | ||
135 | static void iothread_instance_init(Object *obj) | ||
136 | { | ||
137 | IOThread *iothread = IOTHREAD(obj); | ||
138 | @@ -XXX,XX +XXX,XX @@ IOThreadInfoList *qmp_query_iothreads(Error **errp) | ||
139 | return head; | ||
140 | } | ||
141 | |||
142 | -void iothread_stop_all(void) | ||
143 | -{ | ||
144 | - Object *container = object_get_objects_root(); | ||
145 | - BlockDriverState *bs; | ||
146 | - BdrvNextIterator it; | ||
147 | - | ||
148 | - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
149 | - AioContext *ctx = bdrv_get_aio_context(bs); | ||
150 | - if (ctx == qemu_get_aio_context()) { | ||
151 | - continue; | ||
152 | - } | ||
153 | - aio_context_acquire(ctx); | ||
154 | - bdrv_set_aio_context(bs, qemu_get_aio_context()); | ||
155 | - aio_context_release(ctx); | ||
156 | - } | ||
157 | - | ||
158 | - object_child_foreach(container, iothread_stop_iter, NULL); | ||
159 | -} | ||
160 | - | ||
161 | static gpointer iothread_g_main_context_init(gpointer opaque) | ||
162 | { | ||
163 | AioContext *ctx; | ||
164 | diff --git a/vl.c b/vl.c | ||
165 | index XXXXXXX..XXXXXXX 100644 | ||
166 | --- a/vl.c | ||
167 | +++ b/vl.c | ||
168 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
169 | os_setup_post(); | ||
170 | |||
171 | main_loop(); | ||
172 | - replay_disable_events(); | ||
173 | |||
174 | - /* The ordering of the following is delicate. Stop vcpus to prevent new | ||
175 | - * I/O requests being queued by the guest. Then stop IOThreads (this | ||
176 | - * includes a drain operation and completes all request processing). At | ||
177 | - * this point emulated devices are still associated with their IOThreads | ||
178 | - * (if any) but no longer have any work to do. Only then can we close | ||
179 | - * block devices safely because we know there is no more I/O coming. | ||
180 | - */ | ||
181 | - pause_all_vcpus(); | ||
182 | - iothread_stop_all(); | ||
183 | + /* No more vcpu or device emulation activity beyond this point */ | ||
184 | + vm_shutdown(); | ||
185 | + | ||
186 | bdrv_close_all(); | ||
187 | |||
188 | res_free(); | ||
189 | -- | 57 | -- |
190 | 2.14.3 | 58 | 2.31.1 |
191 | 59 | ||
192 | diff view generated by jsdifflib |
1 | From: Fam Zheng <famz@redhat.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Reported-by: Alberto Garcia <berto@igalia.com> | 3 | Right now, users of qemu_co_sleep_ns_wakeable are simply passing |
4 | Signed-off-by: Fam Zheng <famz@redhat.com> | 4 | a pointer to QemuCoSleepState by reference to the function. But |
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 5 | QemuCoSleepState really is just a Coroutine*; making the |
6 | Message-id: 20180306024328.19195-1-famz@redhat.com | 6 | content of the struct public is just as efficient and lets us |
7 | skip the user_state_pointer indirection. | ||
8 | |||
9 | Since the usage is changed, take the occasion to rename the | ||
10 | struct to QemuCoSleep. | ||
11 | |||
12 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
13 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
14 | Message-id: 20210517100548.28806-6-pbonzini@redhat.com | ||
7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
8 | --- | 16 | --- |
9 | README | 2 +- | 17 | include/qemu/coroutine.h | 23 +++++++++++---------- |
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | 18 | block/block-copy.c | 8 ++++---- |
11 | 19 | block/nbd.c | 10 ++++----- | |
12 | diff --git a/README b/README | 20 | util/qemu-coroutine-sleep.c | 41 ++++++++++++++++--------------------- |
13 | index XXXXXXX..XXXXXXX 100644 | 21 | 4 files changed, 39 insertions(+), 43 deletions(-) |
14 | --- a/README | 22 | |
15 | +++ b/README | 23 | diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h |
16 | @@ -XXX,XX +XXX,XX @@ The QEMU website is also maintained under source control. | 24 | index XXXXXXX..XXXXXXX 100644 |
17 | git clone git://git.qemu.org/qemu-web.git | 25 | --- a/include/qemu/coroutine.h |
18 | https://www.qemu.org/2017/02/04/the-new-qemu-website-is-up/ | 26 | +++ b/include/qemu/coroutine.h |
19 | 27 | @@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_wrlock(CoRwlock *lock); | |
20 | -A 'git-profile' utility was created to make above process less | 28 | */ |
21 | +A 'git-publish' utility was created to make above process less | 29 | void qemu_co_rwlock_unlock(CoRwlock *lock); |
22 | cumbersome, and is highly recommended for making regular contributions, | 30 | |
23 | or even just for sending consecutive patch series revisions. It also | 31 | -typedef struct QemuCoSleepState QemuCoSleepState; |
24 | requires a working 'git send-email' setup, and by default doesn't | 32 | +typedef struct QemuCoSleep { |
33 | + Coroutine *to_wake; | ||
34 | +} QemuCoSleep; | ||
35 | |||
36 | /** | ||
37 | - * Yield the coroutine for a given duration. During this yield, @sleep_state | ||
38 | - * is set to an opaque pointer, which may be used for | ||
39 | - * qemu_co_sleep_wake(). Be careful, the pointer is set back to zero when the | ||
40 | - * timer fires. Don't save the obtained value to other variables and don't call | ||
41 | - * qemu_co_sleep_wake from another aio context. | ||
42 | + * Yield the coroutine for a given duration. Initializes @w so that, | ||
43 | + * during this yield, it can be passed to qemu_co_sleep_wake() to | ||
44 | + * terminate the sleep. | ||
45 | */ | ||
46 | -void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, | ||
47 | - QemuCoSleepState **sleep_state); | ||
48 | +void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w, | ||
49 | + QEMUClockType type, int64_t ns); | ||
50 | + | ||
51 | static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) | ||
52 | { | ||
53 | - QemuCoSleepState *unused = NULL; | ||
54 | - qemu_co_sleep_ns_wakeable(type, ns, &unused); | ||
55 | + QemuCoSleep w = { 0 }; | ||
56 | + qemu_co_sleep_ns_wakeable(&w, type, ns); | ||
57 | } | ||
58 | |||
59 | /** | ||
60 | @@ -XXX,XX +XXX,XX @@ static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) | ||
61 | * qemu_co_sleep_ns() and should be checked to be non-NULL before calling | ||
62 | * qemu_co_sleep_wake(). | ||
63 | */ | ||
64 | -void qemu_co_sleep_wake(QemuCoSleepState *sleep_state); | ||
65 | +void qemu_co_sleep_wake(QemuCoSleep *w); | ||
66 | |||
67 | /** | ||
68 | * Yield until a file descriptor becomes readable | ||
69 | diff --git a/block/block-copy.c b/block/block-copy.c | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/block/block-copy.c | ||
72 | +++ b/block/block-copy.c | ||
73 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyCallState { | ||
74 | /* State */ | ||
75 | int ret; | ||
76 | bool finished; | ||
77 | - QemuCoSleepState *sleep_state; | ||
78 | + QemuCoSleep sleep; | ||
79 | bool cancelled; | ||
80 | |||
81 | /* OUT parameters */ | ||
82 | @@ -XXX,XX +XXX,XX @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) | ||
83 | if (ns > 0) { | ||
84 | block_copy_task_end(task, -EAGAIN); | ||
85 | g_free(task); | ||
86 | - qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns, | ||
87 | - &call_state->sleep_state); | ||
88 | + qemu_co_sleep_ns_wakeable(&call_state->sleep, | ||
89 | + QEMU_CLOCK_REALTIME, ns); | ||
90 | continue; | ||
91 | } | ||
92 | } | ||
93 | @@ -XXX,XX +XXX,XX @@ out: | ||
94 | |||
95 | void block_copy_kick(BlockCopyCallState *call_state) | ||
96 | { | ||
97 | - qemu_co_sleep_wake(call_state->sleep_state); | ||
98 | + qemu_co_sleep_wake(&call_state->sleep); | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | diff --git a/block/nbd.c b/block/nbd.c | ||
103 | index XXXXXXX..XXXXXXX 100644 | ||
104 | --- a/block/nbd.c | ||
105 | +++ b/block/nbd.c | ||
106 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVNBDState { | ||
107 | CoQueue free_sema; | ||
108 | Coroutine *connection_co; | ||
109 | Coroutine *teardown_co; | ||
110 | - QemuCoSleepState *connection_co_sleep_ns_state; | ||
111 | + QemuCoSleep reconnect_sleep; | ||
112 | bool drained; | ||
113 | bool wait_drained_end; | ||
114 | int in_flight; | ||
115 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs) | ||
116 | BDRVNBDState *s = (BDRVNBDState *)bs->opaque; | ||
117 | |||
118 | s->drained = true; | ||
119 | - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); | ||
120 | + qemu_co_sleep_wake(&s->reconnect_sleep); | ||
121 | |||
122 | nbd_co_establish_connection_cancel(bs, false); | ||
123 | |||
124 | @@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs) | ||
125 | |||
126 | s->state = NBD_CLIENT_QUIT; | ||
127 | if (s->connection_co) { | ||
128 | - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); | ||
129 | + qemu_co_sleep_wake(&s->reconnect_sleep); | ||
130 | nbd_co_establish_connection_cancel(bs, true); | ||
131 | } | ||
132 | if (qemu_in_coroutine()) { | ||
133 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s) | ||
134 | } | ||
135 | bdrv_inc_in_flight(s->bs); | ||
136 | } else { | ||
137 | - qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout, | ||
138 | - &s->connection_co_sleep_ns_state); | ||
139 | + qemu_co_sleep_ns_wakeable(&s->reconnect_sleep, | ||
140 | + QEMU_CLOCK_REALTIME, timeout); | ||
141 | if (s->drained) { | ||
142 | continue; | ||
143 | } | ||
144 | diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c | ||
145 | index XXXXXXX..XXXXXXX 100644 | ||
146 | --- a/util/qemu-coroutine-sleep.c | ||
147 | +++ b/util/qemu-coroutine-sleep.c | ||
148 | @@ -XXX,XX +XXX,XX @@ | ||
149 | |||
150 | static const char *qemu_co_sleep_ns__scheduled = "qemu_co_sleep_ns"; | ||
151 | |||
152 | -struct QemuCoSleepState { | ||
153 | +void qemu_co_sleep_wake(QemuCoSleep *w) | ||
154 | +{ | ||
155 | Coroutine *co; | ||
156 | - QemuCoSleepState **user_state_pointer; | ||
157 | -}; | ||
158 | |||
159 | -void qemu_co_sleep_wake(QemuCoSleepState *sleep_state) | ||
160 | -{ | ||
161 | - if (sleep_state) { | ||
162 | + co = w->to_wake; | ||
163 | + w->to_wake = NULL; | ||
164 | + if (co) { | ||
165 | /* Write of schedule protected by barrier write in aio_co_schedule */ | ||
166 | - const char *scheduled = qatomic_cmpxchg(&sleep_state->co->scheduled, | ||
167 | + const char *scheduled = qatomic_cmpxchg(&co->scheduled, | ||
168 | qemu_co_sleep_ns__scheduled, NULL); | ||
169 | |||
170 | assert(scheduled == qemu_co_sleep_ns__scheduled); | ||
171 | - *sleep_state->user_state_pointer = NULL; | ||
172 | - aio_co_wake(sleep_state->co); | ||
173 | + aio_co_wake(co); | ||
174 | } | ||
175 | } | ||
176 | |||
177 | static void co_sleep_cb(void *opaque) | ||
178 | { | ||
179 | - QemuCoSleepState **sleep_state = opaque; | ||
180 | - qemu_co_sleep_wake(*sleep_state); | ||
181 | + QemuCoSleep *w = opaque; | ||
182 | + qemu_co_sleep_wake(w); | ||
183 | } | ||
184 | |||
185 | -void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, | ||
186 | - QemuCoSleepState **sleep_state) | ||
187 | +void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w, | ||
188 | + QEMUClockType type, int64_t ns) | ||
189 | { | ||
190 | + Coroutine *co = qemu_coroutine_self(); | ||
191 | AioContext *ctx = qemu_get_current_aio_context(); | ||
192 | QEMUTimer ts; | ||
193 | - QemuCoSleepState state = { | ||
194 | - .co = qemu_coroutine_self(), | ||
195 | - .user_state_pointer = sleep_state, | ||
196 | - }; | ||
197 | |||
198 | - const char *scheduled = qatomic_cmpxchg(&state.co->scheduled, NULL, | ||
199 | - qemu_co_sleep_ns__scheduled); | ||
200 | + const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL, | ||
201 | + qemu_co_sleep_ns__scheduled); | ||
202 | if (scheduled) { | ||
203 | fprintf(stderr, | ||
204 | "%s: Co-routine was already scheduled in '%s'\n", | ||
205 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, | ||
206 | abort(); | ||
207 | } | ||
208 | |||
209 | - aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, sleep_state); | ||
210 | - *sleep_state = &state; | ||
211 | + w->to_wake = co; | ||
212 | + aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, w), | ||
213 | timer_mod(&ts, qemu_clock_get_ns(type) + ns); | ||
214 | qemu_coroutine_yield(); | ||
215 | timer_del(&ts); | ||
216 | |||
217 | - /* qemu_co_sleep_wake clears *sleep_state before resuming this coroutine. */ | ||
218 | - assert(*sleep_state == NULL); | ||
219 | + /* w->to_wake is cleared before resuming this coroutine. */ | ||
220 | + assert(w->to_wake == NULL); | ||
221 | } | ||
25 | -- | 222 | -- |
26 | 2.14.3 | 223 | 2.31.1 |
27 | 224 | ||
28 | diff view generated by jsdifflib |
1 | Sometimes it's necessary for the main loop thread to run a BH in an | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | IOThread and wait for its completion. This primitive is useful during | ||
3 | startup/shutdown to synchronize and avoid race conditions. | ||
4 | 2 | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 3 | Allow using QemuCoSleep to sleep forever until woken by qemu_co_sleep_wake. |
6 | Reviewed-by: Fam Zheng <famz@redhat.com> | 4 | This makes the logic of qemu_co_sleep_ns_wakeable easy to understand. |
7 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | 5 | |
8 | Message-id: 20180307144205.20619-2-stefanha@redhat.com | 6 | In the future we will introduce an API that can work even if the |
7 | sleep and wake happen from different threads. For now, initializing | ||
8 | w->to_wake after timer_mod is fine because the timer can only fire in | ||
9 | the same AioContext. | ||
10 | |||
11 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
12 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
13 | Message-id: 20210517100548.28806-7-pbonzini@redhat.com | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | --- | 15 | --- |
11 | include/block/aio-wait.h | 13 +++++++++++++ | 16 | include/qemu/coroutine.h | 5 +++++ |
12 | util/aio-wait.c | 31 +++++++++++++++++++++++++++++++ | 17 | util/qemu-coroutine-sleep.c | 26 +++++++++++++++++++------- |
13 | 2 files changed, 44 insertions(+) | 18 | 2 files changed, 24 insertions(+), 7 deletions(-) |
14 | 19 | ||
15 | diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h | 20 | diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h |
16 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/include/block/aio-wait.h | 22 | --- a/include/qemu/coroutine.h |
18 | +++ b/include/block/aio-wait.h | 23 | +++ b/include/qemu/coroutine.h |
19 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 24 | @@ -XXX,XX +XXX,XX @@ typedef struct QemuCoSleep { |
20 | */ | 25 | void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w, |
21 | void aio_wait_kick(AioWait *wait); | 26 | QEMUClockType type, int64_t ns); |
22 | 27 | ||
23 | +/** | 28 | +/** |
24 | + * aio_wait_bh_oneshot: | 29 | + * Yield the coroutine until the next call to qemu_co_sleep_wake. |
25 | + * @ctx: the aio context | ||
26 | + * @cb: the BH callback function | ||
27 | + * @opaque: user data for the BH callback function | ||
28 | + * | ||
29 | + * Run a BH in @ctx and wait for it to complete. | ||
30 | + * | ||
31 | + * Must be called from the main loop thread with @ctx acquired exactly once. | ||
32 | + * Note that main loop event processing may occur. | ||
33 | + */ | 30 | + */ |
34 | +void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); | 31 | +void coroutine_fn qemu_co_sleep(QemuCoSleep *w); |
35 | + | 32 | + |
36 | #endif /* QEMU_AIO_WAIT */ | 33 | static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) |
37 | diff --git a/util/aio-wait.c b/util/aio-wait.c | 34 | { |
35 | QemuCoSleep w = { 0 }; | ||
36 | diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
39 | --- a/util/aio-wait.c | 38 | --- a/util/qemu-coroutine-sleep.c |
40 | +++ b/util/aio-wait.c | 39 | +++ b/util/qemu-coroutine-sleep.c |
41 | @@ -XXX,XX +XXX,XX @@ void aio_wait_kick(AioWait *wait) | 40 | @@ -XXX,XX +XXX,XX @@ static void co_sleep_cb(void *opaque) |
42 | aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); | 41 | qemu_co_sleep_wake(w); |
42 | } | ||
43 | |||
44 | -void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w, | ||
45 | - QEMUClockType type, int64_t ns) | ||
46 | +void coroutine_fn qemu_co_sleep(QemuCoSleep *w) | ||
47 | { | ||
48 | Coroutine *co = qemu_coroutine_self(); | ||
49 | - AioContext *ctx = qemu_get_current_aio_context(); | ||
50 | - QEMUTimer ts; | ||
51 | |||
52 | const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL, | ||
53 | qemu_co_sleep_ns__scheduled); | ||
54 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w, | ||
43 | } | 55 | } |
56 | |||
57 | w->to_wake = co; | ||
58 | - aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, w), | ||
59 | - timer_mod(&ts, qemu_clock_get_ns(type) + ns); | ||
60 | qemu_coroutine_yield(); | ||
61 | - timer_del(&ts); | ||
62 | |||
63 | /* w->to_wake is cleared before resuming this coroutine. */ | ||
64 | assert(w->to_wake == NULL); | ||
44 | } | 65 | } |
45 | + | 66 | + |
46 | +typedef struct { | 67 | +void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w, |
47 | + AioWait wait; | 68 | + QEMUClockType type, int64_t ns) |
48 | + bool done; | 69 | +{ |
49 | + QEMUBHFunc *cb; | 70 | + AioContext *ctx = qemu_get_current_aio_context(); |
50 | + void *opaque; | 71 | + QEMUTimer ts; |
51 | +} AioWaitBHData; | ||
52 | + | 72 | + |
53 | +/* Context: BH in IOThread */ | 73 | + aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, w); |
54 | +static void aio_wait_bh(void *opaque) | 74 | + timer_mod(&ts, qemu_clock_get_ns(type) + ns); |
55 | +{ | ||
56 | + AioWaitBHData *data = opaque; | ||
57 | + | 75 | + |
58 | + data->cb(data->opaque); | 76 | + /* |
59 | + | 77 | + * The timer will fire in the current AioContext, so the callback |
60 | + data->done = true; | 78 | + * must happen after qemu_co_sleep yields and there is no race |
61 | + aio_wait_kick(&data->wait); | 79 | + * between timer_mod and qemu_co_sleep. |
62 | +} | 80 | + */ |
63 | + | 81 | + qemu_co_sleep(w); |
64 | +void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | 82 | + timer_del(&ts); |
65 | +{ | ||
66 | + AioWaitBHData data = { | ||
67 | + .cb = cb, | ||
68 | + .opaque = opaque, | ||
69 | + }; | ||
70 | + | ||
71 | + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | ||
72 | + | ||
73 | + aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data); | ||
74 | + AIO_WAIT_WHILE(&data.wait, ctx, !data.done); | ||
75 | +} | 83 | +} |
76 | -- | 84 | -- |
77 | 2.14.3 | 85 | 2.31.1 |
78 | 86 | ||
79 | diff view generated by jsdifflib |