1
The following changes since commit 0ab4537f08e09b13788db67efd760592fb7db769:
1
The following changes since commit 6c769690ac845fa62642a5f93b4e4bd906adab95:
2
2
3
Merge remote-tracking branch 'remotes/stefanberger/tags/pull-tpm-2018-03-07-1' into staging (2018-03-08 12:56:39 +0000)
3
Merge remote-tracking branch 'remotes/vsementsov/tags/pull-simplebench-2021-05-04' into staging (2021-05-21 12:02:34 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/stefanha/qemu.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 4486e89c219c0d1b9bd8dfa0b1dd5b0d51ff2268:
9
for you to fetch changes up to 0a6f0c76a030710780ce10d6347a70f098024d21:
10
10
11
vl: introduce vm_shutdown() (2018-03-08 17:38:51 +0000)
11
coroutine-sleep: introduce qemu_co_sleep (2021-05-21 18:22:33 +0100)
12
13
----------------------------------------------------------------
14
Pull request
15
16
(Resent due to an email preparation mistake.)
12
17
13
----------------------------------------------------------------
18
----------------------------------------------------------------
14
19
15
----------------------------------------------------------------
20
Paolo Bonzini (6):
21
coroutine-sleep: use a stack-allocated timer
22
coroutine-sleep: disallow NULL QemuCoSleepState** argument
23
coroutine-sleep: allow qemu_co_sleep_wake that wakes nothing
24
coroutine-sleep: move timer out of QemuCoSleepState
25
coroutine-sleep: replace QemuCoSleepState pointer with struct in the
26
API
27
coroutine-sleep: introduce qemu_co_sleep
16
28
17
Deepa Srinivasan (1):
29
Philippe Mathieu-Daudé (1):
18
block: Fix qemu crash when using scsi-block
30
bitops.h: Improve find_xxx_bit() documentation
19
31
20
Fam Zheng (1):
32
Zenghui Yu (1):
21
README: Fix typo 'git-publish'
33
multi-process: Initialize variables declared with g_auto*
22
34
23
Sergio Lopez (1):
35
include/qemu/bitops.h | 15 ++++++--
24
virtio-blk: dataplane: Don't batch notifications if EVENT_IDX is
36
include/qemu/coroutine.h | 27 ++++++++-----
25
present
37
block/block-copy.c | 10 ++---
26
38
block/nbd.c | 14 +++----
27
Stefan Hajnoczi (4):
39
hw/remote/memory.c | 5 +--
28
block: add aio_wait_bh_oneshot()
40
hw/remote/proxy.c | 3 +-
29
virtio-blk: fix race between .ioeventfd_stop() and vq handler
41
util/qemu-coroutine-sleep.c | 75 +++++++++++++++++++------------------
30
virtio-scsi: fix race between .ioeventfd_stop() and vq handler
42
7 files changed, 79 insertions(+), 70 deletions(-)
31
vl: introduce vm_shutdown()
32
33
include/block/aio-wait.h | 13 +++++++++++
34
include/sysemu/iothread.h | 1 -
35
include/sysemu/sysemu.h | 1 +
36
block/block-backend.c | 51 ++++++++++++++++++++---------------------
37
cpus.c | 16 ++++++++++---
38
hw/block/dataplane/virtio-blk.c | 39 +++++++++++++++++++++++--------
39
hw/scsi/virtio-scsi-dataplane.c | 9 ++++----
40
iothread.c | 31 -------------------------
41
util/aio-wait.c | 31 +++++++++++++++++++++++++
42
vl.c | 13 +++--------
43
README | 2 +-
44
11 files changed, 122 insertions(+), 85 deletions(-)
45
43
46
--
44
--
47
2.14.3
45
2.31.1
48
46
49
diff view generated by jsdifflib
New patch
1
From: Zenghui Yu <yuzenghui@huawei.com>
1
2
3
Quote docs/devel/style.rst (section "Automatic memory deallocation"):
4
5
* Variables declared with g_auto* MUST always be initialized,
6
otherwise the cleanup function will use uninitialized stack memory
7
8
Initialize @name properly to get rid of the compilation error (using
9
gcc-7.3.0 on CentOS):
10
11
../hw/remote/proxy.c: In function 'pci_proxy_dev_realize':
12
/usr/include/glib-2.0/glib/glib-autocleanups.h:28:3: error: 'name' may be used uninitialized in this function [-Werror=maybe-uninitialized]
13
g_free (*pp);
14
^~~~~~~~~~~~
15
../hw/remote/proxy.c:350:30: note: 'name' was declared here
16
g_autofree char *name;
17
^~~~
18
19
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
20
Reviewed-by: Jagannathan Raman <jag.raman@oracle.com>
21
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
22
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
23
Message-id: 20210312112143.1369-1-yuzenghui@huawei.com
24
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
25
---
26
hw/remote/memory.c | 5 ++---
27
hw/remote/proxy.c | 3 +--
28
2 files changed, 3 insertions(+), 5 deletions(-)
29
30
diff --git a/hw/remote/memory.c b/hw/remote/memory.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/hw/remote/memory.c
33
+++ b/hw/remote/memory.c
34
@@ -XXX,XX +XXX,XX @@ void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp)
35
36
remote_sysmem_reset();
37
38
- for (region = 0; region < msg->num_fds; region++) {
39
- g_autofree char *name;
40
+ for (region = 0; region < msg->num_fds; region++, suffix++) {
41
+ g_autofree char *name = g_strdup_printf("remote-mem-%u", suffix);
42
subregion = g_new(MemoryRegion, 1);
43
- name = g_strdup_printf("remote-mem-%u", suffix++);
44
memory_region_init_ram_from_fd(subregion, NULL,
45
name, sysmem_info->sizes[region],
46
true, msg->fds[region],
47
diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/hw/remote/proxy.c
50
+++ b/hw/remote/proxy.c
51
@@ -XXX,XX +XXX,XX @@ static void probe_pci_info(PCIDevice *dev, Error **errp)
52
PCI_BASE_ADDRESS_SPACE_IO : PCI_BASE_ADDRESS_SPACE_MEMORY;
53
54
if (size) {
55
- g_autofree char *name;
56
+ g_autofree char *name = g_strdup_printf("bar-region-%d", i);
57
pdev->region[i].dev = pdev;
58
pdev->region[i].present = true;
59
if (type == PCI_BASE_ADDRESS_SPACE_MEMORY) {
60
pdev->region[i].memory = true;
61
}
62
- name = g_strdup_printf("bar-region-%d", i);
63
memory_region_init_io(&pdev->region[i].mr, OBJECT(pdev),
64
&proxy_mr_ops, &pdev->region[i],
65
name, size);
66
--
67
2.31.1
68
diff view generated by jsdifflib
1
If the main loop thread invokes .ioeventfd_stop() just as the vq handler
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
function begins in the IOThread then the handler may lose the race for
3
the AioContext lock. By the time the vq handler is able to acquire the
4
AioContext lock the ioeventfd has already been removed and the handler
5
isn't supposed to run anymore!
6
2
7
Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal
3
Document that the following functions return the bitmap size
8
from within the IOThread. This way no races with the vq handler are
4
if no matching bit is found:
9
possible.
10
5
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
- find_first_bit
12
Reviewed-by: Fam Zheng <famz@redhat.com>
7
- find_next_bit
13
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
8
- find_last_bit
14
Message-id: 20180307144205.20619-4-stefanha@redhat.com
9
- find_first_zero_bit
10
- find_next_zero_bit
11
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
14
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Message-id: 20210510200758.2623154-2-philmd@redhat.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
17
---
17
hw/scsi/virtio-scsi-dataplane.c | 9 +++++----
18
include/qemu/bitops.h | 15 ++++++++++++---
18
1 file changed, 5 insertions(+), 4 deletions(-)
19
1 file changed, 12 insertions(+), 3 deletions(-)
19
20
20
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
21
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
21
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/scsi/virtio-scsi-dataplane.c
23
--- a/include/qemu/bitops.h
23
+++ b/hw/scsi/virtio-scsi-dataplane.c
24
+++ b/include/qemu/bitops.h
24
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
25
@@ -XXX,XX +XXX,XX @@ static inline int test_bit(long nr, const unsigned long *addr)
25
return 0;
26
* @addr: The address to start the search at
26
}
27
* @size: The maximum size to search
27
28
*
28
-/* assumes s->ctx held */
29
- * Returns the bit number of the first set bit, or size.
29
-static void virtio_scsi_clear_aio(VirtIOSCSI *s)
30
+ * Returns the bit number of the last set bit,
30
+/* Context: BH in IOThread */
31
+ * or @size if there is no set bit in the bitmap.
31
+static void virtio_scsi_dataplane_stop_bh(void *opaque)
32
*/
32
{
33
unsigned long find_last_bit(const unsigned long *addr,
33
+ VirtIOSCSI *s = opaque;
34
unsigned long size);
34
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
35
@@ -XXX,XX +XXX,XX @@ unsigned long find_last_bit(const unsigned long *addr,
35
int i;
36
* @addr: The address to base the search on
36
37
* @offset: The bitnumber to start searching at
37
@@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
38
* @size: The bitmap size in bits
38
return 0;
39
+ *
39
40
+ * Returns the bit number of the next set bit,
40
fail_vrings:
41
+ * or @size if there are no further set bits in the bitmap.
41
- virtio_scsi_clear_aio(s);
42
*/
42
+ aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
43
unsigned long find_next_bit(const unsigned long *addr,
43
aio_context_release(s->ctx);
44
unsigned long size,
44
for (i = 0; i < vs->conf.num_queues + 2; i++) {
45
@@ -XXX,XX +XXX,XX @@ unsigned long find_next_bit(const unsigned long *addr,
45
virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
46
* @addr: The address to base the search on
46
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
47
* @offset: The bitnumber to start searching at
47
s->dataplane_stopping = true;
48
* @size: The bitmap size in bits
48
49
+ *
49
aio_context_acquire(s->ctx);
50
+ * Returns the bit number of the next cleared bit,
50
- virtio_scsi_clear_aio(s);
51
+ * or @size if there are no further clear bits in the bitmap.
51
+ aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
52
*/
52
aio_context_release(s->ctx);
53
53
54
unsigned long find_next_zero_bit(const unsigned long *addr,
54
blk_drain_all(); /* ensure there are no in-flight requests */
55
@@ -XXX,XX +XXX,XX @@ unsigned long find_next_zero_bit(const unsigned long *addr,
56
* @addr: The address to start the search at
57
* @size: The maximum size to search
58
*
59
- * Returns the bit number of the first set bit.
60
+ * Returns the bit number of the first set bit,
61
+ * or @size if there is no set bit in the bitmap.
62
*/
63
static inline unsigned long find_first_bit(const unsigned long *addr,
64
unsigned long size)
65
@@ -XXX,XX +XXX,XX @@ static inline unsigned long find_first_bit(const unsigned long *addr,
66
* @addr: The address to start the search at
67
* @size: The maximum size to search
68
*
69
- * Returns the bit number of the first cleared bit.
70
+ * Returns the bit number of the first cleared bit,
71
+ * or @size if there is no clear bit in the bitmap.
72
*/
73
static inline unsigned long find_first_zero_bit(const unsigned long *addr,
74
unsigned long size)
55
--
75
--
56
2.14.3
76
2.31.1
57
77
58
diff view generated by jsdifflib
1
If the main loop thread invokes .ioeventfd_stop() just as the vq handler
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
function begins in the IOThread then the handler may lose the race for
3
the AioContext lock. By the time the vq handler is able to acquire the
4
AioContext lock the ioeventfd has already been removed and the handler
5
isn't supposed to run anymore!
6
2
7
Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal
3
The lifetime of the timer is well-known (it cannot outlive
8
from within the IOThread. This way no races with the vq handler are
4
qemu_co_sleep_ns_wakeable, because it's deleted by the time the
9
possible.
5
coroutine resumes), so it is not necessary to place it on the heap.
10
6
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
13
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
9
Message-id: 20210517100548.28806-2-pbonzini@redhat.com
14
Message-id: 20180307144205.20619-3-stefanha@redhat.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
11
---
17
hw/block/dataplane/virtio-blk.c | 24 +++++++++++++++++-------
12
util/qemu-coroutine-sleep.c | 9 ++++-----
18
1 file changed, 17 insertions(+), 7 deletions(-)
13
1 file changed, 4 insertions(+), 5 deletions(-)
19
14
20
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
15
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
21
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/block/dataplane/virtio-blk.c
17
--- a/util/qemu-coroutine-sleep.c
23
+++ b/hw/block/dataplane/virtio-blk.c
18
+++ b/util/qemu-coroutine-sleep.c
24
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
19
@@ -XXX,XX +XXX,XX @@ static const char *qemu_co_sleep_ns__scheduled = "qemu_co_sleep_ns";
25
return -ENOSYS;
20
21
struct QemuCoSleepState {
22
Coroutine *co;
23
- QEMUTimer *ts;
24
+ QEMUTimer ts;
25
QemuCoSleepState **user_state_pointer;
26
};
27
28
@@ -XXX,XX +XXX,XX @@ void qemu_co_sleep_wake(QemuCoSleepState *sleep_state)
29
if (sleep_state->user_state_pointer) {
30
*sleep_state->user_state_pointer = NULL;
31
}
32
- timer_del(sleep_state->ts);
33
+ timer_del(&sleep_state->ts);
34
aio_co_wake(sleep_state->co);
26
}
35
}
27
36
28
+/* Stop notifications for new requests from guest.
37
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
29
+ *
38
AioContext *ctx = qemu_get_current_aio_context();
30
+ * Context: BH in IOThread
39
QemuCoSleepState state = {
31
+ */
40
.co = qemu_coroutine_self(),
32
+static void virtio_blk_data_plane_stop_bh(void *opaque)
41
- .ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &state),
33
+{
42
.user_state_pointer = sleep_state,
34
+ VirtIOBlockDataPlane *s = opaque;
43
};
35
+ unsigned i;
44
36
+
45
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
37
+ for (i = 0; i < s->conf->num_queues; i++) {
46
abort();
38
+ VirtQueue *vq = virtio_get_queue(s->vdev, i);
47
}
39
+
48
40
+ virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
49
+ aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, &state);
41
+ }
50
if (sleep_state) {
42
+}
51
*sleep_state = &state;
43
+
52
}
44
/* Context: QEMU global mutex held */
53
- timer_mod(state.ts, qemu_clock_get_ns(type) + ns);
45
void virtio_blk_data_plane_stop(VirtIODevice *vdev)
54
+ timer_mod(&state.ts, qemu_clock_get_ns(type) + ns);
46
{
55
qemu_coroutine_yield();
47
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
56
if (sleep_state) {
48
trace_virtio_blk_data_plane_stop(s);
57
/*
49
58
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
50
aio_context_acquire(s->ctx);
59
*/
51
-
60
assert(*sleep_state == NULL);
52
- /* Stop notifications for new requests from guest */
61
}
53
- for (i = 0; i < nvqs; i++) {
62
- timer_free(state.ts);
54
- VirtQueue *vq = virtio_get_queue(s->vdev, i);
63
}
55
-
56
- virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
57
- }
58
+ aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
59
60
/* Drain and switch bs back to the QEMU main loop */
61
blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
62
--
64
--
63
2.14.3
65
2.31.1
64
66
65
diff view generated by jsdifflib
1
From: Sergio Lopez <slp@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Commit 5b2ffbe4d99843fd8305c573a100047a8c962327 ("virtio-blk: dataplane:
3
Simplify the code by removing conditionals. qemu_co_sleep_ns
4
notify guest as a batch") deferred guest notification to a BH in order to
4
can simply point the argument to an on-stack temporary.
5
batch notifications, with the purpose of avoiding flooding the guest with
6
interruptions.
7
5
8
This optimization came with a cost. The average latency perceived in the
6
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
guest is increased by a few microseconds, but also when multiple IO
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
10
operations finish at the same time, the guest won't be notified until
8
Message-id: 20210517100548.28806-3-pbonzini@redhat.com
11
all completions from each operation have been run. In contrast,
12
virtio-scsi issues the notification at the end of each completion.
13
14
On the other hand, nowadays we have the EVENT_IDX feature that allows a
15
better coordination between QEMU and the Guest OS to avoid sending
16
unnecessary interruptions.
17
18
With this change, virtio-blk/dataplane only batches notifications if the
19
EVENT_IDX feature is not present.
20
21
Some numbers obtained with fio (ioengine=sync, iodepth=1, direct=1):
22
- Test specs:
23
* fio-3.4 (ioengine=sync, iodepth=1, direct=1)
24
* qemu master
25
* virtio-blk with a dedicated iothread (default poll-max-ns)
26
* backend: null_blk nr_devices=1 irqmode=2 completion_nsec=280000
27
* 8 vCPUs pinned to isolated physical cores
28
* Emulator and iothread also pinned to separate isolated cores
29
* variance between runs < 1%
30
31
- Not patched
32
* numjobs=1: lat_avg=327.32 irqs=29998
33
* numjobs=4: lat_avg=337.89 irqs=29073
34
* numjobs=8: lat_avg=342.98 irqs=28643
35
36
- Patched:
37
* numjobs=1: lat_avg=323.92 irqs=30262
38
* numjobs=4: lat_avg=332.65 irqs=29520
39
* numjobs=8: lat_avg=335.54 irqs=29323
40
41
Signed-off-by: Sergio Lopez <slp@redhat.com>
42
Message-id: 20180307114459.26636-1-slp@redhat.com
43
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
44
---
10
---
45
hw/block/dataplane/virtio-blk.c | 15 +++++++++++++--
11
include/qemu/coroutine.h | 5 +++--
46
1 file changed, 13 insertions(+), 2 deletions(-)
12
util/qemu-coroutine-sleep.c | 18 +++++-------------
13
2 files changed, 8 insertions(+), 15 deletions(-)
47
14
48
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
15
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
49
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
50
--- a/hw/block/dataplane/virtio-blk.c
17
--- a/include/qemu/coroutine.h
51
+++ b/hw/block/dataplane/virtio-blk.c
18
+++ b/include/qemu/coroutine.h
52
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane {
19
@@ -XXX,XX +XXX,XX @@ typedef struct QemuCoSleepState QemuCoSleepState;
53
VirtIODevice *vdev;
20
54
QEMUBH *bh; /* bh for guest notification */
21
/**
55
unsigned long *batch_notify_vqs;
22
* Yield the coroutine for a given duration. During this yield, @sleep_state
56
+ bool batch_notifications;
23
- * (if not NULL) is set to an opaque pointer, which may be used for
57
24
+ * is set to an opaque pointer, which may be used for
58
/* Note that these EventNotifiers are assigned by value. This is
25
* qemu_co_sleep_wake(). Be careful, the pointer is set back to zero when the
59
* fine as long as you do not call event_notifier_cleanup on them
26
* timer fires. Don't save the obtained value to other variables and don't call
60
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane {
27
* qemu_co_sleep_wake from another aio context.
61
/* Raise an interrupt to signal guest, if necessary */
28
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
62
void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
29
QemuCoSleepState **sleep_state);
30
static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
63
{
31
{
64
- set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
32
- qemu_co_sleep_ns_wakeable(type, ns, NULL);
65
- qemu_bh_schedule(s->bh);
33
+ QemuCoSleepState *unused = NULL;
66
+ if (s->batch_notifications) {
34
+ qemu_co_sleep_ns_wakeable(type, ns, &unused);
67
+ set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
68
+ qemu_bh_schedule(s->bh);
69
+ } else {
70
+ virtio_notify_irqfd(s->vdev, vq);
71
+ }
72
}
35
}
73
36
74
static void notify_guest_bh(void *opaque)
37
/**
75
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
38
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
76
39
index XXXXXXX..XXXXXXX 100644
77
s->starting = true;
40
--- a/util/qemu-coroutine-sleep.c
78
41
+++ b/util/qemu-coroutine-sleep.c
79
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
42
@@ -XXX,XX +XXX,XX @@ void qemu_co_sleep_wake(QemuCoSleepState *sleep_state)
80
+ s->batch_notifications = true;
43
qemu_co_sleep_ns__scheduled, NULL);
81
+ } else {
44
82
+ s->batch_notifications = false;
45
assert(scheduled == qemu_co_sleep_ns__scheduled);
83
+ }
46
- if (sleep_state->user_state_pointer) {
47
- *sleep_state->user_state_pointer = NULL;
48
- }
49
+ *sleep_state->user_state_pointer = NULL;
50
timer_del(&sleep_state->ts);
51
aio_co_wake(sleep_state->co);
52
}
53
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
54
}
55
56
aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, &state);
57
- if (sleep_state) {
58
- *sleep_state = &state;
59
- }
60
+ *sleep_state = &state;
61
timer_mod(&state.ts, qemu_clock_get_ns(type) + ns);
62
qemu_coroutine_yield();
63
- if (sleep_state) {
64
- /*
65
- * Note that *sleep_state is cleared during qemu_co_sleep_wake
66
- * before resuming this coroutine.
67
- */
68
- assert(*sleep_state == NULL);
69
- }
84
+
70
+
85
/* Set up guest notifier (irq) */
71
+ /* qemu_co_sleep_wake clears *sleep_state before resuming this coroutine. */
86
r = k->set_guest_notifiers(qbus->parent, nvqs, true);
72
+ assert(*sleep_state == NULL);
87
if (r != 0) {
73
}
88
--
74
--
89
2.14.3
75
2.31.1
90
76
91
diff view generated by jsdifflib
1
From: Deepa Srinivasan <deepa.srinivasan@oracle.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Starting qemu with the following arguments causes qemu to segfault:
3
All callers of qemu_co_sleep_wake are checking whether they are passing
4
... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name=
4
a NULL argument inside the pointer-to-pointer: do the check in
5
iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1
5
qemu_co_sleep_wake itself.
6
6
7
This patch fixes blk_aio_ioctl() so it does not pass stack addresses to
7
As a side effect, qemu_co_sleep_wake can be called more than once and
8
blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More
8
it will only wake the coroutine once; after the first time, the argument
9
details about the bug follow.
9
will be set to NULL via *sleep_state->user_state_pointer. However, this
10
would not be safe if co_sleep_cb kept using the QemuCoSleepState*
11
directly, so make it go through the pointer-to-pointer instead.
10
12
11
blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the
13
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter().
14
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
13
15
Message-id: 20210517100548.28806-4-pbonzini@redhat.com
14
When blk_aio_ioctl() is executed from within a coroutine context (e.g.
15
iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to
16
the current coroutine's wakeup queue. blk_aio_ioctl() then returns.
17
18
When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer:
19
....
20
BlkRwCo *rwco = &acb->rwco;
21
22
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
23
rwco->qiov->iov[0].iov_base); <--- qiov is
24
invalid here
25
...
26
27
In the case when blk_aio_ioctl() is called from a non-coroutine context,
28
blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls
29
qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine
30
execution is complete, control returns to blk_aio_ioctl_entry() after the call
31
to blk_co_ioctl(). There is no invalid reference after this point, but the
32
function is still holding on to invalid pointers.
33
34
The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer
35
rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the
36
coroutine function casts it to QEMUIOVector or uses the void pointer directly.
37
38
Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com>
39
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
40
Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
41
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
42
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
43
---
17
---
44
block/block-backend.c | 51 +++++++++++++++++++++++++--------------------------
18
block/block-copy.c | 4 +---
45
1 file changed, 25 insertions(+), 26 deletions(-)
19
block/nbd.c | 8 ++------
20
util/qemu-coroutine-sleep.c | 21 ++++++++++++---------
21
3 files changed, 15 insertions(+), 18 deletions(-)
46
22
47
diff --git a/block/block-backend.c b/block/block-backend.c
23
diff --git a/block/block-copy.c b/block/block-copy.c
48
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
49
--- a/block/block-backend.c
25
--- a/block/block-copy.c
50
+++ b/block/block-backend.c
26
+++ b/block/block-copy.c
51
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
27
@@ -XXX,XX +XXX,XX @@ out:
52
typedef struct BlkRwCo {
28
53
BlockBackend *blk;
29
void block_copy_kick(BlockCopyCallState *call_state)
54
int64_t offset;
55
- QEMUIOVector *qiov;
56
+ void *iobuf;
57
int ret;
58
BdrvRequestFlags flags;
59
} BlkRwCo;
60
@@ -XXX,XX +XXX,XX @@ typedef struct BlkRwCo {
61
static void blk_read_entry(void *opaque)
62
{
30
{
63
BlkRwCo *rwco = opaque;
31
- if (call_state->sleep_state) {
64
+ QEMUIOVector *qiov = rwco->iobuf;
32
- qemu_co_sleep_wake(call_state->sleep_state);
65
33
- }
66
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
34
+ qemu_co_sleep_wake(call_state->sleep_state);
67
- rwco->qiov, rwco->flags);
68
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
69
+ qiov, rwco->flags);
70
}
35
}
71
36
72
static void blk_write_entry(void *opaque)
37
/*
38
diff --git a/block/nbd.c b/block/nbd.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/block/nbd.c
41
+++ b/block/nbd.c
42
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs)
43
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
44
45
s->drained = true;
46
- if (s->connection_co_sleep_ns_state) {
47
- qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
48
- }
49
+ qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
50
51
nbd_co_establish_connection_cancel(bs, false);
52
53
@@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs)
54
55
s->state = NBD_CLIENT_QUIT;
56
if (s->connection_co) {
57
- if (s->connection_co_sleep_ns_state) {
58
- qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
59
- }
60
+ qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
61
nbd_co_establish_connection_cancel(bs, true);
62
}
63
if (qemu_in_coroutine()) {
64
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/util/qemu-coroutine-sleep.c
67
+++ b/util/qemu-coroutine-sleep.c
68
@@ -XXX,XX +XXX,XX @@ struct QemuCoSleepState {
69
70
void qemu_co_sleep_wake(QemuCoSleepState *sleep_state)
73
{
71
{
74
BlkRwCo *rwco = opaque;
72
- /* Write of schedule protected by barrier write in aio_co_schedule */
75
+ QEMUIOVector *qiov = rwco->iobuf;
73
- const char *scheduled = qatomic_cmpxchg(&sleep_state->co->scheduled,
76
74
- qemu_co_sleep_ns__scheduled, NULL);
77
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
75
+ if (sleep_state) {
78
- rwco->qiov, rwco->flags);
76
+ /* Write of schedule protected by barrier write in aio_co_schedule */
79
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
77
+ const char *scheduled = qatomic_cmpxchg(&sleep_state->co->scheduled,
80
+ qiov, rwco->flags);
78
+ qemu_co_sleep_ns__scheduled, NULL);
79
80
- assert(scheduled == qemu_co_sleep_ns__scheduled);
81
- *sleep_state->user_state_pointer = NULL;
82
- timer_del(&sleep_state->ts);
83
- aio_co_wake(sleep_state->co);
84
+ assert(scheduled == qemu_co_sleep_ns__scheduled);
85
+ *sleep_state->user_state_pointer = NULL;
86
+ timer_del(&sleep_state->ts);
87
+ aio_co_wake(sleep_state->co);
88
+ }
81
}
89
}
82
90
83
static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
91
static void co_sleep_cb(void *opaque)
84
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
92
{
85
rwco = (BlkRwCo) {
93
- qemu_co_sleep_wake(opaque);
86
.blk = blk,
94
+ QemuCoSleepState **sleep_state = opaque;
87
.offset = offset,
95
+ qemu_co_sleep_wake(*sleep_state);
88
- .qiov = &qiov,
89
+ .iobuf = &qiov,
90
.flags = flags,
91
.ret = NOT_DONE,
92
};
93
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete_bh(void *opaque)
94
}
96
}
95
97
96
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
98
void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
97
- QEMUIOVector *qiov, CoroutineEntry co_entry,
99
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
98
+ void *iobuf, CoroutineEntry co_entry,
100
abort();
99
BdrvRequestFlags flags,
101
}
100
BlockCompletionFunc *cb, void *opaque)
102
101
{
103
- aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, &state);
102
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
104
+ aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, sleep_state);
103
acb->rwco = (BlkRwCo) {
105
*sleep_state = &state;
104
.blk = blk,
106
timer_mod(&state.ts, qemu_clock_get_ns(type) + ns);
105
.offset = offset,
107
qemu_coroutine_yield();
106
- .qiov = qiov,
107
+ .iobuf = iobuf,
108
.flags = flags,
109
.ret = NOT_DONE,
110
};
111
@@ -XXX,XX +XXX,XX @@ static void blk_aio_read_entry(void *opaque)
112
{
113
BlkAioEmAIOCB *acb = opaque;
114
BlkRwCo *rwco = &acb->rwco;
115
+ QEMUIOVector *qiov = rwco->iobuf;
116
117
- assert(rwco->qiov->size == acb->bytes);
118
+ assert(qiov->size == acb->bytes);
119
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
120
- rwco->qiov, rwco->flags);
121
+ qiov, rwco->flags);
122
blk_aio_complete(acb);
123
}
124
125
@@ -XXX,XX +XXX,XX @@ static void blk_aio_write_entry(void *opaque)
126
{
127
BlkAioEmAIOCB *acb = opaque;
128
BlkRwCo *rwco = &acb->rwco;
129
+ QEMUIOVector *qiov = rwco->iobuf;
130
131
- assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
132
+ assert(!qiov || qiov->size == acb->bytes);
133
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
134
- rwco->qiov, rwco->flags);
135
+ qiov, rwco->flags);
136
blk_aio_complete(acb);
137
}
138
139
@@ -XXX,XX +XXX,XX @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
140
static void blk_ioctl_entry(void *opaque)
141
{
142
BlkRwCo *rwco = opaque;
143
+ QEMUIOVector *qiov = rwco->iobuf;
144
+
145
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
146
- rwco->qiov->iov[0].iov_base);
147
+ qiov->iov[0].iov_base);
148
}
149
150
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
151
@@ -XXX,XX +XXX,XX @@ static void blk_aio_ioctl_entry(void *opaque)
152
BlkAioEmAIOCB *acb = opaque;
153
BlkRwCo *rwco = &acb->rwco;
154
155
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
156
- rwco->qiov->iov[0].iov_base);
157
+ rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
158
+
159
blk_aio_complete(acb);
160
}
161
162
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
163
BlockCompletionFunc *cb, void *opaque)
164
{
165
- QEMUIOVector qiov;
166
- struct iovec iov;
167
-
168
- iov = (struct iovec) {
169
- .iov_base = buf,
170
- .iov_len = 0,
171
- };
172
- qemu_iovec_init_external(&qiov, &iov, 1);
173
-
174
- return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
175
+ return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
176
}
177
178
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
179
@@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
180
static void blk_pdiscard_entry(void *opaque)
181
{
182
BlkRwCo *rwco = opaque;
183
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
184
+ QEMUIOVector *qiov = rwco->iobuf;
185
+
186
+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
187
}
188
189
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
190
--
108
--
191
2.14.3
109
2.31.1
192
110
193
diff view generated by jsdifflib
1
Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa
3
("iothread: Stop threads before main() quits") tried to work around the
4
fact that emulation was still active during termination by stopping
5
iothreads. They suffer from race conditions:
6
1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the
7
virtio_scsi_ctx_check() assertion failure because the BDS AioContext
8
has been modified by iothread_stop_all().
9
2. Guest vq kick racing with main loop termination leaves a readable
10
ioeventfd that is handled by the next aio_poll() when external
11
clients are enabled again, resulting in unwanted emulation activity.
12
2
13
This patch obsoletes those commits by fully disabling emulation activity
3
This simplification is enabled by the previous patch. Now aio_co_wake
14
when vcpus are stopped.
4
will only be called once, therefore we do not care about a spurious
5
firing of the timer after a qemu_co_sleep_wake.
15
6
16
Use the new vm_shutdown() function instead of pause_all_vcpus() so that
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
17
vm change state handlers are invoked too. Virtio devices will now stop
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
18
their ioeventfds, preventing further emulation activity after vm_stop().
9
Message-id: 20210517100548.28806-5-pbonzini@redhat.com
19
20
Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a
21
QMP STOP event that may affect existing clients.
22
23
It is no longer necessary to call replay_disable_events() directly since
24
vm_shutdown() does so already.
25
26
Drop iothread_stop_all() since it is no longer used.
27
28
Cc: Fam Zheng <famz@redhat.com>
29
Cc: Kevin Wolf <kwolf@redhat.com>
30
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
31
Reviewed-by: Fam Zheng <famz@redhat.com>
32
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
33
Message-id: 20180307144205.20619-5-stefanha@redhat.com
34
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
35
---
11
---
36
include/sysemu/iothread.h | 1 -
12
util/qemu-coroutine-sleep.c | 8 ++++----
37
include/sysemu/sysemu.h | 1 +
13
1 file changed, 4 insertions(+), 4 deletions(-)
38
cpus.c | 16 +++++++++++++---
39
iothread.c | 31 -------------------------------
40
vl.c | 13 +++----------
41
5 files changed, 17 insertions(+), 45 deletions(-)
42
14
43
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
15
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
44
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
45
--- a/include/sysemu/iothread.h
17
--- a/util/qemu-coroutine-sleep.c
46
+++ b/include/sysemu/iothread.h
18
+++ b/util/qemu-coroutine-sleep.c
47
@@ -XXX,XX +XXX,XX @@ typedef struct {
19
@@ -XXX,XX +XXX,XX @@ static const char *qemu_co_sleep_ns__scheduled = "qemu_co_sleep_ns";
48
char *iothread_get_id(IOThread *iothread);
20
49
IOThread *iothread_by_id(const char *id);
21
struct QemuCoSleepState {
50
AioContext *iothread_get_aio_context(IOThread *iothread);
22
Coroutine *co;
51
-void iothread_stop_all(void);
23
- QEMUTimer ts;
52
GMainContext *iothread_get_g_main_context(IOThread *iothread);
24
QemuCoSleepState **user_state_pointer;
53
25
};
54
/*
26
55
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
27
@@ -XXX,XX +XXX,XX @@ void qemu_co_sleep_wake(QemuCoSleepState *sleep_state)
56
index XXXXXXX..XXXXXXX 100644
28
57
--- a/include/sysemu/sysemu.h
29
assert(scheduled == qemu_co_sleep_ns__scheduled);
58
+++ b/include/sysemu/sysemu.h
30
*sleep_state->user_state_pointer = NULL;
59
@@ -XXX,XX +XXX,XX @@ void vm_start(void);
31
- timer_del(&sleep_state->ts);
60
int vm_prepare_start(void);
32
aio_co_wake(sleep_state->co);
61
int vm_stop(RunState state);
62
int vm_stop_force_state(RunState state);
63
+int vm_shutdown(void);
64
65
typedef enum WakeupReason {
66
/* Always keep QEMU_WAKEUP_REASON_NONE = 0 */
67
diff --git a/cpus.c b/cpus.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/cpus.c
70
+++ b/cpus.c
71
@@ -XXX,XX +XXX,XX @@ void cpu_synchronize_all_pre_loadvm(void)
72
}
33
}
73
}
34
}
74
35
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
75
-static int do_vm_stop(RunState state)
36
QemuCoSleepState **sleep_state)
76
+static int do_vm_stop(RunState state, bool send_stop)
77
{
37
{
78
int ret = 0;
38
AioContext *ctx = qemu_get_current_aio_context();
79
39
+ QEMUTimer ts;
80
@@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state)
40
QemuCoSleepState state = {
81
pause_all_vcpus();
41
.co = qemu_coroutine_self(),
82
runstate_set(state);
42
.user_state_pointer = sleep_state,
83
vm_state_notify(0, state);
43
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
84
- qapi_event_send_stop(&error_abort);
44
abort();
85
+ if (send_stop) {
86
+ qapi_event_send_stop(&error_abort);
87
+ }
88
}
45
}
89
46
90
bdrv_drain_all();
47
- aio_timer_init(ctx, &state.ts, type, SCALE_NS, co_sleep_cb, sleep_state);
91
@@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state)
48
+ aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, sleep_state);
92
return ret;
49
*sleep_state = &state;
93
}
50
- timer_mod(&state.ts, qemu_clock_get_ns(type) + ns);
94
51
+ timer_mod(&ts, qemu_clock_get_ns(type) + ns);
95
+/* Special vm_stop() variant for terminating the process. Historically clients
52
qemu_coroutine_yield();
96
+ * did not expect a QMP STOP event and so we need to retain compatibility.
53
+ timer_del(&ts);
97
+ */
54
98
+int vm_shutdown(void)
55
/* qemu_co_sleep_wake clears *sleep_state before resuming this coroutine. */
99
+{
56
assert(*sleep_state == NULL);
100
+ return do_vm_stop(RUN_STATE_SHUTDOWN, false);
101
+}
102
+
103
static bool cpu_can_run(CPUState *cpu)
104
{
105
if (cpu->stop) {
106
@@ -XXX,XX +XXX,XX @@ int vm_stop(RunState state)
107
return 0;
108
}
109
110
- return do_vm_stop(state);
111
+ return do_vm_stop(state, true);
112
}
113
114
/**
115
diff --git a/iothread.c b/iothread.c
116
index XXXXXXX..XXXXXXX 100644
117
--- a/iothread.c
118
+++ b/iothread.c
119
@@ -XXX,XX +XXX,XX @@ void iothread_stop(IOThread *iothread)
120
qemu_thread_join(&iothread->thread);
121
}
122
123
-static int iothread_stop_iter(Object *object, void *opaque)
124
-{
125
- IOThread *iothread;
126
-
127
- iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
128
- if (!iothread) {
129
- return 0;
130
- }
131
- iothread_stop(iothread);
132
- return 0;
133
-}
134
-
135
static void iothread_instance_init(Object *obj)
136
{
137
IOThread *iothread = IOTHREAD(obj);
138
@@ -XXX,XX +XXX,XX @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
139
return head;
140
}
141
142
-void iothread_stop_all(void)
143
-{
144
- Object *container = object_get_objects_root();
145
- BlockDriverState *bs;
146
- BdrvNextIterator it;
147
-
148
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
149
- AioContext *ctx = bdrv_get_aio_context(bs);
150
- if (ctx == qemu_get_aio_context()) {
151
- continue;
152
- }
153
- aio_context_acquire(ctx);
154
- bdrv_set_aio_context(bs, qemu_get_aio_context());
155
- aio_context_release(ctx);
156
- }
157
-
158
- object_child_foreach(container, iothread_stop_iter, NULL);
159
-}
160
-
161
static gpointer iothread_g_main_context_init(gpointer opaque)
162
{
163
AioContext *ctx;
164
diff --git a/vl.c b/vl.c
165
index XXXXXXX..XXXXXXX 100644
166
--- a/vl.c
167
+++ b/vl.c
168
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
169
os_setup_post();
170
171
main_loop();
172
- replay_disable_events();
173
174
- /* The ordering of the following is delicate. Stop vcpus to prevent new
175
- * I/O requests being queued by the guest. Then stop IOThreads (this
176
- * includes a drain operation and completes all request processing). At
177
- * this point emulated devices are still associated with their IOThreads
178
- * (if any) but no longer have any work to do. Only then can we close
179
- * block devices safely because we know there is no more I/O coming.
180
- */
181
- pause_all_vcpus();
182
- iothread_stop_all();
183
+ /* No more vcpu or device emulation activity beyond this point */
184
+ vm_shutdown();
185
+
186
bdrv_close_all();
187
188
res_free();
189
--
57
--
190
2.14.3
58
2.31.1
191
59
192
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Reported-by: Alberto Garcia <berto@igalia.com>
3
Right now, users of qemu_co_sleep_ns_wakeable are simply passing
4
Signed-off-by: Fam Zheng <famz@redhat.com>
4
a pointer to QemuCoSleepState by reference to the function. But
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
QemuCoSleepState really is just a Coroutine*; making the
6
Message-id: 20180306024328.19195-1-famz@redhat.com
6
content of the struct public is just as efficient and lets us
7
skip the user_state_pointer indirection.
8
9
Since the usage is changed, take the occasion to rename the
10
struct to QemuCoSleep.
11
12
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
13
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
14
Message-id: 20210517100548.28806-6-pbonzini@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
16
---
9
README | 2 +-
17
include/qemu/coroutine.h | 23 +++++++++++----------
10
1 file changed, 1 insertion(+), 1 deletion(-)
18
block/block-copy.c | 8 ++++----
11
19
block/nbd.c | 10 ++++-----
12
diff --git a/README b/README
20
util/qemu-coroutine-sleep.c | 41 ++++++++++++++++---------------------
13
index XXXXXXX..XXXXXXX 100644
21
4 files changed, 39 insertions(+), 43 deletions(-)
14
--- a/README
22
15
+++ b/README
23
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
16
@@ -XXX,XX +XXX,XX @@ The QEMU website is also maintained under source control.
24
index XXXXXXX..XXXXXXX 100644
17
git clone git://git.qemu.org/qemu-web.git
25
--- a/include/qemu/coroutine.h
18
https://www.qemu.org/2017/02/04/the-new-qemu-website-is-up/
26
+++ b/include/qemu/coroutine.h
19
27
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_wrlock(CoRwlock *lock);
20
-A 'git-profile' utility was created to make above process less
28
*/
21
+A 'git-publish' utility was created to make above process less
29
void qemu_co_rwlock_unlock(CoRwlock *lock);
22
cumbersome, and is highly recommended for making regular contributions,
30
23
or even just for sending consecutive patch series revisions. It also
31
-typedef struct QemuCoSleepState QemuCoSleepState;
24
requires a working 'git send-email' setup, and by default doesn't
32
+typedef struct QemuCoSleep {
33
+ Coroutine *to_wake;
34
+} QemuCoSleep;
35
36
/**
37
- * Yield the coroutine for a given duration. During this yield, @sleep_state
38
- * is set to an opaque pointer, which may be used for
39
- * qemu_co_sleep_wake(). Be careful, the pointer is set back to zero when the
40
- * timer fires. Don't save the obtained value to other variables and don't call
41
- * qemu_co_sleep_wake from another aio context.
42
+ * Yield the coroutine for a given duration. Initializes @w so that,
43
+ * during this yield, it can be passed to qemu_co_sleep_wake() to
44
+ * terminate the sleep.
45
*/
46
-void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
47
- QemuCoSleepState **sleep_state);
48
+void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
49
+ QEMUClockType type, int64_t ns);
50
+
51
static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
52
{
53
- QemuCoSleepState *unused = NULL;
54
- qemu_co_sleep_ns_wakeable(type, ns, &unused);
55
+ QemuCoSleep w = { 0 };
56
+ qemu_co_sleep_ns_wakeable(&w, type, ns);
57
}
58
59
/**
60
@@ -XXX,XX +XXX,XX @@ static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
61
* qemu_co_sleep_ns() and should be checked to be non-NULL before calling
62
* qemu_co_sleep_wake().
63
*/
64
-void qemu_co_sleep_wake(QemuCoSleepState *sleep_state);
65
+void qemu_co_sleep_wake(QemuCoSleep *w);
66
67
/**
68
* Yield until a file descriptor becomes readable
69
diff --git a/block/block-copy.c b/block/block-copy.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/block/block-copy.c
72
+++ b/block/block-copy.c
73
@@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyCallState {
74
/* State */
75
int ret;
76
bool finished;
77
- QemuCoSleepState *sleep_state;
78
+ QemuCoSleep sleep;
79
bool cancelled;
80
81
/* OUT parameters */
82
@@ -XXX,XX +XXX,XX @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
83
if (ns > 0) {
84
block_copy_task_end(task, -EAGAIN);
85
g_free(task);
86
- qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
87
- &call_state->sleep_state);
88
+ qemu_co_sleep_ns_wakeable(&call_state->sleep,
89
+ QEMU_CLOCK_REALTIME, ns);
90
continue;
91
}
92
}
93
@@ -XXX,XX +XXX,XX @@ out:
94
95
void block_copy_kick(BlockCopyCallState *call_state)
96
{
97
- qemu_co_sleep_wake(call_state->sleep_state);
98
+ qemu_co_sleep_wake(&call_state->sleep);
99
}
100
101
/*
102
diff --git a/block/nbd.c b/block/nbd.c
103
index XXXXXXX..XXXXXXX 100644
104
--- a/block/nbd.c
105
+++ b/block/nbd.c
106
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVNBDState {
107
CoQueue free_sema;
108
Coroutine *connection_co;
109
Coroutine *teardown_co;
110
- QemuCoSleepState *connection_co_sleep_ns_state;
111
+ QemuCoSleep reconnect_sleep;
112
bool drained;
113
bool wait_drained_end;
114
int in_flight;
115
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs)
116
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
117
118
s->drained = true;
119
- qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
120
+ qemu_co_sleep_wake(&s->reconnect_sleep);
121
122
nbd_co_establish_connection_cancel(bs, false);
123
124
@@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs)
125
126
s->state = NBD_CLIENT_QUIT;
127
if (s->connection_co) {
128
- qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
129
+ qemu_co_sleep_wake(&s->reconnect_sleep);
130
nbd_co_establish_connection_cancel(bs, true);
131
}
132
if (qemu_in_coroutine()) {
133
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
134
}
135
bdrv_inc_in_flight(s->bs);
136
} else {
137
- qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout,
138
- &s->connection_co_sleep_ns_state);
139
+ qemu_co_sleep_ns_wakeable(&s->reconnect_sleep,
140
+ QEMU_CLOCK_REALTIME, timeout);
141
if (s->drained) {
142
continue;
143
}
144
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/util/qemu-coroutine-sleep.c
147
+++ b/util/qemu-coroutine-sleep.c
148
@@ -XXX,XX +XXX,XX @@
149
150
static const char *qemu_co_sleep_ns__scheduled = "qemu_co_sleep_ns";
151
152
-struct QemuCoSleepState {
153
+void qemu_co_sleep_wake(QemuCoSleep *w)
154
+{
155
Coroutine *co;
156
- QemuCoSleepState **user_state_pointer;
157
-};
158
159
-void qemu_co_sleep_wake(QemuCoSleepState *sleep_state)
160
-{
161
- if (sleep_state) {
162
+ co = w->to_wake;
163
+ w->to_wake = NULL;
164
+ if (co) {
165
/* Write of schedule protected by barrier write in aio_co_schedule */
166
- const char *scheduled = qatomic_cmpxchg(&sleep_state->co->scheduled,
167
+ const char *scheduled = qatomic_cmpxchg(&co->scheduled,
168
qemu_co_sleep_ns__scheduled, NULL);
169
170
assert(scheduled == qemu_co_sleep_ns__scheduled);
171
- *sleep_state->user_state_pointer = NULL;
172
- aio_co_wake(sleep_state->co);
173
+ aio_co_wake(co);
174
}
175
}
176
177
static void co_sleep_cb(void *opaque)
178
{
179
- QemuCoSleepState **sleep_state = opaque;
180
- qemu_co_sleep_wake(*sleep_state);
181
+ QemuCoSleep *w = opaque;
182
+ qemu_co_sleep_wake(w);
183
}
184
185
-void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
186
- QemuCoSleepState **sleep_state)
187
+void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
188
+ QEMUClockType type, int64_t ns)
189
{
190
+ Coroutine *co = qemu_coroutine_self();
191
AioContext *ctx = qemu_get_current_aio_context();
192
QEMUTimer ts;
193
- QemuCoSleepState state = {
194
- .co = qemu_coroutine_self(),
195
- .user_state_pointer = sleep_state,
196
- };
197
198
- const char *scheduled = qatomic_cmpxchg(&state.co->scheduled, NULL,
199
- qemu_co_sleep_ns__scheduled);
200
+ const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL,
201
+ qemu_co_sleep_ns__scheduled);
202
if (scheduled) {
203
fprintf(stderr,
204
"%s: Co-routine was already scheduled in '%s'\n",
205
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
206
abort();
207
}
208
209
- aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, sleep_state);
210
- *sleep_state = &state;
211
+ w->to_wake = co;
212
+ aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, w),
213
timer_mod(&ts, qemu_clock_get_ns(type) + ns);
214
qemu_coroutine_yield();
215
timer_del(&ts);
216
217
- /* qemu_co_sleep_wake clears *sleep_state before resuming this coroutine. */
218
- assert(*sleep_state == NULL);
219
+ /* w->to_wake is cleared before resuming this coroutine. */
220
+ assert(w->to_wake == NULL);
221
}
25
--
222
--
26
2.14.3
223
2.31.1
27
224
28
diff view generated by jsdifflib
1
Sometimes it's necessary for the main loop thread to run a BH in an
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
IOThread and wait for its completion. This primitive is useful during
3
startup/shutdown to synchronize and avoid race conditions.
4
2
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
3
Allow using QemuCoSleep to sleep forever until woken by qemu_co_sleep_wake.
6
Reviewed-by: Fam Zheng <famz@redhat.com>
4
This makes the logic of qemu_co_sleep_ns_wakeable easy to understand.
7
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
5
8
Message-id: 20180307144205.20619-2-stefanha@redhat.com
6
In the future we will introduce an API that can work even if the
7
sleep and wake happen from different threads. For now, initializing
8
w->to_wake after timer_mod is fine because the timer can only fire in
9
the same AioContext.
10
11
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
13
Message-id: 20210517100548.28806-7-pbonzini@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
15
---
11
include/block/aio-wait.h | 13 +++++++++++++
16
include/qemu/coroutine.h | 5 +++++
12
util/aio-wait.c | 31 +++++++++++++++++++++++++++++++
17
util/qemu-coroutine-sleep.c | 26 +++++++++++++++++++-------
13
2 files changed, 44 insertions(+)
18
2 files changed, 24 insertions(+), 7 deletions(-)
14
19
15
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
20
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
16
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
17
--- a/include/block/aio-wait.h
22
--- a/include/qemu/coroutine.h
18
+++ b/include/block/aio-wait.h
23
+++ b/include/qemu/coroutine.h
19
@@ -XXX,XX +XXX,XX @@ typedef struct {
24
@@ -XXX,XX +XXX,XX @@ typedef struct QemuCoSleep {
20
*/
25
void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
21
void aio_wait_kick(AioWait *wait);
26
QEMUClockType type, int64_t ns);
22
27
23
+/**
28
+/**
24
+ * aio_wait_bh_oneshot:
29
+ * Yield the coroutine until the next call to qemu_co_sleep_wake.
25
+ * @ctx: the aio context
26
+ * @cb: the BH callback function
27
+ * @opaque: user data for the BH callback function
28
+ *
29
+ * Run a BH in @ctx and wait for it to complete.
30
+ *
31
+ * Must be called from the main loop thread with @ctx acquired exactly once.
32
+ * Note that main loop event processing may occur.
33
+ */
30
+ */
34
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
31
+void coroutine_fn qemu_co_sleep(QemuCoSleep *w);
35
+
32
+
36
#endif /* QEMU_AIO_WAIT */
33
static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
37
diff --git a/util/aio-wait.c b/util/aio-wait.c
34
{
35
QemuCoSleep w = { 0 };
36
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
38
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
39
--- a/util/aio-wait.c
38
--- a/util/qemu-coroutine-sleep.c
40
+++ b/util/aio-wait.c
39
+++ b/util/qemu-coroutine-sleep.c
41
@@ -XXX,XX +XXX,XX @@ void aio_wait_kick(AioWait *wait)
40
@@ -XXX,XX +XXX,XX @@ static void co_sleep_cb(void *opaque)
42
aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
41
qemu_co_sleep_wake(w);
42
}
43
44
-void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
45
- QEMUClockType type, int64_t ns)
46
+void coroutine_fn qemu_co_sleep(QemuCoSleep *w)
47
{
48
Coroutine *co = qemu_coroutine_self();
49
- AioContext *ctx = qemu_get_current_aio_context();
50
- QEMUTimer ts;
51
52
const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL,
53
qemu_co_sleep_ns__scheduled);
54
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
43
}
55
}
56
57
w->to_wake = co;
58
- aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, w),
59
- timer_mod(&ts, qemu_clock_get_ns(type) + ns);
60
qemu_coroutine_yield();
61
- timer_del(&ts);
62
63
/* w->to_wake is cleared before resuming this coroutine. */
64
assert(w->to_wake == NULL);
44
}
65
}
45
+
66
+
46
+typedef struct {
67
+void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
47
+ AioWait wait;
68
+ QEMUClockType type, int64_t ns)
48
+ bool done;
69
+{
49
+ QEMUBHFunc *cb;
70
+ AioContext *ctx = qemu_get_current_aio_context();
50
+ void *opaque;
71
+ QEMUTimer ts;
51
+} AioWaitBHData;
52
+
72
+
53
+/* Context: BH in IOThread */
73
+ aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, w);
54
+static void aio_wait_bh(void *opaque)
74
+ timer_mod(&ts, qemu_clock_get_ns(type) + ns);
55
+{
56
+ AioWaitBHData *data = opaque;
57
+
75
+
58
+ data->cb(data->opaque);
76
+ /*
59
+
77
+ * The timer will fire in the current AioContext, so the callback
60
+ data->done = true;
78
+ * must happen after qemu_co_sleep yields and there is no race
61
+ aio_wait_kick(&data->wait);
79
+ * between timer_mod and qemu_co_sleep.
62
+}
80
+ */
63
+
81
+ qemu_co_sleep(w);
64
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
82
+ timer_del(&ts);
65
+{
66
+ AioWaitBHData data = {
67
+ .cb = cb,
68
+ .opaque = opaque,
69
+ };
70
+
71
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
72
+
73
+ aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
74
+ AIO_WAIT_WHILE(&data.wait, ctx, !data.done);
75
+}
83
+}
76
--
84
--
77
2.14.3
85
2.31.1
78
86
79
diff view generated by jsdifflib