The following changes since commit aa9bbd865502ed517624ab6fe7d4b5d89ca95e43:

  Merge tag 'pull-ppc-20230528' of https://gitlab.com/danielhb/qemu into staging (2023-05-29 14:31:52 -0700)

are available in the Git repository at:

  https://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 60f782b6b78211c125970768be726c9f380dbd61:

  aio: remove aio_disable_external() API (2023-05-30 17:37:26 +0200)

----------------------------------------------------------------
Block layer patches

- Fix blockdev-create with iothreads
- Remove aio_disable_external() API

----------------------------------------------------------------
Kevin Wolf (12):
      block-coroutine-wrapper: Take AioContext lock in no_co_wrappers
      block: Clarify locking rules for bdrv_open(_inherit)()
      block: Take main AioContext lock when calling bdrv_open()
      block-backend: Fix blk_new_open() for iothreads
      mirror: Hold main AioContext lock for calling bdrv_open_backing_file()
      qcow2: Fix open with 'file' in iothread
      raw-format: Fix open with 'file' in iothread
      copy-before-write: Fix open with child in iothread
      block: Take AioContext lock in bdrv_open_driver()
      block: Fix AioContext locking in bdrv_insert_node()
      iotests: Make verify_virtio_scsi_pci_or_ccw() public
      iotests: Test blockdev-create in iothread

Stefan Hajnoczi (20):
      block-backend: split blk_do_set_aio_context()
      hw/qdev: introduce qdev_is_realized() helper
      virtio-scsi: avoid race between unplug and transport event
      virtio-scsi: stop using aio_disable_external() during unplug
      util/vhost-user-server: rename refcount to in_flight counter
      block/export: wait for vhost-user-blk requests when draining
      block/export: stop using is_external in vhost-user-blk server
      hw/xen: do not use aio_set_fd_handler(is_external=true) in xen_xenstore
      block: add blk_in_drain() API
      block: drain from main loop thread in bdrv_co_yield_to_drain()
      xen-block: implement BlockDevOps->drained_begin()
      hw/xen: do not set is_external=true on evtchn fds
      block/export: rewrite vduse-blk drain code
      block/export: don't require AioContext lock around blk_exp_ref/unref()
      block/fuse: do not set is_external=true on FUSE fd
      virtio: make it possible to detach host notifier from any thread
      virtio-blk: implement BlockDevOps->drained_begin()
      virtio-scsi: implement BlockDevOps->drained_begin()
      virtio: do not set is_external=true on host notifiers
      aio: remove aio_disable_external() API

 hw/block/dataplane/xen-block.h                |   2 +
 include/block/aio.h                           |  57 ------------
 include/block/block-common.h                  |   3 +
 include/block/block_int-common.h              |  72 +++++++--------
 include/block/export.h                        |   2 +
 include/hw/qdev-core.h                        |  17 +++-
 include/hw/scsi/scsi.h                        |  14 +++
 include/qemu/vhost-user-server.h              |   8 +-
 include/sysemu/block-backend-common.h         |  25 ++---
 include/sysemu/block-backend-global-state.h   |   1 +
 util/aio-posix.h                              |   1 -
 block.c                                       |  46 ++++++---
 block/blkio.c                                 |  15 +--
 block/block-backend.c                         | 104 ++++++++++++---------
 block/copy-before-write.c                     |  21 ++++-
 block/curl.c                                  |  10 +-
 block/export/export.c                         |  13 ++-
 block/export/fuse.c                           |  56 ++++++++++-
 block/export/vduse-blk.c                      | 128 ++++++++++++++++++--------
 block/export/vhost-user-blk-server.c          |  52 +++++++++--
 block/io.c                                    |  16 ++--
 block/io_uring.c                              |   4 +-
 block/iscsi.c                                 |   3 +-
 block/linux-aio.c                             |   4 +-
 block/mirror.c                                |   6 ++
 block/nfs.c                                   |   5 +-
 block/nvme.c                                  |   8 +-
 block/qapi-sysemu.c                           |   3 +
 block/qcow2.c                                 |   8 +-
 block/raw-format.c                            |   5 +
 block/ssh.c                                   |   4 +-
 block/win32-aio.c                             |   6 +-
 blockdev.c                                    |  29 ++++--
 hw/block/dataplane/virtio-blk.c               |  23 +++--
 hw/block/dataplane/xen-block.c                |  42 ++++++---
 hw/block/virtio-blk.c                         |  38 +++++++-
 hw/block/xen-block.c                          |  24 ++++-
 hw/i386/kvm/xen_xenstore.c                    |   2 +-
 hw/scsi/scsi-bus.c                            |  46 ++++++++-
 hw/scsi/scsi-disk.c                           |  27 +++++-
 hw/scsi/virtio-scsi-dataplane.c               |  32 +++++--
 hw/scsi/virtio-scsi.c                         | 127 +++++++++++++++++------
 hw/virtio/virtio.c                            |   9 +-
 hw/xen/xen-bus.c                              |  11 ++-
 io/channel-command.c                          |   6 +-
 io/channel-file.c                             |   3 +-
 io/channel-socket.c                           |   3 +-
 migration/rdma.c                              |  16 ++--
 qemu-nbd.c                                    |   4 +
 tests/unit/test-aio.c                         |  27 +-----
 tests/unit/test-bdrv-drain.c                  |  15 +--
 tests/unit/test-block-iothread.c              |   4 +-
 tests/unit/test-fdmon-epoll.c                 |  73 ---------------
 tests/unit/test-nested-aio-poll.c             |   9 +-
 util/aio-posix.c                              |  20 +---
 util/aio-win32.c                              |   8 +-
 util/async.c                                  |   3 +-
 util/fdmon-epoll.c                            |  10 --
 util/fdmon-io_uring.c                         |   8 +-
 util/fdmon-poll.c                             |   3 +-
 util/main-loop.c                              |   7 +-
 util/qemu-coroutine-io.c                      |   7 +-
 util/vhost-user-server.c                      |  33 ++++---
 scripts/block-coroutine-wrapper.py            |  25 +++--
 tests/qemu-iotests/iotests.py                 |   2 +-
 hw/scsi/trace-events                          |   2 +
 tests/qemu-iotests/256                        |   2 +-
 tests/qemu-iotests/tests/iothreads-create     |  67 ++++++++++++
 tests/qemu-iotests/tests/iothreads-create.out |   4 +
 tests/unit/meson.build                        |   3 -
 70 files changed, 931 insertions(+), 562 deletions(-)
 delete mode 100644 tests/unit/test-fdmon-epoll.c
 create mode 100755 tests/qemu-iotests/tests/iothreads-create
 create mode 100644 tests/qemu-iotests/tests/iothreads-create.out


The following changes since commit 281f327487c9c9b1599f93c589a408bbf4a651b8:

  Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.12-pull-request' into staging (2017-12-22 00:11:36 +0000)

are available in the git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 1a63a907507fbbcfaee3f622907ec244b7eabda8:

  block: Keep nodes drained between reopen_queue/multiple (2017-12-22 15:05:32 +0100)

----------------------------------------------------------------
Block layer patches

----------------------------------------------------------------
Doug Gale (1):
      nvme: Add tracing

Edgar Kaziakhmedov (1):
      qcow2: get rid of qcow2_backing_read1 routine

Fam Zheng (2):
      block: Open backing image in force share mode for size probe
      block: Remove unused bdrv_requests_pending

John Snow (1):
      iotests: fix 197 for vpc

Kevin Wolf (27):
      block: Formats don't need CONSISTENT_READ with NO_IO
      block: Make bdrv_drain_invoke() recursive
      block: Call .drain_begin only once in bdrv_drain_all_begin()
      test-bdrv-drain: Test BlockDriver callbacks for drain
      block: bdrv_drain_recurse(): Remove unused begin parameter
      block: Don't wait for requests in bdrv_drain*_end()
      block: Unify order in drain functions
      block: Don't acquire AioContext in hmp_qemu_io()
      block: Document that x-blockdev-change breaks quorum children list
      block: Assert drain_all is only called from main AioContext
      block: Make bdrv_drain() driver callbacks non-recursive
      test-bdrv-drain: Test callback for bdrv_drain
      test-bdrv-drain: Test bs->quiesce_counter
      blockjob: Pause job on draining any job BDS
      test-bdrv-drain: Test drain vs. block jobs
      block: Don't block_job_pause_all() in bdrv_drain_all()
      block: Nested drain_end must still call callbacks
      test-bdrv-drain: Test nested drain sections
      block: Don't notify parents in drain call chain
      block: Add bdrv_subtree_drained_begin/end()
      test-bdrv-drain: Tests for bdrv_subtree_drain
      test-bdrv-drain: Test behaviour in coroutine context
      test-bdrv-drain: Recursive draining with multiple parents
      block: Allow graph changes in subtree drained section
      test-bdrv-drain: Test graph changes in drained section
      commit: Simplify reopen of base
      block: Keep nodes drained between reopen_queue/multiple

Thomas Huth (3):
      block: Remove the obsolete -drive boot=on|off parameter
      block: Remove the deprecated -hdachs option
      block: Mention -drive cyls/heads/secs/trans/serial/addr in deprecation chapter

 qapi/block-core.json             |   4 +
 block/qcow2.h                    |   3 -
 include/block/block.h            |  15 +-
 include/block/block_int.h        |   6 +-
 block.c                          |  75 ++++-
 block/commit.c                   |   8 +-
 block/io.c                       | 164 +++++++---
 block/qcow2.c                    |  51 +--
 block/replication.c              |   6 +
 blockdev.c                       |  11 -
 blockjob.c                       |  22 +-
 hmp.c                            |   6 -
 hw/block/nvme.c                  | 349 +++++++++++++++++----
 qemu-io-cmds.c                   |   3 +
 tests/test-bdrv-drain.c          | 651 +++++++++++++++++++++++++++++++++++++++
 vl.c                             |  86 +-----
 hw/block/trace-events            |  93 ++++++
 qemu-doc.texi                    |  29 +-
 qemu-options.hx                  |  19 +-
 tests/Makefile.include           |   2 +
 tests/qemu-iotests/197           |   4 +
 tests/qemu-iotests/common.filter |   3 +-
 22 files changed, 1294 insertions(+), 316 deletions(-)
 create mode 100644 tests/test-bdrv-drain.c
While calling bdrv_new_open_driver_opts(), the main AioContext lock must
be held, not the lock of the AioContext of the block subtree it will be
added to afterwards.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-11-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_delete(BlockDriverState *bs)
  * empty set of options. The reference to the QDict belongs to the block layer
  * after the call (even on failure), so if the caller intends to reuse the
  * dictionary, it needs to use qobject_ref() before calling bdrv_open.
+ *
+ * The caller holds the AioContext lock for @bs. It must make sure that @bs
+ * stays in the same AioContext, i.e. @options must not refer to nodes in a
+ * different AioContext.
  */
 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
                                    int flags, Error **errp)
 {
     ERRP_GUARD();
     int ret;
+    AioContext *ctx = bdrv_get_aio_context(bs);
     BlockDriverState *new_node_bs = NULL;
     const char *drvname, *node_name;
     BlockDriver *drv;
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,

     GLOBAL_STATE_CODE();

+    aio_context_release(ctx);
+    aio_context_acquire(qemu_get_aio_context());
     new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
                                             errp);
+    aio_context_release(qemu_get_aio_context());
+    aio_context_acquire(ctx);
+    assert(bdrv_get_aio_context(bs) == ctx);
+
     options = NULL; /* bdrv_new_open_driver() eats options */
     if (!new_node_bs) {
         error_prepend(errp, "Could not create node: ");
--
2.40.1


Commit 1f4ad7d fixed 'qemu-img info' for raw images that are currently
in use as a mirror target. It is not enough for image formats, though,
as these still unconditionally request BLK_PERM_CONSISTENT_READ.

As this permission is geared towards whether the guest-visible data is
consistent, and has no impact on whether the metadata is sane, and
'qemu-img info' does not read guest-visible data (except for the raw
format), it makes sense to not require BLK_PERM_CONSISTENT_READ if there
is not going to be any guest I/O performed, regardless of image format.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
     assert(role == &child_backing || role == &child_file);

     if (!backing) {
+        int flags = bdrv_reopen_get_flags(reopen_queue, bs);
+
         /* Apart from the modifications below, the same permissions are
          * forwarded and left alone as for filters */
         bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,

         /* bs->file always needs to be consistent because of the metadata. We
          * can never allow other users to resize or write to it. */
-        perm |= BLK_PERM_CONSISTENT_READ;
+        if (!(flags & BDRV_O_NO_IO)) {
+            perm |= BLK_PERM_CONSISTENT_READ;
+        }
         shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
     } else {
         /* We want consistent read from backing files if the parent needs it.
--
2.13.6
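For context, the BDRV_O_NO_IO change above matters to metadata-only users
such as 'qemu-img info'. A minimal sketch of such a caller follows; it
assumes the bdrv_open() API of this period and is illustrative, not taken
from the series:

    /* A minimal sketch: BDRV_O_NO_IO tells the format driver that no
     * guest I/O will be performed, so bs->file no longer needs
     * BLK_PERM_CONSISTENT_READ and the image can be inspected even
     * while its data is in use elsewhere. */
    Error *local_err = NULL;
    BlockDriverState *bs;

    bs = bdrv_open(filename, NULL, NULL, BDRV_O_NO_IO, &local_err);
    if (bs) {
        /* query metadata only, e.g. bdrv_getlength(bs) */
        bdrv_unref(bs);
    }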
The AioContext lock must not be held for bdrv_open_child(), but it is
necessary for the following operations, in particular those using nested
event loops in coroutine wrappers.

Temporarily dropping the main AioContext lock is not necessary because
we know we run in the main thread.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-9-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/copy-before-write.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index XXXXXXX..XXXXXXX 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     int64_t cluster_size;
     g_autoptr(BlockdevOptions) full_opts = NULL;
     BlockdevOptionsCbw *opts;
+    AioContext *ctx;
     int ret;

     full_opts = cbw_parse_options(options, errp);
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }

+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+
     if (opts->bitmap) {
         bitmap = block_dirty_bitmap_lookup(opts->bitmap->node,
                                            opts->bitmap->name, NULL, errp);
         if (!bitmap) {
-            return -EINVAL;
+            ret = -EINVAL;
+            goto out;
         }
     }
     s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error :
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
     if (!s->bcs) {
         error_prepend(errp, "Cannot create block-copy-state: ");
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }

     cluster_size = block_copy_cluster_size(s->bcs);

     s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
     if (!s->done_bitmap) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
     bdrv_disable_dirty_bitmap(s->done_bitmap);

     /* s->access_bitmap starts equal to bcs bitmap */
     s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
     if (!s->access_bitmap) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
     bdrv_disable_dirty_bitmap(s->access_bitmap);
     bdrv_dirty_bitmap_merge_internal(s->access_bitmap,
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_co_mutex_init(&s->lock);
     QLIST_INIT(&s->frozen_read_reqs);

-    return 0;
+    ret = 0;
+out:
+    aio_context_release(ctx);
+    return ret;
 }

 static void cbw_close(BlockDriverState *bs)
--
2.40.1


From: John Snow <jsnow@redhat.com>

VPC has some difficulty creating geometries of particular size.
However, we can indeed force it to use a literal one, so let's
do that for the sake of test 197, which is testing some specific
offsets.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Lukáš Doktor <ldoktor@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/197           | 4 ++++
 tests/qemu-iotests/common.filter | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/197
+++ b/tests/qemu-iotests/197
@@ -XXX,XX +XXX,XX @@ echo '=== Copy-on-read ==='
 echo

 # Prep the images
+# VPC rounds image sizes to a specific geometry, force a specific size.
+if [ "$IMGFMT" = "vpc" ]; then
+    IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
+fi
 _make_test_img 4G
 $QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
 IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -XXX,XX +XXX,XX @@ _filter_img_create()
         -e "s# log_size=[0-9]\\+##g" \
         -e "s# refcount_bits=[0-9]\\+##g" \
         -e "s# key-secret=[a-zA-Z0-9]\\+##g" \
-        -e "s# iter-time=[0-9]\\+##g"
+        -e "s# iter-time=[0-9]\\+##g" \
+        -e "s# force_size=\\(on\\|off\\)##g"

 _filter_img_info()
--
2.13.6
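The locking added to cbw_open() above follows a single-exit pattern that is
worth spelling out: take the lock once, turn every early return into a
'goto out', and release on the one exit path so no return leaks the
AioContext lock. A condensed sketch with hypothetical helpers (not from the
patch):

    /* Hedged sketch of the single-exit locking pattern. The helpers
     * do_step_one()/do_step_two() are hypothetical placeholders. */
    static int example_open_locked(BlockDriverState *bs, Error **errp)
    {
        AioContext *ctx = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(ctx);

        if (!do_step_one(bs, errp)) {
            ret = -EINVAL;
            goto out;           /* no bare return while holding the lock */
        }
        if (!do_step_two(bs, errp)) {
            ret = -EINVAL;
            goto out;
        }

        ret = 0;
    out:
        aio_context_release(ctx);
        return ret;
    }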
From: Stefan Hajnoczi <stefanha@redhat.com>

Only report a transport reset event to the guest after the SCSIDevice
has been unrealized by qdev_simple_device_unplug_cb().

qdev_simple_device_unplug_cb() sets the SCSIDevice's qdev.realized field
to false so that scsi_device_find/get() no longer see it.

scsi_target_emulate_report_luns() also needs to be updated to filter out
SCSIDevices that are unrealized.

Change virtio_scsi_push_event() to take event information as an argument
instead of the SCSIDevice. This allows virtio_scsi_hotunplug() to emit a
VIRTIO_SCSI_T_TRANSPORT_RESET event after the SCSIDevice has already
been unrealized.

These changes ensure that the guest driver does not see the SCSIDevice
that's being unplugged if it responds very quickly to the transport
reset event.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-4-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/scsi-bus.c    |  3 +-
 hw/scsi/virtio-scsi.c | 86 ++++++++++++++++++++++++++++++-------------
 2 files changed, 63 insertions(+), 26 deletions(-)

diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -XXX,XX +XXX,XX @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
         DeviceState *qdev = kid->child;
         SCSIDevice *dev = SCSI_DEVICE(qdev);

-        if (dev->channel == channel && dev->id == id && dev->lun != 0) {
+        if (dev->channel == channel && dev->id == id && dev->lun != 0 &&
+            qdev_is_realized(&dev->qdev)) {
             store_lun(tmp, dev->lun);
             g_byte_array_append(buf, tmp, 8);
             len += 8;
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)
     s->events_dropped = false;
 }

-static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
-                                   uint32_t event, uint32_t reason)
+typedef struct {
+    uint32_t event;
+    uint32_t reason;
+    union {
+        /* Used by messages specific to a device */
+        struct {
+            uint32_t id;
+            uint32_t lun;
+        } address;
+    };
+} VirtIOSCSIEventInfo;
+
+static void virtio_scsi_push_event(VirtIOSCSI *s,
+                                   const VirtIOSCSIEventInfo *info)
 {
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
     VirtIOSCSIReq *req;
     VirtIOSCSIEvent *evt;
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
+    uint32_t event = info->event;
+    uint32_t reason = info->reason;

     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
         return;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
     memset(evt, 0, sizeof(VirtIOSCSIEvent));
     evt->event = virtio_tswap32(vdev, event);
     evt->reason = virtio_tswap32(vdev, reason);
-    if (!dev) {
-        assert(event == VIRTIO_SCSI_T_EVENTS_MISSED);
-    } else {
+    if (event != VIRTIO_SCSI_T_EVENTS_MISSED) {
         evt->lun[0] = 1;
-        evt->lun[1] = dev->id;
+        evt->lun[1] = info->address.id;

         /* Linux wants us to keep the same encoding we use for REPORT LUNS. */
-        if (dev->lun >= 256) {
-            evt->lun[2] = (dev->lun >> 8) | 0x40;
+        if (info->address.lun >= 256) {
+            evt->lun[2] = (info->address.lun >> 8) | 0x40;
         }
-        evt->lun[3] = dev->lun & 0xFF;
+        evt->lun[3] = info->address.lun & 0xFF;
     }
     trace_virtio_scsi_event(virtio_scsi_get_lun(evt->lun), event, reason);
-
+
     virtio_scsi_complete_req(req);
 }

 static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
     if (s->events_dropped) {
-        virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
+        VirtIOSCSIEventInfo info = {
+            .event = VIRTIO_SCSI_T_NO_EVENT,
+        };
+        virtio_scsi_push_event(s, &info);
     }
 }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)

     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_CHANGE) &&
         dev->type != TYPE_ROM) {
+        VirtIOSCSIEventInfo info = {
+            .event = VIRTIO_SCSI_T_PARAM_CHANGE,
+            .reason = sense.asc | (sense.ascq << 8),
+            .address = {
+                .id = dev->id,
+                .lun = dev->lun,
+            },
+        };
+
         virtio_scsi_acquire(s);
-        virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
-                               sense.asc | (sense.ascq << 8));
+        virtio_scsi_push_event(s, &info);
         virtio_scsi_release(s);
     }
 }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     }

     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
+        VirtIOSCSIEventInfo info = {
+            .event = VIRTIO_SCSI_T_TRANSPORT_RESET,
+            .reason = VIRTIO_SCSI_EVT_RESET_RESCAN,
+            .address = {
+                .id = sd->id,
+                .lun = sd->lun,
+            },
+        };
+
         virtio_scsi_acquire(s);
-        virtio_scsi_push_event(s, sd,
-                               VIRTIO_SCSI_T_TRANSPORT_RESET,
-                               VIRTIO_SCSI_EVT_RESET_RESCAN);
+        virtio_scsi_push_event(s, &info);
         scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
         virtio_scsi_release(s);
     }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
     SCSIDevice *sd = SCSI_DEVICE(dev);
     AioContext *ctx = s->ctx ?: qemu_get_aio_context();
-
-    if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
-        virtio_scsi_acquire(s);
-        virtio_scsi_push_event(s, sd,
-                               VIRTIO_SCSI_T_TRANSPORT_RESET,
-                               VIRTIO_SCSI_EVT_RESET_REMOVED);
-        scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
-        virtio_scsi_release(s);
-    }
+    VirtIOSCSIEventInfo info = {
+        .event = VIRTIO_SCSI_T_TRANSPORT_RESET,
+        .reason = VIRTIO_SCSI_EVT_RESET_REMOVED,
+        .address = {
+            .id = sd->id,
+            .lun = sd->lun,
+        },
+    };

     aio_disable_external(ctx);
     qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
         blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
         virtio_scsi_release(s);
     }
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
+        virtio_scsi_acquire(s);
+        virtio_scsi_push_event(s, &info);
+        scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
+        virtio_scsi_release(s);
+    }
 }

 static struct SCSIBusInfo virtio_scsi_scsi_info = {
--
2.40.1


This change separates bdrv_drain_invoke(), which calls the BlockDriver
drain callbacks, from bdrv_drain_recurse(). Instead, the function
performs its own recursion now.

One reason for this is that bdrv_drain_recurse() can be called multiple
times by bdrv_drain_all_begin(), but the callbacks may only be called
once. The separation is necessary to fix this bug.

The other reason is that we intend to go to a model where we call all
driver callbacks first, and only then start polling. This is not fully
achieved yet with this patch, as bdrv_drain_invoke() contains a
BDRV_POLL_WHILE() loop for the block driver callbacks, which can still
call callbacks for any unrelated event. It's a step in this direction
anyway.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
     bdrv_wakeup(bs);
 }

+/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
 static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
 {
+    BdrvChild *child, *tmp;
     BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};

     if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
     data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
     bdrv_coroutine_enter(bs, data.co);
     BDRV_POLL_WHILE(bs, !data.done);
+
+    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
+        bdrv_drain_invoke(child->bs, begin);
+    }
 }

 static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
     BdrvChild *child, *tmp;
     bool waited;

-    /* Ensure any pending metadata writes are submitted to bs->file. */
-    bdrv_drain_invoke(bs, begin);
-
     /* Wait for drained requests to finish */
     waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);

@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
         bdrv_parent_drained_begin(bs);
     }

+    bdrv_drain_invoke(bs, true);
     bdrv_drain_recurse(bs, true);
 }

@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
     }

     bdrv_parent_drained_end(bs);
+    bdrv_drain_invoke(bs, false);
     bdrv_drain_recurse(bs, false);
     aio_enable_external(bdrv_get_aio_context(bs));
 }
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
             if (aio_context == bdrv_get_aio_context(bs)) {
+                /* FIXME Calling this multiple times is wrong */
+                bdrv_drain_invoke(bs, true);
                 waited |= bdrv_drain_recurse(bs, true);
             }
         }
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
         aio_context_acquire(aio_context);
         aio_enable_external(aio_context);
         bdrv_parent_drained_end(bs);
+        bdrv_drain_invoke(bs, false);
         bdrv_drain_recurse(bs, false);
         aio_context_release(aio_context);
     }
--
2.13.6
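The LUN bytes filled in by virtio_scsi_push_event() follow the flat-space
addressing that Linux expects from REPORT LUNS: single-level LUNs below
256, and a 0x40 "flat addressing" marker for larger ones. A standalone
sketch of the same encoding (illustrative only, not part of the patch):

    /* Hedged sketch of the event LUN encoding used above; lun_out is
     * assumed to be zero-initialized, as the event buffer is memset. */
    static void encode_event_lun(uint8_t lun_out[4], uint32_t id, uint32_t lun)
    {
        lun_out[0] = 1;                       /* bus identifier */
        lun_out[1] = id;                      /* target id */
        if (lun >= 256) {
            lun_out[2] = (lun >> 8) | 0x40;   /* flat-space addressing */
        }
        lun_out[3] = lun & 0xFF;
    }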
bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver
callback inside its polling loop. This means that how many times it got
called for each node depended on how long it had to poll the event loop.

This is obviously not right and results in nodes that stay drained even
after bdrv_drain_all_end(), which calls .bdrv_co_drain_end() once per
node.

Fix bdrv_drain_all_begin() to call the callback only once, too.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         bdrv_parent_drained_begin(bs);
         aio_disable_external(aio_context);
+        bdrv_drain_invoke(bs, true);
         aio_context_release(aio_context);

         if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
             if (aio_context == bdrv_get_aio_context(bs)) {
-                /* FIXME Calling this multiple times is wrong */
-                bdrv_drain_invoke(bs, true);
                 waited |= bdrv_drain_recurse(bs, true);
             }
         }
--
2.13.6
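The fixed control flow can be summarized in a condensed sketch (simplified
from the patch; the real code also tracks newly encountered AioContexts and
retries the poll loop until nothing waited):

    /* Per node, before polling starts: the driver callback runs once. */
    aio_context_acquire(aio_context);
    bdrv_parent_drained_begin(bs);
    aio_disable_external(aio_context);
    bdrv_drain_invoke(bs, true);          /* exactly once per node */
    aio_context_release(aio_context);

    /* Later, the polling loop only waits for in-flight requests and no
     * longer re-invokes the driver callbacks. */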
If blockdev-create references an existing node in an iothread (e.g. as
its 'file' child), then suddenly all of the image creation code must
run in that AioContext, too. Test that this actually works.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-13-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/tests/iothreads-create     | 67 +++++++++++++++++++
 tests/qemu-iotests/tests/iothreads-create.out |  4 ++
 2 files changed, 71 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/iothreads-create
 create mode 100644 tests/qemu-iotests/tests/iothreads-create.out

diff --git a/tests/qemu-iotests/tests/iothreads-create b/tests/qemu-iotests/tests/iothreads-create
new file mode 100755
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/tests/iothreads-create
@@ -XXX,XX +XXX,XX @@
+#!/usr/bin/env python3
+# group: rw quick
+#
+# Copyright (C) 2023 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
+
+import asyncio
+import iotests
+
+iotests.script_initialize(supported_fmts=['qcow2', 'qcow', 'qed', 'vdi',
+                                          'vmdk', 'parallels'])
+iotests.verify_virtio_scsi_pci_or_ccw()
+
+with iotests.FilePath('disk.img') as img_path, \
+     iotests.VM() as vm:
+
+    iotests.qemu_img_create('-f', 'raw', img_path, '0')
+
+    vm.add_object('iothread,id=iothread0')
+    vm.add_blockdev(f'file,node-name=img-file,read-only=on,'
+                    f'filename={img_path}')
+    vm.add_device('virtio-scsi,iothread=iothread0')
+    vm.add_device('scsi-hd,drive=img-file,share-rw=on')
+
+    vm.launch()
+
+    iotests.log(vm.qmp(
+        'blockdev-reopen',
+        options=[{
+            'driver': 'file',
+            'filename': img_path,
+            'node-name': 'img-file',
+            'read-only': False,
+        }],
+    ))
+    iotests.log(vm.qmp(
+        'blockdev-create',
+        job_id='job0',
+        options={
+            'driver': iotests.imgfmt,
+            'file': 'img-file',
+            'size': 1024 * 1024,
+        },
+    ))
+
+    # Should succeed and not time out
+    try:
+        vm.run_job('job0', wait=5.0)
+        vm.shutdown()
+    except asyncio.TimeoutError:
+        # VM may be stuck, kill it
+        vm.kill()
+        raise
diff --git a/tests/qemu-iotests/tests/iothreads-create.out b/tests/qemu-iotests/tests/iothreads-create.out
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/tests/iothreads-create.out
@@ -XXX,XX +XXX,XX @@
+{"return": {}}
+{"return": {}}
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
+{"return": {}}
--
2.40.1


This adds a test case that the BlockDriver callbacks for drain are
called in bdrv_drained_all_begin/end(), and that both of them are called
exactly once.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 tests/test-bdrv-drain.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++
 tests/Makefile.include  |   2 +
 2 files changed, 139 insertions(+)
 create mode 100644 tests/test-bdrv-drain.c

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * Block node draining tests
+ *
+ * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+
+typedef struct BDRVTestState {
+    int drain_count;
+} BDRVTestState;
+
+static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
+{
+    BDRVTestState *s = bs->opaque;
+    s->drain_count++;
+}
+
+static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
+{
+    BDRVTestState *s = bs->opaque;
+    s->drain_count--;
+}
+
+static void bdrv_test_close(BlockDriverState *bs)
+{
+    BDRVTestState *s = bs->opaque;
+    g_assert_cmpint(s->drain_count, >, 0);
+}
+
+static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
+                                            uint64_t offset, uint64_t bytes,
+                                            QEMUIOVector *qiov, int flags)
+{
+    /* We want this request to stay until the polling loop in drain waits for
+     * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
+     * first and polls its result, too, but it shouldn't accidentally complete
+     * this request yet. */
+    qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+
+    return 0;
+}
+
+static BlockDriver bdrv_test = {
+    .format_name            = "test",
+    .instance_size          = sizeof(BDRVTestState),
+
+    .bdrv_close             = bdrv_test_close,
+    .bdrv_co_preadv         = bdrv_test_co_preadv,
+
+    .bdrv_co_drain_begin    = bdrv_test_co_drain_begin,
+    .bdrv_co_drain_end      = bdrv_test_co_drain_end,
+};
+
+static void aio_ret_cb(void *opaque, int ret)
+{
+    int *aio_ret = opaque;
+    *aio_ret = ret;
+}
+
+static void test_drv_cb_drain_all(void)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    BDRVTestState *s;
+    BlockAIOCB *acb;
+    int aio_ret;
+
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = NULL,
+        .iov_len = 0,
+    };
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+                              &error_abort);
+    s = bs->opaque;
+    blk_insert_bs(blk, bs, &error_abort);
+
+    /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
+    g_assert_cmpint(s->drain_count, ==, 0);
+    bdrv_drain_all_begin();
+    g_assert_cmpint(s->drain_count, ==, 1);
+    bdrv_drain_all_end();
+    g_assert_cmpint(s->drain_count, ==, 0);
+
+    /* Now do the same while a request is pending */
+    aio_ret = -EINPROGRESS;
+    acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
+    g_assert(acb != NULL);
+    g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
+
+    g_assert_cmpint(s->drain_count, ==, 0);
+    bdrv_drain_all_begin();
+    g_assert_cmpint(aio_ret, ==, 0);
+    g_assert_cmpint(s->drain_count, ==, 1);
+    bdrv_drain_all_end();
+    g_assert_cmpint(s->drain_count, ==, 0);
+
+    bdrv_unref(bs);
+    blk_unref(blk);
+}
+
+int main(int argc, char **argv)
+{
+    bdrv_init();
+    qemu_init_main_loop(&error_abort);
+
+    g_test_init(&argc, &argv, NULL);
+
+    g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
+
+    return g_test_run();
+}
diff --git a/tests/Makefile.include b/tests/Makefile.include
index XXXXXXX..XXXXXXX 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -XXX,XX +XXX,XX @@ gcov-files-test-thread-pool-y = thread-pool.c
 gcov-files-test-hbitmap-y = util/hbitmap.c
 check-unit-y += tests/test-hbitmap$(EXESUF)
 gcov-files-test-hbitmap-y = blockjob.c
+check-unit-y += tests/test-bdrv-drain$(EXESUF)
 check-unit-y += tests/test-blockjob$(EXESUF)
 check-unit-y += tests/test-blockjob-txn$(EXESUF)
 check-unit-y += tests/test-x86-cpuid$(EXESUF)
@@ -XXX,XX +XXX,XX @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
 tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
 tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
 tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
+tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y)
 tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
 tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
--
2.13.6
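The unit test above relies on a small completion-callback idiom to tell an
in-flight request apart from a completed one. Distilled (a sketch using the
same names as the test, shown here out of context):

    /* A sentinel value marks the request as still in flight; the
     * callback overwrites it with the real result on completion. */
    static void aio_ret_cb(void *opaque, int ret)
    {
        int *aio_ret = opaque;
        *aio_ret = ret;
    }

    int aio_ret = -EINPROGRESS;                        /* sentinel */
    BlockAIOCB *acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);

    bdrv_drain_all_begin();    /* must force the request to complete */
    assert(aio_ret == 0);      /* sentinel replaced by the result */
    bdrv_drain_all_end();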
From: Stefan Hajnoczi <stefanha@redhat.com>

virtio_queue_aio_detach_host_notifier() does two things:
1. It removes the fd handler from the event loop.
2. It processes the virtqueue one last time.

The first step can be performed by any thread and without taking the
AioContext lock.

The second step may need the AioContext lock (depending on the device
implementation) and runs in the thread where request processing takes
place. virtio-blk and virtio-scsi therefore call
virtio_queue_aio_detach_host_notifier() from a BH that is scheduled in
AioContext.

The next patch will introduce a .drained_begin() function that needs to
call virtio_queue_aio_detach_host_notifier(). .drained_begin() functions
cannot call aio_poll() to wait synchronously for the BH. It is possible
for a .drained_poll() callback to asynchronously wait for the BH, but
that is more complex than necessary here.

Move the virtqueue processing out to the callers of
virtio_queue_aio_detach_host_notifier() so that the function can be
called from any thread. This is in preparation for the next patch.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230516190238.8401-17-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/dataplane/virtio-blk.c |  7 +++++++
 hw/scsi/virtio-scsi-dataplane.c | 14 ++++++++++++++
 hw/virtio/virtio.c              |  3 ---
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_data_plane_stop_bh(void *opaque)

     for (i = 0; i < s->conf->num_queues; i++) {
         VirtQueue *vq = virtio_get_queue(s->vdev, i);
+        EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq);

         virtio_queue_aio_detach_host_notifier(vq, s->ctx);
+
+        /*
+         * Test and clear notifier after disabling event, in case poll callback
+         * didn't have time to run.
+         */
+        virtio_queue_host_notifier_read(host_notifier);
     }
 }

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_dataplane_stop_bh(void *opaque)
 {
     VirtIOSCSI *s = opaque;
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    EventNotifier *host_notifier;
     int i;

     virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx);
+    host_notifier = virtio_queue_get_host_notifier(vs->ctrl_vq);
+
+    /*
+     * Test and clear notifier after disabling event, in case poll callback
+     * didn't have time to run.
+     */
+    virtio_queue_host_notifier_read(host_notifier);
+
     virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx);
+    host_notifier = virtio_queue_get_host_notifier(vs->event_vq);
+    virtio_queue_host_notifier_read(host_notifier);
+
     for (i = 0; i < vs->conf.num_queues; i++) {
         virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx);
+        host_notifier = virtio_queue_get_host_notifier(vs->cmd_vqs[i]);
+        virtio_queue_host_notifier_read(host_notifier);
     }
 }

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct
 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
     aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
-    /* Test and clear notifier before after disabling event,
-     * in case poll callback didn't have time to run. */
-    virtio_queue_host_notifier_read(&vq->host_notifier);
 }

 void virtio_queue_host_notifier_read(EventNotifier *n)
--
2.40.1


Now that the bdrv_drain_invoke() calls are pulled up to the callers of
bdrv_drain_recurse(), the 'begin' parameter isn't needed any more.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
     }
 }

-static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
+static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
     BdrvChild *child, *tmp;
     bool waited;
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
          */
             bdrv_ref(bs);
         }
-        waited |= bdrv_drain_recurse(bs, begin);
+        waited |= bdrv_drain_recurse(bs);
         if (in_main_loop) {
             bdrv_unref(bs);
         }
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
     }

     bdrv_drain_invoke(bs, true);
-    bdrv_drain_recurse(bs, true);
+    bdrv_drain_recurse(bs);
 }

 void bdrv_drained_end(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)

     bdrv_parent_drained_end(bs);
     bdrv_drain_invoke(bs, false);
-    bdrv_drain_recurse(bs, false);
+    bdrv_drain_recurse(bs);
     aio_enable_external(bdrv_get_aio_context(bs));
 }

@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
             if (aio_context == bdrv_get_aio_context(bs)) {
-                waited |= bdrv_drain_recurse(bs, true);
+                waited |= bdrv_drain_recurse(bs);
             }
         }
         aio_context_release(aio_context);
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
         aio_enable_external(aio_context);
         bdrv_parent_drained_end(bs);
         bdrv_drain_invoke(bs, false);
-        bdrv_drain_recurse(bs, false);
+        bdrv_drain_recurse(bs);
         aio_context_release(aio_context);
     }
--
2.13.6
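The two-step pattern the patch establishes, shown in isolation (a sketch
mirroring the calls in the diff above): detaching is safe from any thread,
while the final test-and-clear runs in the thread that owns request
processing.

    /* Step 1: remove the fd handler from the event loop (any thread). */
    virtio_queue_aio_detach_host_notifier(vq, ctx);

    /* Step 2: process the virtqueue one last time, in the request
     * processing thread, in case the poll callback didn't get to run. */
    virtio_queue_host_notifier_read(virtio_queue_get_host_notifier(vq));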
When opening the 'file' child moves bs to an iothread, we need to hold
the AioContext lock of it before we can call raw_apply_options() (and
more specifically, bdrv_getlength() inside of it).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-8-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/raw-format.c               | 5 +++++
 tests/unit/test-block-iothread.c | 3 ---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/block/raw-format.c b/block/raw-format.c
index XXXXXXX..XXXXXXX 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
     BDRVRawState *s = bs->opaque;
+    AioContext *ctx;
     bool has_size;
     uint64_t offset, size;
     BdrvChildRole file_role;
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                 bs->file->bs->filename);
     }

+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     ret = raw_apply_options(bs, s, offset, has_size, size, errp);
+    aio_context_release(ctx);
+
     if (ret < 0) {
         return ret;
     }
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -XXX,XX +XXX,XX @@ static void test_attach_second_node(void)
     qdict_put_str(options, "driver", "raw");
     qdict_put_str(options, "file", "base");

-    /* FIXME raw_open() should take ctx's lock internally */
-    aio_context_acquire(ctx);
     aio_context_acquire(main_ctx);
     filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
     aio_context_release(main_ctx);
-    aio_context_release(ctx);

     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs) == ctx);
--
2.40.1


The device is drained, so there is no point in waiting for requests at
the end of the drained section. Remove the bdrv_drain_recurse() calls
there.

The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e
in order to call the .bdrv_co_drain_end() driver callback. This is now
done by a separate bdrv_drain_invoke() call.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)

     bdrv_parent_drained_end(bs);
     bdrv_drain_invoke(bs, false);
-    bdrv_drain_recurse(bs);
     aio_enable_external(bdrv_get_aio_context(bs));
 }

@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
         aio_enable_external(aio_context);
         bdrv_parent_drained_end(bs);
         bdrv_drain_invoke(bs, false);
-        bdrv_drain_recurse(bs);
         aio_context_release(aio_context);
     }
--
2.13.6
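The rule raw_open() now follows can be stated as a small sketch: any call
that may poll (here bdrv_getlength() via raw_apply_options()) must run
under the lock of the AioContext that bs actually ended up in, not under
the main loop's lock. Condensed from the diff above:

    /* bs may have been moved to an iothread while opening its 'file'
     * child, so look the context up at the point of use. */
    AioContext *ctx = bdrv_get_aio_context(bs);

    aio_context_acquire(ctx);
    ret = raw_apply_options(bs, s, offset, has_size, size, errp);
    aio_context_release(ctx);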
From: Stefan Hajnoczi <stefanha@redhat.com>

The VuServer object has a refcount field and ref/unref APIs. The name is
confusing because it's actually an in-flight request counter instead of
a refcount.

Normally a refcount destroys the object upon reaching zero. The VuServer
counter is used to wake up the vhost-user coroutine when there are no
more requests.

Avoid confusion by renaming refcount and ref/unref to in_flight and
inc/dec.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-6-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/qemu/vhost-user-server.h     |  6 +++---
 block/export/vhost-user-blk-server.c | 11 +++++++----
 util/vhost-user-server.c             | 14 +++++++-------
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/vhost-user-server.h
+++ b/include/qemu/vhost-user-server.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
     const VuDevIface *vu_iface;

     /* Protected by ctx lock */
-    unsigned int refcount;
+    unsigned int in_flight;
     bool wait_idle;
     VuDev vu_dev;
     QIOChannel *ioc; /* The I/O channel with the client */
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,

 void vhost_user_server_stop(VuServer *server);

-void vhost_user_server_ref(VuServer *server);
-void vhost_user_server_unref(VuServer *server);
+void vhost_user_server_inc_in_flight(VuServer *server);
+void vhost_user_server_dec_in_flight(VuServer *server);

 void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
 void vhost_user_server_detach_aio_context(VuServer *server);
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -XXX,XX +XXX,XX @@ static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
     free(req);
 }

-/* Called with server refcount increased, must decrease before returning */
+/*
+ * Called with server in_flight counter increased, must decrease before
+ * returning.
+ */
 static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
 {
     VuBlkReq *req = opaque;
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
                                   in_num, out_num);
     if (in_len < 0) {
         free(req);
-        vhost_user_server_unref(server);
+        vhost_user_server_dec_in_flight(server);
         return;
     }

     vu_blk_req_complete(req, in_len);
-    vhost_user_server_unref(server);
+    vhost_user_server_dec_in_flight(server);
 }

 static void vu_blk_process_vq(VuDev *vu_dev, int idx)
@@ -XXX,XX +XXX,XX @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
         Coroutine *co =
             qemu_coroutine_create(vu_blk_virtio_process_req, req);

-        vhost_user_server_ref(server);
+        vhost_user_server_inc_in_flight(server);
         qemu_coroutine_enter(co);
     }
 }
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index XXXXXXX..XXXXXXX 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -XXX,XX +XXX,XX @@ static void panic_cb(VuDev *vu_dev, const char *buf)
     error_report("vu_panic: %s", buf);
 }

-void vhost_user_server_ref(VuServer *server)
+void vhost_user_server_inc_in_flight(VuServer *server)
 {
     assert(!server->wait_idle);
-    server->refcount++;
+    server->in_flight++;
 }

-void vhost_user_server_unref(VuServer *server)
+void vhost_user_server_dec_in_flight(VuServer *server)
 {
-    server->refcount--;
-    if (server->wait_idle && !server->refcount) {
+    server->in_flight--;
+    if (server->wait_idle && !server->in_flight) {
         aio_co_wake(server->co_trip);
     }
 }
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void vu_client_trip(void *opaque)
         /* Keep running */
     }

-    if (server->refcount) {
+    if (server->in_flight) {
         /* Wait for requests to complete before we can unmap the memory */
         server->wait_idle = true;
         qemu_coroutine_yield();
         server->wait_idle = false;
     }
-    assert(server->refcount == 0);
+    assert(server->in_flight == 0);

     vu_deinit(vu_dev);

--
2.40.1


Drain requests are propagated to child nodes, parent nodes and directly
to the AioContext. The order in which this happened was different
between all combinations of drain/drain_all and begin/end.

The correct order is to keep children only drained when their parents
are also drained. This means that at the start of a drained section, the
AioContext needs to be drained first, the parents second and only then
the children. The correct order for the end of a drained section is the
opposite.

This patch changes the three other functions to follow the example of
bdrv_drained_begin(), which is the only one that got it right.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
         return;
     }

+    /* Stop things in parent-to-child order */
     if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
         aio_disable_external(bdrv_get_aio_context(bs));
         bdrv_parent_drained_begin(bs);
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
         return;
     }

-    bdrv_parent_drained_end(bs);
+    /* Re-enable things in child-to-parent order */
     bdrv_drain_invoke(bs, false);
+    bdrv_parent_drained_end(bs);
     aio_enable_external(bdrv_get_aio_context(bs));
 }

@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);

+        /* Stop things in parent-to-child order */
         aio_context_acquire(aio_context);
-        bdrv_parent_drained_begin(bs);
         aio_disable_external(aio_context);
+        bdrv_parent_drained_begin(bs);
         bdrv_drain_invoke(bs, true);
         aio_context_release(aio_context);

@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);

+        /* Re-enable things in child-to-parent order */
         aio_context_acquire(aio_context);
-        aio_enable_external(aio_context);
-        bdrv_parent_drained_end(bs);
         bdrv_drain_invoke(bs, false);
+        bdrv_parent_drained_end(bs);
+        aio_enable_external(aio_context);
         aio_context_release(aio_context);
     }
 }
--
2.13.6
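The in-flight counter idiom from the rename above generalizes well. A
distilled sketch (the 'Server' type here is a hypothetical stand-in; the
real code lives in util/vhost-user-server.c): the last completion wakes a
coroutine that is waiting for the server to go idle.

    typedef struct Server {
        unsigned int in_flight;   /* requests currently being processed */
        bool wait_idle;           /* a coroutine is waiting for idle */
        Coroutine *co_trip;       /* the coroutine to wake */
    } Server;

    static void inc_in_flight(Server *s)
    {
        s->in_flight++;
    }

    static void dec_in_flight(Server *s)
    {
        if (--s->in_flight == 0 && s->wait_idle) {
            aio_co_wake(s->co_trip);   /* resume the waiter at idle */
        }
    }

Unlike a refcount, reaching zero destroys nothing; it only signals quiescence.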
This fixes blk_new_open() to not assume that bs is in the main context.

In particular, the BlockBackend must be created with the right
AioContext because it will refuse to move to a different context
afterwards. (blk->allow_aio_context_change is false.)

Use this opportunity to use blk_insert_bs() instead of duplicating the
bdrv_root_attach_child() call. This is consistent with what
blk_new_with_bs() does. Add comments to document the locking rules.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-5-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/block-backend.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
  * Both sets of permissions can be changed later using blk_set_perm().
  *
  * Return the new BlockBackend on success, null on failure.
+ *
+ * Callers must hold the AioContext lock of @bs.
  */
 BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
                               uint64_t shared_perm, Error **errp)
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,

 /*
  * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
- * The new BlockBackend is in the main AioContext.
+ * By default, the new BlockBackend is in the main AioContext, but if the
+ * parameters connect it with any existing node in a different AioContext, it
+ * may end up there instead.
  *
  * Just as with bdrv_open(), after having called this function the reference to
  * @options belongs to the block layer (even on failure).
  *
+ * Called without holding an AioContext lock.
+ *
  * TODO: Remove @filename and @flags; it should be possible to specify a whole
  * BDS tree just by specifying the @options QDict (or @reference,
  * alternatively). At the time of adding this function, this is not possible,
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
 {
     BlockBackend *blk;
     BlockDriverState *bs;
+    AioContext *ctx;
     uint64_t perm = 0;
     uint64_t shared = BLK_PERM_ALL;

@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
         shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
     }

-    blk = blk_new(qemu_get_aio_context(), perm, shared);
     aio_context_acquire(qemu_get_aio_context());
     bs = bdrv_open(filename, reference, options, flags, errp);
     aio_context_release(qemu_get_aio_context());
     if (!bs) {
-        blk_unref(blk);
         return NULL;
     }

-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
-                                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                                       perm, shared, blk, errp);
+    /* bdrv_open() could have moved bs to a different AioContext */
+    ctx = bdrv_get_aio_context(bs);
+    blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
+    blk->perm = perm;
+    blk->shared_perm = shared;
+
+    aio_context_acquire(ctx);
+    blk_insert_bs(blk, bs, errp);
+    bdrv_unref(bs);
+    aio_context_release(ctx);
+
     if (!blk->root) {
         blk_unref(blk);
         return NULL;
@@ -XXX,XX +XXX,XX @@ void blk_remove_bs(BlockBackend *blk)

 /*
  * Associates a new BlockDriverState with @blk.
+ *
+ * Callers must hold the AioContext lock of @bs.
  */
 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
 {
-- 
2.40.1

Commit 15afd94a047 added code to acquire and release the AioContext in
qemuio_command(). This means that the lock is taken twice now in the
call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for
any requests issued to nodes in a non-mainloop AioContext.

Dropping the first locking from hmp_qemu_io() fixes the problem.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hmp.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/hmp.c b/hmp.c
index XXXXXXX..XXXXXXX 100644
--- a/hmp.c
+++ b/hmp.c
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
 {
     BlockBackend *blk;
     BlockBackend *local_blk = NULL;
-    AioContext *aio_context;
     const char* device = qdict_get_str(qdict, "device");
     const char* command = qdict_get_str(qdict, "command");
     Error *err = NULL;
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
         }
     }

-    aio_context = blk_get_aio_context(blk);
-    aio_context_acquire(aio_context);
-
     /*
      * Notably absent: Proper permission management. This is sad, but it seems
      * almost impossible to achieve without changing the semantics and thereby
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
      */
     qemuio_command(blk, command);

-    aio_context_release(aio_context);
-
fail:
     blk_unref(local_blk);
     hmp_handle_error(mon, &err);
-- 
2.13.6
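
The comments added above turn an implicit assumption into a documented rule.
A caller-side sketch of that rule, purely for illustration (error handling
omitted; variables assumed to exist):

    /* Sketch: honouring the documented rule that callers of
     * blk_insert_bs() must hold the AioContext lock of @bs. */
    AioContext *ctx = bdrv_get_aio_context(bs);

    aio_context_acquire(ctx);
    ret = blk_insert_bs(blk, bs, errp);
    aio_context_release(ctx);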
qcow2_open() doesn't work correctly when opening the 'file' child moves
bs to an iothread, for several reasons:

- It uses BDRV_POLL_WHILE() to wait for the qcow2_open_entry()
  coroutine, which involves dropping the AioContext lock for bs when it
  is not in the main context - but we don't hold it, so this crashes.

- It runs the qcow2_open_entry() coroutine in the current thread instead
  of the new AioContext of bs.

- qcow2_open_entry() doesn't notify the main loop when it's done.

This patch fixes these issues around delegating work to a coroutine.
Temporarily dropping the main AioContext lock is not necessary because
we know we run in the main thread.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-7-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c       | 6 ++++++
 block/qcow2.c | 8 ++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ done:
  * BlockdevRef.
  *
  * The BlockdevRef will be removed from the options QDict.
+ *
+ * @parent can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  */
 BdrvChild *bdrv_open_child(const char *filename,
                            QDict *options, const char *bdref_key,
@@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_open_child(const char *filename,

 /*
  * Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
+ *
+ * @parent can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  */
 int bdrv_open_file_child(const char *filename,
                          QDict *options, const char *bdref_key,
diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qcow2_open_entry(void *opaque)
     qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true,
                              qoc->errp);
     qemu_co_mutex_unlock(&s->lock);
+
+    aio_wait_kick();
 }

 static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
@@ -XXX,XX +XXX,XX @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,

     assert(!qemu_in_coroutine());
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
-    qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
-    BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
+
+    aio_co_enter(bdrv_get_aio_context(bs),
+                 qemu_coroutine_create(qcow2_open_entry, &qoc));
+    AIO_WAIT_WHILE_UNLOCKED(NULL, qoc.ret == -EINPROGRESS);

     return qoc.ret;
 }
-- 
2.40.1

From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>

Since bdrv_co_preadv does all necessary checks including
reading after the end of the backing file, avoid duplication
of verification before bdrv_co_preadv call.

Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2.h |  3 ---
 block/qcow2.c | 51 ++++++++-------------------------------------------
 2 files changed, 8 insertions(+), 46 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset)
 }

 /* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
-                        int64_t sector_num, int nb_sectors);
-
 int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                      int refcount_order, bool generous_increase,
                                      uint64_t *refblock_count);
diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
     return status;
 }

-/* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
-                        int64_t offset, int bytes)
-{
-    uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
-    int n1;
-
-    if ((offset + bytes) <= bs_size) {
-        return bytes;
-    }
-
-    if (offset >= bs_size) {
-        n1 = 0;
-    } else {
-        n1 = bs_size - offset;
-    }
-
-    qemu_iovec_memset(qiov, n1, 0, bytes - n1);
-
-    return n1;
-}
-
 static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                                         uint64_t bytes, QEMUIOVector *qiov,
                                         int flags)
 {
     BDRVQcow2State *s = bs->opaque;
-    int offset_in_cluster, n1;
+    int offset_in_cluster;
     int ret;
     unsigned int cur_bytes; /* number of bytes in current iteration */
     uint64_t cluster_offset = 0;
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
         case QCOW2_CLUSTER_UNALLOCATED:

             if (bs->backing) {
-                /* read from the base image */
-                n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
-                                         offset, cur_bytes);
-                if (n1 > 0) {
-                    QEMUIOVector local_qiov;
-
-                    qemu_iovec_init(&local_qiov, hd_qiov.niov);
-                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
-
-                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
-                    qemu_co_mutex_unlock(&s->lock);
-                    ret = bdrv_co_preadv(bs->backing, offset, n1,
-                                         &local_qiov, 0);
-                    qemu_co_mutex_lock(&s->lock);
-
-                    qemu_iovec_destroy(&local_qiov);
-
-                    if (ret < 0) {
-                        goto fail;
-                    }
+                BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+                qemu_co_mutex_unlock(&s->lock);
+                ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
+                                     &hd_qiov, 0);
+                qemu_co_mutex_lock(&s->lock);
+                if (ret < 0) {
+                    goto fail;
+                }
             } else {
                 /* Note: in this case, no need to wait */
-- 
2.13.6
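
The qcow2 fix above follows a general pattern for running driver code in the
node's own AioContext from the main loop. Schematically, using only the calls
that appear in the diff (the qoc struct fields are simplified):

    qoc.ret = -EINPROGRESS;

    /* Run qcow2_open_entry() in bs's AioContext, not the current thread */
    aio_co_enter(bdrv_get_aio_context(bs),
                 qemu_coroutine_create(qcow2_open_entry, &qoc));

    /* Poll without holding an AioContext lock; the aio_wait_kick() that the
     * coroutine now calls is what wakes this wait up when the result lands */
    AIO_WAIT_WHILE_UNLOCKED(NULL, qoc.ret == -EINPROGRESS);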
Removing a quorum child node with x-blockdev-change results in a quorum
driver state that cannot be recreated with create options because it
would require a list with gaps. This causes trouble in at least
.bdrv_refresh_filename().

Document this problem so that we won't accidentally mark the command
stable without having addressed it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
---
 qapi/block-core.json | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -XXX,XX +XXX,XX @@
 # does not support all kinds of operations, all kinds of children, nor
 # all block drivers.
 #
+# FIXME Removing children from a quorum node means introducing gaps in the
+# child indices. This cannot be represented in the 'children' list of
+# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename().
+#
 # Warning: The data in a new quorum child MUST be consistent with that of
 # the rest of the array.
 #
-- 
2.13.6
It has no internal callers, so its only use is being called from
individual test cases. If the name starts with an underscore, it is
considered private and linters warn against calling it. 256 only gets
away with it currently because it's on the exception list for linters.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-12-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/iotests.py | 2 +-
 tests/qemu-iotests/256        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -XXX,XX +XXX,XX @@ def _verify_virtio_blk() -> None:
     if 'virtio-blk' not in out:
         notrun('Missing virtio-blk in QEMU binary')

-def _verify_virtio_scsi_pci_or_ccw() -> None:
+def verify_virtio_scsi_pci_or_ccw() -> None:
     out = qemu_pipe('-M', 'none', '-device', 'help')
     if 'virtio-scsi-pci' not in out and 'virtio-scsi-ccw' not in out:
         notrun('Missing virtio-scsi-pci or virtio-scsi-ccw in QEMU binary')
diff --git a/tests/qemu-iotests/256 b/tests/qemu-iotests/256
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/256
+++ b/tests/qemu-iotests/256
@@ -XXX,XX +XXX,XX @@ import os
 import iotests
 from iotests import log

-iotests._verify_virtio_scsi_pci_or_ccw()
+iotests.verify_virtio_scsi_pci_or_ccw()

 iotests.script_initialize(supported_fmts=['qcow2'])
 size = 64 * 1024 * 1024
-- 
2.40.1

From: Doug Gale <doug16k@gmail.com>

Add trace output for commands, errors, and undefined behavior.
Add guest error log output for undefined behavior.
Report invalid undefined accesses to MMIO.
Annotate unlikely error checks with unlikely.

Signed-off-by: Doug Gale <doug16k@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/nvme.c       | 349 ++++++++++++++++++++++++++++++++++++++++++--------
 hw/block/trace-events |  93 ++++++++++++++
 2 files changed, 390 insertions(+), 52 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -XXX,XX +XXX,XX @@
 #include "qapi/visitor.h"
 #include "sysemu/block-backend.h"

+#include "qemu/log.h"
+#include "trace.h"
 #include "nvme.h"

+#define NVME_GUEST_ERR(trace, fmt, ...) \
+    do { \
+        (trace_##trace)(__VA_ARGS__); \
+        qemu_log_mask(LOG_GUEST_ERROR, #trace \
+            " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
+    } while (0)
+
 static void nvme_process_sq(void *opaque);

 static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
@@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
 {
     if (cq->irq_enabled) {
         if (msix_enabled(&(n->parent_obj))) {
+            trace_nvme_irq_msix(cq->vector);
             msix_notify(&(n->parent_obj), cq->vector);
         } else {
+            trace_nvme_irq_pin();
             pci_irq_pulse(&n->parent_obj);
         }
+    } else {
+        trace_nvme_irq_masked();
     }
 }

@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
     trans_len = MIN(len, trans_len);
     int num_prps = (len >> n->page_bits) + 1;

-    if (!prp1) {
+    if (unlikely(!prp1)) {
+        trace_nvme_err_invalid_prp();
         return NVME_INVALID_FIELD | NVME_DNR;
     } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
                prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
     }
     len -= trans_len;
     if (len) {
-        if (!prp2) {
+        if (unlikely(!prp2)) {
+            trace_nvme_err_invalid_prp2_missing();
             goto unmap;
         }
         if (len > n->page_size) {
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                 uint64_t prp_ent = le64_to_cpu(prp_list[i]);

                 if (i == n->max_prp_ents - 1 && len > n->page_size) {
-                    if (!prp_ent || prp_ent & (n->page_size - 1)) {
+                    if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
+                        trace_nvme_err_invalid_prplist_ent(prp_ent);
                         goto unmap;
                     }

@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                     prp_ent = le64_to_cpu(prp_list[i]);
                 }

-                if (!prp_ent || prp_ent & (n->page_size - 1)) {
+                if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
+                    trace_nvme_err_invalid_prplist_ent(prp_ent);
                     goto unmap;
                 }

@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                 i++;
             }
         } else {
-            if (prp2 & (n->page_size - 1)) {
+            if (unlikely(prp2 & (n->page_size - 1))) {
+                trace_nvme_err_invalid_prp2_align(prp2);
                 goto unmap;
             }
             if (qsg->nsg) {
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     QEMUIOVector iov;
     uint16_t status = NVME_SUCCESS;

+    trace_nvme_dma_read(prp1, prp2);
+
     if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
     if (qsg.nsg > 0) {
-        if (dma_buf_read(ptr, len, &qsg)) {
+        if (unlikely(dma_buf_read(ptr, len, &qsg))) {
+            trace_nvme_err_invalid_dma();
             status = NVME_INVALID_FIELD | NVME_DNR;
         }
         qemu_sglist_destroy(&qsg);
     } else {
-        if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
+        if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
+            trace_nvme_err_invalid_dma();
             status = NVME_INVALID_FIELD | NVME_DNR;
         }
         qemu_iovec_destroy(&iov);
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
     uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);

-    if (slba + nlb > ns->id_ns.nsze) {
+    if (unlikely(slba + nlb > ns->id_ns.nsze)) {
+        trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
         return NVME_LBA_RANGE | NVME_DNR;
     }

@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
     enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;

-    if ((slba + nlb) > ns->id_ns.nsze) {
+    trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
+
+    if (unlikely((slba + nlb) > ns->id_ns.nsze)) {
         block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+        trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
         return NVME_LBA_RANGE | NVME_DNR;
     }

@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     NvmeNamespace *ns;
     uint32_t nsid = le32_to_cpu(cmd->nsid);

-    if (nsid == 0 || nsid > n->num_namespaces) {
+    if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
+        trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
         return NVME_INVALID_NSID | NVME_DNR;
     }

@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     case NVME_CMD_READ:
         return nvme_rw(n, ns, cmd, req);
     default:
+        trace_nvme_err_invalid_opc(cmd->opcode);
         return NVME_INVALID_OPCODE | NVME_DNR;
     }
 }
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
     NvmeCQueue *cq;
     uint16_t qid = le16_to_cpu(c->qid);

-    if (!qid || nvme_check_sqid(n, qid)) {
+    if (unlikely(!qid || nvme_check_sqid(n, qid))) {
+        trace_nvme_err_invalid_del_sq(qid);
         return NVME_INVALID_QID | NVME_DNR;
     }

+    trace_nvme_del_sq(qid);
+
     sq = n->sq[qid];
     while (!QTAILQ_EMPTY(&sq->out_req_list)) {
         req = QTAILQ_FIRST(&sq->out_req_list);
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
     uint16_t qflags = le16_to_cpu(c->sq_flags);
     uint64_t prp1 = le64_to_cpu(c->prp1);

-    if (!cqid || nvme_check_cqid(n, cqid)) {
+    trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);
+
+    if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
+        trace_nvme_err_invalid_create_sq_cqid(cqid);
         return NVME_INVALID_CQID | NVME_DNR;
     }
-    if (!sqid || !nvme_check_sqid(n, sqid)) {
+    if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) {
+        trace_nvme_err_invalid_create_sq_sqid(sqid);
         return NVME_INVALID_QID | NVME_DNR;
     }
-    if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
+    if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
+        trace_nvme_err_invalid_create_sq_size(qsize);
         return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
     }
-    if (!prp1 || prp1 & (n->page_size - 1)) {
+    if (unlikely(!prp1 || prp1 & (n->page_size - 1))) {
+        trace_nvme_err_invalid_create_sq_addr(prp1);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-    if (!(NVME_SQ_FLAGS_PC(qflags))) {
+    if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
+        trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
         return NVME_INVALID_FIELD | NVME_DNR;
     }
     sq = g_malloc0(sizeof(*sq));
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
     NvmeCQueue *cq;
     uint16_t qid = le16_to_cpu(c->qid);

-    if (!qid || nvme_check_cqid(n, qid)) {
+    if (unlikely(!qid || nvme_check_cqid(n, qid))) {
+        trace_nvme_err_invalid_del_cq_cqid(qid);
         return NVME_INVALID_CQID | NVME_DNR;
     }

     cq = n->cq[qid];
-    if (!QTAILQ_EMPTY(&cq->sq_list)) {
+    if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
+        trace_nvme_err_invalid_del_cq_notempty(qid);
         return NVME_INVALID_QUEUE_DEL;
     }
+    trace_nvme_del_cq(qid);
     nvme_free_cq(cq, n);
     return NVME_SUCCESS;
 }
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
     uint16_t qflags = le16_to_cpu(c->cq_flags);
     uint64_t prp1 = le64_to_cpu(c->prp1);

-    if (!cqid || !nvme_check_cqid(n, cqid)) {
+    trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
+                         NVME_CQ_FLAGS_IEN(qflags) != 0);
+
+    if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) {
+        trace_nvme_err_invalid_create_cq_cqid(cqid);
         return NVME_INVALID_CQID | NVME_DNR;
     }
-    if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
+    if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
+        trace_nvme_err_invalid_create_cq_size(qsize);
         return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
     }
-    if (!prp1) {
+    if (unlikely(!prp1)) {
+        trace_nvme_err_invalid_create_cq_addr(prp1);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-    if (vector > n->num_queues) {
+    if (unlikely(vector > n->num_queues)) {
+        trace_nvme_err_invalid_create_cq_vector(vector);
         return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
     }
-    if (!(NVME_CQ_FLAGS_PC(qflags))) {
+    if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
+        trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
         return NVME_INVALID_FIELD | NVME_DNR;
     }

@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
     uint64_t prp1 = le64_to_cpu(c->prp1);
     uint64_t prp2 = le64_to_cpu(c->prp2);

+    trace_nvme_identify_ctrl();
+
     return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
                              prp1, prp2);
 }
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
     uint64_t prp1 = le64_to_cpu(c->prp1);
     uint64_t prp2 = le64_to_cpu(c->prp2);

-    if (nsid == 0 || nsid > n->num_namespaces) {
+    trace_nvme_identify_ns(nsid);
+
+    if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
+        trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
         return NVME_INVALID_NSID | NVME_DNR;
     }

     ns = &n->namespaces[nsid - 1];
+
     return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
                              prp1, prp2);
 }
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
     uint16_t ret;
     int i, j = 0;

+    trace_nvme_identify_nslist(min_nsid);
+
     list = g_malloc0(data_len);
     for (i = 0; i < n->num_namespaces; i++) {
         if (i < min_nsid) {
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
     case 0x02:
         return nvme_identify_nslist(n, c);
     default:
+        trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
         return NVME_INVALID_FIELD | NVME_DNR;
     }
 }
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     switch (dw10) {
     case NVME_VOLATILE_WRITE_CACHE:
         result = blk_enable_write_cache(n->conf.blk);
+        trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
         break;
     case NVME_NUMBER_OF_QUEUES:
         result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
+        trace_nvme_getfeat_numq(result);
         break;
     default:
+        trace_nvme_err_invalid_getfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
     }

@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
         break;
     case NVME_NUMBER_OF_QUEUES:
+        trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
+                                ((dw11 >> 16) & 0xFFFF) + 1,
+                                n->num_queues - 1, n->num_queues - 1);
         req->cqe.result =
             cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
         break;
     default:
+        trace_nvme_err_invalid_setfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
     return NVME_SUCCESS;
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     case NVME_ADM_CMD_GET_FEATURES:
         return nvme_get_feature(n, cmd, req);
     default:
+        trace_nvme_err_invalid_admin_opc(cmd->opcode);
         return NVME_INVALID_OPCODE | NVME_DNR;
     }
 }
@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
     uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
     uint32_t page_size = 1 << page_bits;

-    if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq ||
-        n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) ||
-        NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
-        NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) ||
-        NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
-        NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
-        NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
-        NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
-        !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) {
+    if (unlikely(n->cq[0])) {
+        trace_nvme_err_startfail_cq();
+        return -1;
+    }
+    if (unlikely(n->sq[0])) {
+        trace_nvme_err_startfail_sq();
+        return -1;
+    }
+    if (unlikely(!n->bar.asq)) {
+        trace_nvme_err_startfail_nbarasq();
+        return -1;
+    }
+    if (unlikely(!n->bar.acq)) {
+        trace_nvme_err_startfail_nbaracq();
+        return -1;
+    }
+    if (unlikely(n->bar.asq & (page_size - 1))) {
+        trace_nvme_err_startfail_asq_misaligned(n->bar.asq);
+        return -1;
+    }
+    if (unlikely(n->bar.acq & (page_size - 1))) {
+        trace_nvme_err_startfail_acq_misaligned(n->bar.acq);
+        return -1;
+    }
+    if (unlikely(NVME_CC_MPS(n->bar.cc) <
+                 NVME_CAP_MPSMIN(n->bar.cap))) {
+        trace_nvme_err_startfail_page_too_small(
+                    NVME_CC_MPS(n->bar.cc),
+                    NVME_CAP_MPSMIN(n->bar.cap));
+        return -1;
+    }
+    if (unlikely(NVME_CC_MPS(n->bar.cc) >
+                 NVME_CAP_MPSMAX(n->bar.cap))) {
+        trace_nvme_err_startfail_page_too_large(
+                    NVME_CC_MPS(n->bar.cc),
+                    NVME_CAP_MPSMAX(n->bar.cap));
+        return -1;
+    }
+    if (unlikely(NVME_CC_IOCQES(n->bar.cc) <
+                 NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
+        trace_nvme_err_startfail_cqent_too_small(
+                    NVME_CC_IOCQES(n->bar.cc),
+                    NVME_CTRL_CQES_MIN(n->bar.cap));
+        return -1;
+    }
+    if (unlikely(NVME_CC_IOCQES(n->bar.cc) >
+                 NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
+        trace_nvme_err_startfail_cqent_too_large(
+                    NVME_CC_IOCQES(n->bar.cc),
+                    NVME_CTRL_CQES_MAX(n->bar.cap));
+        return -1;
+    }
+    if (unlikely(NVME_CC_IOSQES(n->bar.cc) <
+                 NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
+        trace_nvme_err_startfail_sqent_too_small(
+                    NVME_CC_IOSQES(n->bar.cc),
+                    NVME_CTRL_SQES_MIN(n->bar.cap));
+        return -1;
+    }
+    if (unlikely(NVME_CC_IOSQES(n->bar.cc) >
+                 NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
+        trace_nvme_err_startfail_sqent_too_large(
+                    NVME_CC_IOSQES(n->bar.cc),
+                    NVME_CTRL_SQES_MAX(n->bar.cap));
+        return -1;
+    }
+    if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) {
+        trace_nvme_err_startfail_asqent_sz_zero();
+        return -1;
+    }
+    if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) {
+        trace_nvme_err_startfail_acqent_sz_zero();
         return -1;
     }

@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
 static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
                            unsigned size)
 {
+    if (unlikely(offset & (sizeof(uint32_t) - 1))) {
+        NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32,
+                       "MMIO write not 32-bit aligned,"
+                       " offset=0x%"PRIx64"", offset);
+        /* should be ignored, fall through for now */
+    }
+
+    if (unlikely(size < sizeof(uint32_t))) {
+        NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall,
+                       "MMIO write smaller than 32-bits,"
+                       " offset=0x%"PRIx64", size=%u",
+                       offset, size);
+        /* should be ignored, fall through for now */
+    }
+
     switch (offset) {
-    case 0xc:
+    case 0xc:   /* INTMS */
+        if (unlikely(msix_enabled(&(n->parent_obj)))) {
+            NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
+                           "undefined access to interrupt mask set"
+                           " when MSI-X is enabled");
+            /* should be ignored, fall through for now */
+        }
         n->bar.intms |= data & 0xffffffff;
         n->bar.intmc = n->bar.intms;
+        trace_nvme_mmio_intm_set(data & 0xffffffff,
+                                 n->bar.intmc);
         break;
-    case 0x10:
+    case 0x10:  /* INTMC */
+        if (unlikely(msix_enabled(&(n->parent_obj)))) {
+            NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
+                           "undefined access to interrupt mask clr"
+                           " when MSI-X is enabled");
+            /* should be ignored, fall through for now */
+        }
         n->bar.intms &= ~(data & 0xffffffff);
         n->bar.intmc = n->bar.intms;
+        trace_nvme_mmio_intm_clr(data & 0xffffffff,
+                                 n->bar.intmc);
         break;
-    case 0x14:
+    case 0x14:  /* CC */
+        trace_nvme_mmio_cfg(data & 0xffffffff);
         /* Windows first sends data, then sends enable bit */
         if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
             !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc))
@@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,

         if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
             n->bar.cc = data;
-            if (nvme_start_ctrl(n)) {
+            if (unlikely(nvme_start_ctrl(n))) {
+                trace_nvme_err_startfail();
                 n->bar.csts = NVME_CSTS_FAILED;
             } else {
+                trace_nvme_mmio_start_success();
                 n->bar.csts = NVME_CSTS_READY;
             }
         } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
+            trace_nvme_mmio_stopped();
             nvme_clear_ctrl(n);
             n->bar.csts &= ~NVME_CSTS_READY;
         }
         if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
-                nvme_clear_ctrl(n);
-                n->bar.cc = data;
-                n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
+            trace_nvme_mmio_shutdown_set();
+            nvme_clear_ctrl(n);
+            n->bar.cc = data;
+            n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
         } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
-                n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
-                n->bar.cc = data;
+            trace_nvme_mmio_shutdown_cleared();
+            n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
+            n->bar.cc = data;
+        }
+        break;
+    case 0x1C:  /* CSTS */
+        if (data & (1 << 4)) {
+            NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported,
+                           "attempted to W1C CSTS.NSSRO"
+                           " but CAP.NSSRS is zero (not supported)");
+        } else if (data != 0) {
+            NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts,
+                           "attempted to set a read only bit"
+                           " of controller status");
+        }
+        break;
+    case 0x20:  /* NSSR */
+        if (data == 0x4E564D65) {
+            trace_nvme_ub_mmiowr_ssreset_unsupported();
+        } else {
+            /* The spec says that writes of other values have no effect */
+            return;
         }
         break;
-    case 0x24:
+    case 0x24:  /* AQA */
         n->bar.aqa = data & 0xffffffff;
+        trace_nvme_mmio_aqattr(data & 0xffffffff);
         break;
-    case 0x28:
+    case 0x28:  /* ASQ */
         n->bar.asq = data;
+        trace_nvme_mmio_asqaddr(data);
         break;
-    case 0x2c:
+    case 0x2c:  /* ASQ hi */
         n->bar.asq |= data << 32;
+        trace_nvme_mmio_asqaddr_hi(data, n->bar.asq);
         break;
-    case 0x30:
+    case 0x30:  /* ACQ */
+        trace_nvme_mmio_acqaddr(data);
         n->bar.acq = data;
         break;
-    case 0x34:
+    case 0x34:  /* ACQ hi */
         n->bar.acq |= data << 32;
+        trace_nvme_mmio_acqaddr_hi(data, n->bar.acq);
         break;
+    case 0x38:  /* CMBLOC */
+        NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved,
+                       "invalid write to reserved CMBLOC"
+                       " when CMBSZ is zero, ignored");
+        return;
+    case 0x3C:  /* CMBSZ */
+        NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
+                       "invalid write to read only CMBSZ, ignored");
+        return;
     default:
+        NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
+                       "invalid MMIO write,"
+                       " offset=0x%"PRIx64", data=%"PRIx64"",
+                       offset, data);
         break;
     }
 }
@@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
     uint8_t *ptr = (uint8_t *)&n->bar;
     uint64_t val = 0;

+    if (unlikely(addr & (sizeof(uint32_t) - 1))) {
+        NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32,
+                       "MMIO read not 32-bit aligned,"
+                       " offset=0x%"PRIx64"", addr);
+        /* should RAZ, fall through for now */
+    } else if (unlikely(size < sizeof(uint32_t))) {
+        NVME_GUEST_ERR(nvme_ub_mmiord_toosmall,
+                       "MMIO read smaller than 32-bits,"
+                       " offset=0x%"PRIx64"", addr);
+        /* should RAZ, fall through for now */
+    }
+
     if (addr < sizeof(n->bar)) {
         memcpy(&val, ptr + addr, size);
+    } else {
+        NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
+                       "MMIO read beyond last register,"
+                       " offset=0x%"PRIx64", returning 0", addr);
     }
+
     return val;
 }

@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
 {
     uint32_t qid;

-    if (addr & ((1 << 2) - 1)) {
+    if (unlikely(addr & ((1 << 2) - 1))) {
+        NVME_GUEST_ERR(nvme_ub_db_wr_misaligned,
+                       "doorbell write not 32-bit aligned,"
+                       " offset=0x%"PRIx64", ignoring", addr);
         return;
     }

     if (((addr - 0x1000) >> 2) & 1) {
+        /* Completion queue doorbell write */
+
         uint16_t new_head = val & 0xffff;
         int start_sqs;
         NvmeCQueue *cq;

         qid = (addr - (0x1000 + (1 << 2))) >> 3;
-        if (nvme_check_cqid(n, qid)) {
+        if (unlikely(nvme_check_cqid(n, qid))) {
+            NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq,
+                           "completion queue doorbell write"
+                           " for nonexistent queue,"
+                           " sqid=%"PRIu32", ignoring", qid);
             return;
         }

         cq = n->cq[qid];
-        if (new_head >= cq->size) {
+        if (unlikely(new_head >= cq->size)) {
+            NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead,
+                           "completion queue doorbell write value"
+                           " beyond queue size, sqid=%"PRIu32","
+                           " new_head=%"PRIu16", ignoring",
+                           qid, new_head);
             return;
         }

@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
             nvme_isr_notify(n, cq);
         }
     } else {
+        /* Submission queue doorbell write */
+
         uint16_t new_tail = val & 0xffff;
         NvmeSQueue *sq;

         qid = (addr - 0x1000) >> 3;
-        if (nvme_check_sqid(n, qid)) {
+        if (unlikely(nvme_check_sqid(n, qid))) {
+            NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq,
+                           "submission queue doorbell write"
+                           " for nonexistent queue,"
+                           " sqid=%"PRIu32", ignoring", qid);
             return;
         }

         sq = n->sq[qid];
-        if (new_tail >= sq->size) {
+        if (unlikely(new_tail >= sq->size)) {
+            NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail,
+                           "submission queue doorbell write value"
+                           " beyond queue size, sqid=%"PRIu32","
+                           " new_tail=%"PRIu16", ignoring",
+                           qid, new_tail);
             return;
         }

diff --git a/hw/block/trace-events b/hw/block/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6
 hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d"
 hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d"

+# hw/block/nvme.c
+# nvme traces for successful events
+nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
+nvme_irq_pin(void) "pulsing IRQ pin"
+nvme_irq_masked(void) "IRQ is masked"
+nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
+nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
+nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
+nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
+nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
+nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
+nvme_identify_ctrl(void) "identify controller"
+nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
+nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s"
+nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
+nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
+nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
+nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
+nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
+nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64""
+nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64""
+nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64""
+nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
+nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
+nvme_mmio_start_success(void) "setting controller enable bit succeeded"
+nvme_mmio_stopped(void) "cleared controller enable bit"
+nvme_mmio_shutdown_set(void) "shutdown bit set"
+nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
+
+# nvme traces for error conditions
+nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
+nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
+nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
+nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred"
+nvme_err_invalid_field(void) "invalid field"
+nvme_err_invalid_prp(void) "invalid PRP"
+nvme_err_invalid_sgl(void) "invalid SGL"
+nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u"
+nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
+nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
+nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
+nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
+nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
+nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16""
+nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16""
+nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64""
+nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16""
+nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16""
+nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16""
+nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16""
+nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16""
+nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64""
+nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16""
+nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16""
+nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
+nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
+nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
+nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
+nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
+nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
+nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null"
+nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64""
+nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64""
+nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u"
+nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u"
+nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u"
+nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u"
+nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u"
+nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u"
+nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
+nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
+nvme_err_startfail(void) "setting controller enable bit failed"
+
+# Traces for undefined behavior
+nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
+nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u"
+nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled"
+nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status"
+nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)"
+nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
+nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
+nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
+nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
+nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
+nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
+nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0"
+nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring"
+nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring"
+nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring"
+nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring"
+nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring"
+
 # hw/block/xen_disk.c
 xen_disk_alloc(char *name) "%s"
 xen_disk_init(char *name) "%s"
-- 
2.13.6
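
The NVME_GUEST_ERR() macro keeps the trace event and the guest-error log in
sync by deriving the log prefix from the trace event name. For example, the
doorbell check above expands roughly as follows (derived from the macro
definition in the diff; the expansion is approximate):

    NVME_GUEST_ERR(nvme_ub_db_wr_misaligned,
                   "doorbell write not 32-bit aligned,"
                   " offset=0x%"PRIx64", ignoring", addr);

    /* is approximately equivalent to: */
    trace_nvme_ub_db_wr_misaligned(addr);
    qemu_log_mask(LOG_GUEST_ERROR,
                  "nvme_ub_db_wr_misaligned in %s: doorbell write not"
                  " 32-bit aligned, offset=0x%" PRIx64 ", ignoring\n",
                  __func__, addr);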
bdrv_refresh_total_sectors() and bdrv_refresh_limits() expect to be
called under the AioContext lock of the node. Take the lock.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-10-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static int no_coroutine_fn GRAPH_UNLOCKED
 bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
                  QDict *options, int open_flags, Error **errp)
 {
+    AioContext *ctx;
     Error *local_err = NULL;
     int i, ret;
     GLOBAL_STATE_CODE();
@@ -XXX,XX +XXX,XX @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
     bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
     bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;

+    /* Get the context after .bdrv_open, it can change the context */
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+
     ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not refresh total sector count");
+        aio_context_release(ctx);
         return ret;
     }

     bdrv_graph_rdlock_main_loop();
     bdrv_refresh_limits(bs, NULL, &local_err);
     bdrv_graph_rdunlock_main_loop();
+    aio_context_release(ctx);

     if (local_err) {
         error_propagate(errp, local_err);
-- 
2.40.1

From: Fam Zheng <famz@redhat.com>

Management tools create overlays of running guests with qemu-img:

  $ qemu-img create -b /image/in/use.qcow2 -f qcow2 /overlay/image.qcow2

but this doesn't work anymore due to image locking:

    qemu-img: /overlay/image.qcow2: Failed to get shared "write" lock
    Is another process using the image?
    Could not open backing image to determine size.
Use the force share option to allow this use case again.

Cc: qemu-stable@nongnu.org
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt,
         back_flags = flags;
         back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

+        backing_options = qdict_new();
         if (backing_fmt) {
-            backing_options = qdict_new();
             qdict_put_str(backing_options, "driver", backing_fmt);
         }
+        qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);

         bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
                        &local_err);
-- 
2.13.6
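
The subtle point in the bdrv_open_driver() change is ordering: the context
must be queried only after the driver's open function has run, because opening
a child (e.g. via bdrv_open_file_child()) may move bs to an iothread. A
condensed sketch of the rule, for illustration only (variable declarations and
error handling omitted):

    /* Sketch: a context fetched before drv->bdrv_open() could be stale by
     * the time it is locked, so fetch it afterwards. */
    ret = drv->bdrv_open(bs, options, open_flags, &local_err);

    ctx = bdrv_get_aio_context(bs);   /* only now is this stable */
    aio_context_acquire(ctx);
    /* ... bdrv_refresh_total_sectors(), bdrv_refresh_limits() ... */
    aio_context_release(ctx);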
From: Stefan Hajnoczi <stefanha@redhat.com>

is_external=true suspends fd handlers between aio_disable_external() and
aio_enable_external(). The block layer's drain operation uses this
mechanism to prevent new I/O from sneaking in between
bdrv_drained_begin() and bdrv_drained_end().

The previous commit converted the xen-block device to use BlockDevOps
.drained_begin/end() callbacks. It no longer relies on is_external=true
so it is safe to pass is_external=false.

This is part of ongoing work to remove the aio_disable_external() API.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-13-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/xen/xen-bus.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -XXX,XX +XXX,XX @@ void xen_device_set_event_channel_context(XenDevice *xendev,
     }

     if (channel->ctx)
-        aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
+        aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false,
                            NULL, NULL, NULL, NULL, NULL);

     channel->ctx = ctx;
     if (ctx) {
         aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
-                           true, xen_device_event, NULL, xen_device_poll, NULL,
-                           channel);
+                           false, xen_device_event, NULL, xen_device_poll,
+                           NULL, channel);
     }
 }

@@ -XXX,XX +XXX,XX @@ void xen_device_unbind_event_channel(XenDevice *xendev,

     QLIST_REMOVE(channel, list);

-    aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
+    aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false,
                        NULL, NULL, NULL, NULL, NULL);

     if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) {
-- 
2.40.1

From: Thomas Huth <thuth@redhat.com>

It's not working anymore since QEMU v1.3.0 - time to remove it now.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c    | 11 -----------
 qemu-doc.texi |  6 ------
 2 files changed, 17 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index XXXXXXX..XXXXXXX 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_legacy_drive_opts = {
             .type = QEMU_OPT_STRING,
             .help = "chs translation (auto, lba, none)",
         },{
-            .name = "boot",
-            .type = QEMU_OPT_BOOL,
-            .help = "(deprecated, ignored)",
-        },{
             .name = "addr",
             .type = QEMU_OPT_STRING,
             .help = "pci address (virtio only)",
@@ -XXX,XX +XXX,XX @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
             goto fail;
         }
     }

-    /* Deprecated option boot=[on|off] */
-    if (qemu_opt_get(legacy_opts, "boot") != NULL) {
-        fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be "
-                "ignored. Future versions will reject this parameter. Please "
-                "update your scripts.\n");
-    }
-
     /* Other deprecated options */
     if (!qtest_enabled()) {
         for (i = 0; i < ARRAY_SIZE(deprecated); i++) {
diff --git a/qemu-doc.texi b/qemu-doc.texi
index XXXXXXX..XXXXXXX 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -XXX,XX +XXX,XX @@ deprecated.

 @section System emulator command line arguments

-@subsection -drive boot=on|off (since 1.3.0)
-
-The ``boot=on|off'' option to the ``-drive'' argument is
-ignored. Applications should use the ``bootindex=N'' parameter
-to set an absolute ordering between devices instead.
-
 @subsection -tdf (since 1.3.0)

 The ``-tdf'' argument is ignored. The behaviour implemented
-- 
2.13.6
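
For context, the BlockDevOps .drained_begin/end() mechanism that replaces
is_external hooks the device into the block layer's drain sections. A minimal
sketch of the shape of such a registration (the callback bodies here are
placeholders, not the actual xen-block implementation):

    /* Illustrative sketch only */
    static void example_drained_begin(void *opaque)
    {
        /* stop accepting new requests from the device's event source */
    }

    static void example_drained_end(void *opaque)
    {
        /* resume processing requests */
    }

    static const BlockDevOps example_dev_ops = {
        .drained_begin = example_drained_begin,
        .drained_end   = example_drained_end,
    };

    /* registered with: blk_set_dev_ops(blk, &example_dev_ops, opaque); */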
From: Stefan Hajnoczi <stefanha@redhat.com>

This patch is part of an effort to remove the aio_disable_external()
API because it does not fit in a multi-queue block layer world where
many AioContexts may be submitting requests to the same disk.

The SCSI emulation code is already in good shape to stop using
aio_disable_external(). It was only used by commit 9c5aad84da1c
("virtio-scsi: fixed virtio_scsi_ctx_check failed when detaching scsi
disk") to ensure that virtio_scsi_hotunplug() works while the guest
driver is submitting I/O.

Ensure virtio_scsi_hotunplug() is safe as follows:

1. qdev_simple_device_unplug_cb() -> qdev_unrealize() ->
   device_set_realized() calls qatomic_set(&dev->realized, false) so
   that future scsi_device_get() calls return NULL because they exclude
   SCSIDevices with realized=false.

   That means virtio-scsi will reject new I/O requests to this
   SCSIDevice with VIRTIO_SCSI_S_BAD_TARGET even while
   virtio_scsi_hotunplug() is still executing. We are protected against
   new requests!

2. scsi_qdev_unrealize() already contains a call to
   scsi_device_purge_requests() so that in-flight requests are cancelled
   synchronously. This ensures that no in-flight requests remain once
   qdev_simple_device_unplug_cb() returns.

Thanks to these two conditions we don't need aio_disable_external()
anymore.

Cc: Zhengui Li <lizhengui@huawei.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-5-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/virtio-scsi.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
     SCSIDevice *sd = SCSI_DEVICE(dev);
-    AioContext *ctx = s->ctx ?: qemu_get_aio_context();
     VirtIOSCSIEventInfo info = {
         .event = VIRTIO_SCSI_T_TRANSPORT_RESET,
         .reason = VIRTIO_SCSI_EVT_RESET_REMOVED,
         },
     };

-    aio_disable_external(ctx);
     qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
-    aio_enable_external(ctx);

     if (s->ctx) {
         virtio_scsi_acquire(s);
-- 
2.40.1
41
hw/scsi/virtio-scsi.c | 3 ---
12
vl.c | 86 ++-------------------------------------------------------
42
1 file changed, 3 deletions(-)
13
qemu-doc.texi | 8 ------
43
14
qemu-options.hx | 19 ++-----------
44
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
15
3 files changed, 4 insertions(+), 109 deletions(-)
16
17
diff --git a/vl.c b/vl.c
45
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
46
--- a/hw/scsi/virtio-scsi.c
19
--- a/vl.c
47
+++ b/hw/scsi/virtio-scsi.c
20
+++ b/vl.c
48
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
21
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
49
VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
22
const char *boot_order = NULL;
50
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
23
const char *boot_once = NULL;
51
SCSIDevice *sd = SCSI_DEVICE(dev);
24
DisplayState *ds;
52
- AioContext *ctx = s->ctx ?: qemu_get_aio_context();
25
- int cyls, heads, secs, translation;
53
VirtIOSCSIEventInfo info = {
26
QemuOpts *opts, *machine_opts;
54
.event = VIRTIO_SCSI_T_TRANSPORT_RESET,
27
- QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL;
55
.reason = VIRTIO_SCSI_EVT_RESET_REMOVED,
28
+ QemuOpts *icount_opts = NULL, *accel_opts = NULL;
56
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
29
QemuOptsList *olist;
57
},
30
int optind;
58
};
31
const char *optarg;
59
32
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
60
- aio_disable_external(ctx);
33
61
qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
34
cpu_model = NULL;
62
- aio_enable_external(ctx);
35
snapshot = 0;
63
36
- cyls = heads = secs = 0;
64
if (s->ctx) {
37
- translation = BIOS_ATA_TRANSLATION_AUTO;
65
virtio_scsi_acquire(s);
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 vl.c            | 86 ++-------------------------------------------------------
 qemu-doc.texi   |  8 ------
 qemu-options.hx | 19 ++-----------
 3 files changed, 4 insertions(+), 109 deletions(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
     const char *boot_order = NULL;
     const char *boot_once = NULL;
     DisplayState *ds;
-    int cyls, heads, secs, translation;
     QemuOpts *opts, *machine_opts;
-    QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL;
+    QemuOpts *icount_opts = NULL, *accel_opts = NULL;
     QemuOptsList *olist;
     int optind;
     const char *optarg;
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)

     cpu_model = NULL;
     snapshot = 0;
-    cyls = heads = secs = 0;
-    translation = BIOS_ATA_TRANSLATION_AUTO;

     nb_nics = 0;

@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
             if (optind >= argc)
                 break;
             if (argv[optind][0] != '-') {
-                hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
+                drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
             } else {
                 const QEMUOption *popt;

@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
                 cpu_model = optarg;
                 break;
             case QEMU_OPTION_hda:
-                {
-                    char buf[256];
-                    if (cyls == 0)
-                        snprintf(buf, sizeof(buf), "%s", HD_OPTS);
-                    else
-                        snprintf(buf, sizeof(buf),
-                                 "%s,cyls=%d,heads=%d,secs=%d%s",
-                                 HD_OPTS , cyls, heads, secs,
-                                 translation == BIOS_ATA_TRANSLATION_LBA ?
-                                 ",trans=lba" :
-                                 translation == BIOS_ATA_TRANSLATION_NONE ?
-                                 ",trans=none" : "");
-                    drive_add(IF_DEFAULT, 0, optarg, buf);
-                    break;
-                }
             case QEMU_OPTION_hdb:
             case QEMU_OPTION_hdc:
             case QEMU_OPTION_hdd:
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
             case QEMU_OPTION_snapshot:
                 snapshot = 1;
                 break;
-            case QEMU_OPTION_hdachs:
-                {
-                    const char *p;
-                    p = optarg;
-                    cyls = strtol(p, (char **)&p, 0);
-                    if (cyls < 1 || cyls > 16383)
-                        goto chs_fail;
-                    if (*p != ',')
-                        goto chs_fail;
-                    p++;
-                    heads = strtol(p, (char **)&p, 0);
-                    if (heads < 1 || heads > 16)
-                        goto chs_fail;
-                    if (*p != ',')
-                        goto chs_fail;
-                    p++;
-                    secs = strtol(p, (char **)&p, 0);
-                    if (secs < 1 || secs > 63)
-                        goto chs_fail;
-                    if (*p == ',') {
-                        p++;
-                        if (!strcmp(p, "large")) {
-                            translation = BIOS_ATA_TRANSLATION_LARGE;
-                        } else if (!strcmp(p, "rechs")) {
-                            translation = BIOS_ATA_TRANSLATION_RECHS;
-                        } else if (!strcmp(p, "none")) {
-                            translation = BIOS_ATA_TRANSLATION_NONE;
-                        } else if (!strcmp(p, "lba")) {
-                            translation = BIOS_ATA_TRANSLATION_LBA;
-                        } else if (!strcmp(p, "auto")) {
-                            translation = BIOS_ATA_TRANSLATION_AUTO;
-                        } else {
-                            goto chs_fail;
-                        }
-                    } else if (*p != '\0') {
-                    chs_fail:
-                        error_report("invalid physical CHS format");
-                        exit(1);
-                    }
-                    if (hda_opts != NULL) {
-                        qemu_opt_set_number(hda_opts, "cyls", cyls,
-                                            &error_abort);
-                        qemu_opt_set_number(hda_opts, "heads", heads,
-                                            &error_abort);
-                        qemu_opt_set_number(hda_opts, "secs", secs,
-                                            &error_abort);
-                        if (translation == BIOS_ATA_TRANSLATION_LARGE) {
-                            qemu_opt_set(hda_opts, "trans", "large",
-                                         &error_abort);
-                        } else if (translation == BIOS_ATA_TRANSLATION_RECHS) {
-                            qemu_opt_set(hda_opts, "trans", "rechs",
-                                         &error_abort);
-                        } else if (translation == BIOS_ATA_TRANSLATION_LBA) {
-                            qemu_opt_set(hda_opts, "trans", "lba",
-                                         &error_abort);
-                        } else if (translation == BIOS_ATA_TRANSLATION_NONE) {
-                            qemu_opt_set(hda_opts, "trans", "none",
-                                         &error_abort);
-                        }
-                    }
-                }
-                error_report("'-hdachs' is deprecated, please use '-device"
-                             " ide-hd,cyls=c,heads=h,secs=s,...' instead");
-                break;
             case QEMU_OPTION_numa:
                 opts = qemu_opts_parse_noisily(qemu_find_opts("numa"),
                                                optarg, true);
diff --git a/qemu-doc.texi b/qemu-doc.texi
index XXXXXXX..XXXXXXX 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -XXX,XX +XXX,XX @@ The ``--net dump'' argument is now replaced with the
 ``-object filter-dump'' argument which works in combination
 with the modern ``-netdev`` backends instead.

-@subsection -hdachs (since 2.10.0)
-
-The ``-hdachs'' argument is now a synonym for setting
-the ``cyls'', ``heads'', ``secs'', and ``trans'' properties
-on the ``ide-hd'' device using the ``-device'' argument.
-The new syntax allows different settings to be provided
-per disk.
-
 @subsection -usbdevice (since 2.10.0)

 The ``-usbdevice DEV'' argument is now a synonym for setting
diff --git a/qemu-options.hx b/qemu-options.hx
index XXXXXXX..XXXXXXX 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -XXX,XX +XXX,XX @@ of available connectors of a given interface type.
 @item media=@var{media}
 This option defines the type of the media: disk or cdrom.
 @item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}]
-These options have the same definition as they have in @option{-hdachs}.
-These parameters are deprecated, use the corresponding parameters
+Force disk physical geometry and the optional BIOS translation (trans=none or
+lba). These parameters are deprecated, use the corresponding parameters
 of @code{-device} instead.
 @item snapshot=@var{snapshot}
 @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive
@@ -XXX,XX +XXX,XX @@ the raw disk image you use is not written back. You can however force
 the write back by pressing @key{C-a s} (@pxref{disk_images}).
 ETEXI

-DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \
-    "-hdachs c,h,s[,t]\n" \
-    "                force hard disk 0 physical geometry and the optional BIOS\n" \
-    "                translation (t=none or lba) (usually QEMU can guess them)\n",
-    QEMU_ARCH_ALL)
-STEXI
-@item -hdachs @var{c},@var{h},@var{s},[,@var{t}]
-@findex -hdachs
-Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <=
-@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS
-translation mode (@var{t}=none, lba or auto). Usually QEMU can guess
-all those parameters. This option is deprecated, please use
-@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead.
-ETEXI
-
 DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
     "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n"
     "       [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n"
--
2.13.6
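For reference, the replacement for a removed -hdachs command line follows the
pattern named in the removed help text above. This is a hypothetical
invocation; the image file name is made up, and the geometry values are the
documented CHS limits (cyls <= 16383, heads <= 16, secs <= 63):

```
qemu-system-x86_64 -drive file=disk.img,if=none,id=disk0 \
                   -device ide-hd,drive=disk0,cyls=16383,heads=16,secs=63,trans=lba
```

Unlike -hdachs, which only applied to hard disk 0, this form can set a
different geometry per disk.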
From: Stefan Hajnoczi <stefanha@redhat.com>

There is no need to suspend activity between aio_disable_external() and
aio_enable_external(), which is mainly used for the block layer's drain
operation.

This is part of ongoing work to remove the aio_disable_external() API.

Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-9-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/i386/kvm/xen_xenstore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -XXX,XX +XXX,XX @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp)
         error_setg(errp, "Xenstore evtchn port init failed");
         return;
     }
-    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
+    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), false,
                        xen_xenstore_event, NULL, NULL, NULL, s);

     s->impl = xs_impl_create(xen_domid);
--
2.40.1
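The is_external flag flipped above controls whether a handler is suspended
during drain. As a standalone sketch of that gating (not QEMU code; ToyCtx
and the toy_* names are made up, but the check mirrors the aio_node_check()
logic that a later patch in this series removes):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified model of AioContext's external_disable_cnt counter. */
typedef struct {
    atomic_int external_disable_cnt;
} ToyCtx;

/* Mirrors aio_node_check(): handlers registered is_external=true are
 * skipped by the event loop while the counter is nonzero. */
static bool toy_node_check(ToyCtx *ctx, bool is_external)
{
    return !is_external || !atomic_load(&ctx->external_disable_cnt);
}

int main(void)
{
    ToyCtx ctx = { 0 };

    printf("external pollable: %d\n", toy_node_check(&ctx, true));  /* 1 */

    atomic_fetch_add(&ctx.external_disable_cnt, 1);  /* aio_disable_external() */
    printf("external pollable: %d\n", toy_node_check(&ctx, true));  /* 0 */
    printf("internal pollable: %d\n", toy_node_check(&ctx, false)); /* 1 */

    atomic_fetch_sub(&ctx.external_disable_cnt, 1);  /* aio_enable_external() */
    printf("external pollable: %d\n", toy_node_check(&ctx, true));  /* 1 */
    return 0;
}
```

Registering the xenstore fd with is_external=false means it keeps being
serviced during drain, which is safe here because it is not an I/O request
source that drain needs to quiesce.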
From: Thomas Huth <thuth@redhat.com>

Looks like we forgot to announce the deprecation of these options in
the corresponding chapter of the qemu-doc text, so let's do that now.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-doc.texi | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/qemu-doc.texi b/qemu-doc.texi
index XXXXXXX..XXXXXXX 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -XXX,XX +XXX,XX @@ longer be directly supported in QEMU.
 The ``-drive if=scsi'' argument is replaced by the
 ``-device BUS-TYPE'' argument combined with ``-drive if=none''.

+@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0)
+
+The drive geometry arguments are replaced by the geometry arguments
+that can be specified with the ``-device'' parameter.
+
+@subsection -drive serial=... (since 2.10.0)
+
+The drive serial argument is replaced by the serial argument
+that can be specified with the ``-device'' parameter.
+
+@subsection -drive addr=... (since 2.10.0)
+
+The drive addr argument is replaced by the addr argument
+that can be specified with the ``-device'' parameter.
+
 @subsection -net dump (since 2.10.0)

 The ``--net dump'' argument is now replaced with the
--
2.13.6
From: Stefan Hajnoczi <stefanha@redhat.com>

All callers now pass is_external=false to aio_set_fd_handler() and
aio_set_event_notifier(). The aio_disable_external() API that
temporarily disables fd handlers that were registered is_external=true
is therefore dead code.

Remove aio_disable_external(), aio_enable_external(), and the
is_external arguments to aio_set_fd_handler() and
aio_set_event_notifier().

The entire test-fdmon-epoll test is removed because its sole purpose was
testing aio_disable_external().

Parts of this patch were generated using the following coccinelle
(https://coccinelle.lip6.fr/) semantic patch:

  @@
  expression ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque;
  @@
  - aio_set_fd_handler(ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque)
  + aio_set_fd_handler(ctx, fd, io_read, io_write, io_poll, io_poll_ready, opaque)

  @@
  expression ctx, notifier, is_external, io_read, io_poll, io_poll_ready;
  @@
  - aio_set_event_notifier(ctx, notifier, is_external, io_read, io_poll, io_poll_ready)
  + aio_set_event_notifier(ctx, notifier, io_read, io_poll, io_poll_ready)

Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230516190238.8401-21-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
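The semantic patch simply drops the third argument at every call site. As a
compile-only sketch of the resulting signature (stubbed types standing in for
QEMU's AioContext and callbacks; the my_read_handler name is made up):

```c
#include <stdbool.h>
#include <stdio.h>

typedef struct AioContext AioContext;   /* opaque stand-in */
typedef void IOHandler(void *opaque);
typedef bool AioPollFn(void *opaque);

/* New signature after this patch: the is_external flag is gone.
 * This stub only prints; the real function registers fd handlers.
 * A semantic patch like the one above could be applied with e.g.
 * "spatch --sp-file drop-is-external.cocci --in-place FILE.c"
 * (hypothetical file name). */
static void aio_set_fd_handler(AioContext *ctx, int fd,
                               IOHandler *io_read, IOHandler *io_write,
                               AioPollFn *io_poll, IOHandler *io_poll_ready,
                               void *opaque)
{
    (void)ctx; (void)io_write; (void)io_poll; (void)io_poll_ready; (void)opaque;
    printf("fd %d registered, read handler %p\n", fd, (void *)io_read);
}

static void my_read_handler(void *opaque)
{
    (void)opaque;
}

int main(void)
{
    AioContext *ctx = NULL;  /* stand-in; real callers get this from QEMU */

    /* Old call site: aio_set_fd_handler(ctx, 0, false, my_read_handler,
     *                                   NULL, NULL, NULL, NULL);
     * After the rewrite drops the is_external argument: */
    aio_set_fd_handler(ctx, 0, my_read_handler, NULL, NULL, NULL, NULL);
    return 0;
}
```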
---
 include/block/aio.h               | 57 ------------------
 util/aio-posix.h                  |  1 -
 block.c                           |  7 ---
 block/blkio.c                     | 15 +++----
 block/curl.c                      | 10 ++---
 block/export/fuse.c               |  8 ++--
 block/export/vduse-blk.c          | 10 ++---
 block/io.c                        |  2 -
 block/io_uring.c                  |  4 +-
 block/iscsi.c                     |  3 +-
 block/linux-aio.c                 |  4 +-
 block/nfs.c                       |  5 +--
 block/nvme.c                      |  8 ++--
 block/ssh.c                       |  4 +-
 block/win32-aio.c                 |  6 +--
 hw/i386/kvm/xen_xenstore.c        |  2 +-
 hw/virtio/virtio.c                |  6 +--
 hw/xen/xen-bus.c                  |  8 ++--
 io/channel-command.c              |  6 +--
 io/channel-file.c                 |  3 +-
 io/channel-socket.c               |  3 +-
 migration/rdma.c                  | 16 +++----
 tests/unit/test-aio.c             | 27 +-----------
 tests/unit/test-bdrv-drain.c      |  1 -
 tests/unit/test-fdmon-epoll.c     | 73 -------------------------------
 tests/unit/test-nested-aio-poll.c |  9 ++--
 util/aio-posix.c                  | 20 +++------
 util/aio-win32.c                  |  8 +---
 util/async.c                      |  3 +-
 util/fdmon-epoll.c                | 10 -----
 util/fdmon-io_uring.c             |  8 +---
 util/fdmon-poll.c                 |  3 +-
 util/main-loop.c                  |  7 ++-
 util/qemu-coroutine-io.c          |  7 ++-
 util/vhost-user-server.c          | 11 +++--
 tests/unit/meson.build            |  3 --
 36 files changed, 80 insertions(+), 298 deletions(-)
 delete mode 100644 tests/unit/test-fdmon-epoll.c

diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ struct AioContext {
      */
     QEMUTimerListGroup tlg;

-    int external_disable_cnt;
-
     /* Number of AioHandlers without .io_poll() */
     int poll_disable_cnt;

@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking);
  */
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
-                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         AioPollFn *io_poll,
@@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx,
  */
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
-                            bool is_external,
                             EventNotifierHandler *io_read,
                             AioPollFn *io_poll,
                             EventNotifierHandler *io_poll_ready);
@@ -XXX,XX +XXX,XX @@ static inline void aio_timer_init(AioContext *ctx,
  */
 int64_t aio_compute_timeout(AioContext *ctx);

-/**
- * aio_disable_external:
- * @ctx: the aio context
- *
- * Disable the further processing of external clients.
- */
-static inline void aio_disable_external(AioContext *ctx)
-{
-    qatomic_inc(&ctx->external_disable_cnt);
-}
-
-/**
- * aio_enable_external:
- * @ctx: the aio context
- *
- * Enable the processing of external clients.
- */
-static inline void aio_enable_external(AioContext *ctx)
-{
-    int old;
-
-    old = qatomic_fetch_dec(&ctx->external_disable_cnt);
-    assert(old > 0);
-    if (old == 1) {
-        /* Kick event loop so it re-arms file descriptors */
-        aio_notify(ctx);
-    }
-}
-
-/**
- * aio_external_disabled:
- * @ctx: the aio context
- *
- * Return true if the external clients are disabled.
- */
-static inline bool aio_external_disabled(AioContext *ctx)
-{
-    return qatomic_read(&ctx->external_disable_cnt);
-}
-
-/**
- * aio_node_check:
- * @ctx: the aio context
- * @is_external: Whether or not the checked node is an external event source.
- *
- * Check if the node's is_external flag is okay to be polled by the ctx at this
- * moment. True means green light.
- */
-static inline bool aio_node_check(AioContext *ctx, bool is_external)
-{
-    return !is_external || !qatomic_read(&ctx->external_disable_cnt);
-}
-
 /**
  * aio_co_schedule:
  * @ctx: the aio context
diff --git a/util/aio-posix.h b/util/aio-posix.h
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.h
+++ b/util/aio-posix.h
@@ -XXX,XX +XXX,XX @@ struct AioHandler {
 #endif
     int64_t poll_idle_timeout; /* when to stop userspace polling */
     bool poll_ready; /* has polling detected an event? */
-    bool is_external;
 };

 /* Add a handler to a ready list */
diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
         bs->drv->bdrv_detach_aio_context(bs);
     }

-    if (bs->quiesce_counter) {
-        aio_enable_external(bs->aio_context);
-    }
     bs->aio_context = NULL;
 }

@@ -XXX,XX +XXX,XX @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
     BdrvAioNotifier *ban, *ban_tmp;
     GLOBAL_STATE_CODE();

-    if (bs->quiesce_counter) {
-        aio_disable_external(new_context);
-    }
-
     bs->aio_context = new_context;

     if (bs->drv && bs->drv->bdrv_attach_aio_context) {
diff --git a/block/blkio.c b/block/blkio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -XXX,XX +XXX,XX @@ static void blkio_attach_aio_context(BlockDriverState *bs,
 {
     BDRVBlkioState *s = bs->opaque;

-    aio_set_fd_handler(new_context,
-                       s->completion_fd,
-                       false,
-                       blkio_completion_fd_read,
-                       NULL,
+    aio_set_fd_handler(new_context, s->completion_fd,
+                       blkio_completion_fd_read, NULL,
                        blkio_completion_fd_poll,
-                       blkio_completion_fd_poll_ready,
-                       bs);
+                       blkio_completion_fd_poll_ready, bs);
 }

 static void blkio_detach_aio_context(BlockDriverState *bs)
 {
     BDRVBlkioState *s = bs->opaque;

-    aio_set_fd_handler(bdrv_get_aio_context(bs),
-                       s->completion_fd,
-                       false, NULL, NULL, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->completion_fd, NULL, NULL,
+                       NULL, NULL, NULL);
 }

 /* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
diff --git a/block/curl.c b/block/curl.c
index XXXXXXX..XXXXXXX 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -XXX,XX +XXX,XX @@ static gboolean curl_drop_socket(void *key, void *value, void *opaque)
     CURLSocket *socket = value;
     BDRVCURLState *s = socket->s;

-    aio_set_fd_handler(s->aio_context, socket->fd, false,
+    aio_set_fd_handler(s->aio_context, socket->fd,
                        NULL, NULL, NULL, NULL, NULL);
     return true;
 }
@@ -XXX,XX +XXX,XX @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
     trace_curl_sock_cb(action, (int)fd);
     switch (action) {
         case CURL_POLL_IN:
-            aio_set_fd_handler(s->aio_context, fd, false,
+            aio_set_fd_handler(s->aio_context, fd,
                                curl_multi_do, NULL, NULL, NULL, socket);
             break;
         case CURL_POLL_OUT:
-            aio_set_fd_handler(s->aio_context, fd, false,
+            aio_set_fd_handler(s->aio_context, fd,
                                NULL, curl_multi_do, NULL, NULL, socket);
             break;
         case CURL_POLL_INOUT:
-            aio_set_fd_handler(s->aio_context, fd, false,
+            aio_set_fd_handler(s->aio_context, fd,
                                curl_multi_do, curl_multi_do,
                                NULL, NULL, socket);
             break;
         case CURL_POLL_REMOVE:
-            aio_set_fd_handler(s->aio_context, fd, false,
+            aio_set_fd_handler(s->aio_context, fd,
                                NULL, NULL, NULL, NULL, NULL);
             break;
     }
diff --git a/block/export/fuse.c b/block/export/fuse.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -XXX,XX +XXX,XX @@ static void fuse_export_drained_begin(void *opaque)
     FuseExport *exp = opaque;

     aio_set_fd_handler(exp->common.ctx,
-                       fuse_session_fd(exp->fuse_session), false,
+                       fuse_session_fd(exp->fuse_session),
                        NULL, NULL, NULL, NULL, NULL);
     exp->fd_handler_set_up = false;
 }
@@ -XXX,XX +XXX,XX @@ static void fuse_export_drained_end(void *opaque)
     exp->common.ctx = blk_get_aio_context(exp->common.blk);

     aio_set_fd_handler(exp->common.ctx,
-                       fuse_session_fd(exp->fuse_session), false,
+                       fuse_session_fd(exp->fuse_session),
                        read_from_fuse_export, NULL, NULL, NULL, exp);
     exp->fd_handler_set_up = true;
 }
@@ -XXX,XX +XXX,XX @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
     g_hash_table_insert(exports, g_strdup(mountpoint), NULL);

     aio_set_fd_handler(exp->common.ctx,
-                       fuse_session_fd(exp->fuse_session), false,
+                       fuse_session_fd(exp->fuse_session),
                        read_from_fuse_export, NULL, NULL, NULL, exp);
     exp->fd_handler_set_up = true;

@@ -XXX,XX +XXX,XX @@ static void fuse_export_shutdown(BlockExport *blk_exp)

     if (exp->fd_handler_set_up) {
         aio_set_fd_handler(exp->common.ctx,
-                           fuse_session_fd(exp->fuse_session), false,
+                           fuse_session_fd(exp->fuse_session),
                            NULL, NULL, NULL, NULL, NULL);
         exp->fd_handler_set_up = false;
     }
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
     }

     aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
-                       false, on_vduse_vq_kick, NULL, NULL, NULL, vq);
+                       on_vduse_vq_kick, NULL, NULL, NULL, vq);
     /* Make sure we don't miss any kick afer reconnecting */
     eventfd_write(vduse_queue_get_fd(vq), 1);
 }
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
         return;
     }

-    aio_set_fd_handler(vblk_exp->export.ctx, fd, false,
+    aio_set_fd_handler(vblk_exp->export.ctx, fd,
                        NULL, NULL, NULL, NULL, NULL);
 }

@@ -XXX,XX +XXX,XX @@ static void on_vduse_dev_kick(void *opaque)
 static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
 {
     aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
-                       false, on_vduse_dev_kick, NULL, NULL, NULL,
+                       on_vduse_dev_kick, NULL, NULL, NULL,
                        vblk_exp->dev);

     /* Virtqueues are handled by vduse_blk_drained_end() */
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
 static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
 {
     aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
-                       false, NULL, NULL, NULL, NULL, NULL);
+                       NULL, NULL, NULL, NULL, NULL);

     /* Virtqueues are handled by vduse_blk_drained_begin() */
 }
@@ -XXX,XX +XXX,XX @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
         vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
     }

-    aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), false,
+    aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev),
                        on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);

     blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,

     /* Stop things in parent-to-child order */
     if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
-        aio_disable_external(bdrv_get_aio_context(bs));
         bdrv_parent_drained_begin(bs, parent);
         if (bs->drv && bs->drv->bdrv_drain_begin) {
             bs->drv->bdrv_drain_begin(bs);
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
             bs->drv->bdrv_drain_end(bs);
         }
         bdrv_parent_drained_end(bs, parent);
-        aio_enable_external(bdrv_get_aio_context(bs));
     }
 }

diff --git a/block/io_uring.c b/block/io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,

 void luring_detach_aio_context(LuringState *s, AioContext *old_context)
 {
-    aio_set_fd_handler(old_context, s->ring.ring_fd, false,
+    aio_set_fd_handler(old_context, s->ring.ring_fd,
                        NULL, NULL, NULL, NULL, s);
     qemu_bh_delete(s->completion_bh);
     s->aio_context = NULL;
@@ -XXX,XX +XXX,XX @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context)
 {
     s->aio_context = new_context;
     s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
-    aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false,
+    aio_set_fd_handler(s->aio_context, s->ring.ring_fd,
                        qemu_luring_completion_cb, NULL,
                        qemu_luring_poll_cb, qemu_luring_poll_ready, s);
 }
diff --git a/block/iscsi.c b/block/iscsi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -XXX,XX +XXX,XX @@ iscsi_set_events(IscsiLun *iscsilun)

     if (ev != iscsilun->events) {
         aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
-                           false,
                            (ev & POLLIN) ? iscsi_process_read : NULL,
                            (ev & POLLOUT) ? iscsi_process_write : NULL,
                            NULL, NULL,
@@ -XXX,XX +XXX,XX @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
     IscsiLun *iscsilun = bs->opaque;

     aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
-                       false, NULL, NULL, NULL, NULL, NULL);
+                       NULL, NULL, NULL, NULL, NULL);
     iscsilun->events = 0;

     if (iscsilun->nop_timer) {
diff --git a/block/linux-aio.c b/block/linux-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,

 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &s->e, false, NULL, NULL, NULL);
+    aio_set_event_notifier(old_context, &s->e, NULL, NULL, NULL);
     qemu_bh_delete(s->completion_bh);
     s->aio_context = NULL;
 }
@@ -XXX,XX +XXX,XX @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
 {
     s->aio_context = new_context;
     s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
-    aio_set_event_notifier(new_context, &s->e, false,
+    aio_set_event_notifier(new_context, &s->e,
                            qemu_laio_completion_cb,
                            qemu_laio_poll_cb,
                            qemu_laio_poll_ready);
diff --git a/block/nfs.c b/block/nfs.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -XXX,XX +XXX,XX @@ static void nfs_set_events(NFSClient *client)
     int ev = nfs_which_events(client->context);
     if (ev != client->events) {
         aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false,
                            (ev & POLLIN) ? nfs_process_read : NULL,
                            (ev & POLLOUT) ? nfs_process_write : NULL,
                            NULL, NULL, client);
@@ -XXX,XX +XXX,XX @@ static void nfs_detach_aio_context(BlockDriverState *bs)
     NFSClient *client = bs->opaque;

     aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                       false, NULL, NULL, NULL, NULL, NULL);
+                       NULL, NULL, NULL, NULL, NULL);
     client->events = 0;
 }

@@ -XXX,XX +XXX,XX @@ static void nfs_client_close(NFSClient *client)
     if (client->context) {
         qemu_mutex_lock(&client->mutex);
         aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false, NULL, NULL, NULL, NULL, NULL);
+                           NULL, NULL, NULL, NULL, NULL);
         qemu_mutex_unlock(&client->mutex);
         if (client->fh) {
             nfs_close(client->context, client->fh);
diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     }
     aio_set_event_notifier(bdrv_get_aio_context(bs),
                            &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, nvme_handle_event, nvme_poll_cb,
+                           nvme_handle_event, nvme_poll_cb,
                            nvme_poll_ready);

     if (!nvme_identify(bs, namespace, errp)) {
@@ -XXX,XX +XXX,XX @@ static void nvme_close(BlockDriverState *bs)
     g_free(s->queues);
     aio_set_event_notifier(bdrv_get_aio_context(bs),
                            &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, NULL, NULL, NULL);
+                           NULL, NULL, NULL);
     event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
     qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map,
                             0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE);
@@ -XXX,XX +XXX,XX @@ static void nvme_detach_aio_context(BlockDriverState *bs)

     aio_set_event_notifier(bdrv_get_aio_context(bs),
                            &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, NULL, NULL, NULL);
+                           NULL, NULL, NULL);
 }

 static void nvme_attach_aio_context(BlockDriverState *bs,
@@ -XXX,XX +XXX,XX @@ static void nvme_attach_aio_context(BlockDriverState *bs,

     s->aio_context = new_context;
     aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, nvme_handle_event, nvme_poll_cb,
+                           nvme_handle_event, nvme_poll_cb,
                            nvme_poll_ready);

     for (unsigned i = 0; i < s->queue_count; i++) {
diff --git a/block/ssh.c b/block/ssh.c
index XXXXXXX..XXXXXXX 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -XXX,XX +XXX,XX @@ static void restart_coroutine(void *opaque)
     AioContext *ctx = bdrv_get_aio_context(bs);

     trace_ssh_restart_coroutine(restart->co);
-    aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL, NULL);
+    aio_set_fd_handler(ctx, s->sock, NULL, NULL, NULL, NULL, NULL);

     aio_co_wake(restart->co);
 }
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
     trace_ssh_co_yield(s->sock, rd_handler, wr_handler);

     aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, rd_handler, wr_handler, NULL, NULL, &restart);
+                       rd_handler, wr_handler, NULL, NULL, &restart);
     qemu_coroutine_yield();
     trace_ssh_co_yield_back(s->sock);
 }
diff --git a/block/win32-aio.c b/block/win32-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -XXX,XX +XXX,XX @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL, NULL);
+    aio_set_event_notifier(old_context, &aio->e, NULL, NULL, NULL);
     aio->aio_ctx = NULL;
 }

@@ -XXX,XX +XXX,XX @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *new_context)
 {
     aio->aio_ctx = new_context;
-    aio_set_event_notifier(new_context, &aio->e, false,
-                           win32_aio_completion_cb, NULL, NULL);
+    aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb,
+                           NULL, NULL);
 }

 QEMUWin32AIOState *win32_aio_init(void)
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -XXX,XX +XXX,XX @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp)
         error_setg(errp, "Xenstore evtchn port init failed");
         return;
     }
-    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), false,
+    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh),
                        xen_xenstore_event, NULL, NULL, NULL, s);

     s->impl = xs_impl_create(xen_domid);
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -XXX,XX +XXX,XX @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)

 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
-    aio_set_event_notifier(ctx, &vq->host_notifier, false,
+    aio_set_event_notifier(ctx, &vq->host_notifier,
                            virtio_queue_host_notifier_read,
                            virtio_queue_host_notifier_aio_poll,
                            virtio_queue_host_notifier_aio_poll_ready);
@@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
  */
 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
 {
-    aio_set_event_notifier(ctx, &vq->host_notifier, false,
+    aio_set_event_notifier(ctx, &vq->host_notifier,
                            virtio_queue_host_notifier_read,
                            NULL, NULL);
 }

 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
-    aio_set_event_notifier(ctx, &vq->host_notifier, false, NULL, NULL, NULL);
+    aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
 }

 void virtio_queue_host_notifier_read(EventNotifier *n)
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -XXX,XX +XXX,XX @@ void xen_device_set_event_channel_context(XenDevice *xendev,
     }

     if (channel->ctx)
-        aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false,
+        aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
                            NULL, NULL, NULL, NULL, NULL);

     channel->ctx = ctx;
     if (ctx) {
         aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
-                           false, xen_device_event, NULL, xen_device_poll,
-                           NULL, channel);
+                           xen_device_event, NULL, xen_device_poll, NULL,
+                           channel);
     }
 }

@@ -XXX,XX +XXX,XX @@ void xen_device_unbind_event_channel(XenDevice *xendev,

     QLIST_REMOVE(channel, list);

-    aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false,
+    aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
                        NULL, NULL, NULL, NULL, NULL);

     if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) {
diff --git a/io/channel-command.c b/io/channel-command.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -XXX,XX +XXX,XX @@ static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc,
                                                    void *opaque)
 {
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
-    aio_set_fd_handler(ctx, cioc->readfd, false,
-                       io_read, NULL, NULL, NULL, opaque);
-    aio_set_fd_handler(ctx, cioc->writefd, false,
-                       NULL, io_write, NULL, NULL, opaque);
+    aio_set_fd_handler(ctx, cioc->readfd, io_read, NULL, NULL, NULL, opaque);
+    aio_set_fd_handler(ctx, cioc->writefd, NULL, io_write, NULL, NULL, opaque);
 }


diff --git a/io/channel-file.c b/io/channel-file.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -XXX,XX +XXX,XX @@ static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc,
                                                 void *opaque)
 {
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
-    aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write,
-                       NULL, NULL, opaque);
+    aio_set_fd_handler(ctx, fioc->fd, io_read, io_write, NULL, NULL, opaque);
 }

 static GSource *qio_channel_file_create_watch(QIOChannel *ioc,
diff --git a/io/channel-socket.c b/io/channel-socket.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -XXX,XX +XXX,XX @@ static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc,
                                                   void *opaque)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
-    aio_set_fd_handler(ctx, sioc->fd, false,
-                       io_read, io_write, NULL, NULL, opaque);
+    aio_set_fd_handler(ctx, sioc->fd, io_read, io_write, NULL, NULL, opaque);
 }

 static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
diff --git a/migration/rdma.c b/migration/rdma.c
index XXXXXXX..XXXXXXX 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -XXX,XX +XXX,XX @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
 {
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
     if (io_read) {
-        aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd,
-                           false, io_read, io_write, NULL, NULL, opaque);
-        aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd,
-                           false, io_read, io_write, NULL, NULL, opaque);
+        aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd, io_read,
+                           io_write, NULL, NULL, opaque);
+        aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd, io_read,
+                           io_write, NULL, NULL, opaque);
     } else {
-        aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd,
-                           false, io_read, io_write, NULL, NULL, opaque);
-        aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd,
-                           false, io_read, io_write, NULL, NULL, opaque);
+        aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd, io_read,
+                           io_write, NULL, NULL, opaque);
+        aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd, io_read,
+                           io_write, NULL, NULL, opaque);
     }
 }

diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-aio.c
+++ b/tests/unit/test-aio.c
@@ -XXX,XX +XXX,XX @@ static void *test_acquire_thread(void *opaque)
 static void set_event_notifier(AioContext *ctx, EventNotifier *notifier,
                                EventNotifierHandler *handler)
 {
-    aio_set_event_notifier(ctx, notifier, false, handler, NULL, NULL);
+    aio_set_event_notifier(ctx, notifier, handler, NULL, NULL);
 }

 static void dummy_notifier_read(EventNotifier *n)
@@ -XXX,XX +XXX,XX @@ static void test_flush_event_notifier(void)
     event_notifier_cleanup(&data.e);
 }

-static void test_aio_external_client(void)
-{
-    int i, j;
-
-    for (i = 1; i < 3; i++) {
-        EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
-        event_notifier_init(&data.e, false);
-        aio_set_event_notifier(ctx, &data.e, true, event_ready_cb, NULL, NULL);
-        event_notifier_set(&data.e);
-        for (j = 0; j < i; j++) {
-            aio_disable_external(ctx);
-        }
-        for (j = 0; j < i; j++) {
-            assert(!aio_poll(ctx, false));
-            assert(event_notifier_test_and_clear(&data.e));
-            event_notifier_set(&data.e);
-            aio_enable_external(ctx);
-        }
-        assert(aio_poll(ctx, false));
-        set_event_notifier(ctx, &data.e, NULL);
-        event_notifier_cleanup(&data.e);
-    }
-}
-
 static void test_wait_event_notifier_noflush(void)
 {
     EventNotifierTestData data = { .n = 0 };
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/aio/event/wait", test_wait_event_notifier);
     g_test_add_func("/aio/event/wait/no-flush-cb", test_wait_event_notifier_noflush);
     g_test_add_func("/aio/event/flush", test_flush_event_notifier);
-    g_test_add_func("/aio/external-client", test_aio_external_client);
     g_test_add_func("/aio/timer/schedule", test_timer_schedule);

     g_test_add_func("/aio/coroutine/queue-chaining", test_queue_chaining);
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void test_graph_change_drain_all(void)

     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
     g_assert_cmpint(b_s->drain_count, ==, 0);
-    g_assert_cmpint(qemu_get_aio_context()->external_disable_cnt, ==, 0);

     bdrv_unref(bs_b);
     blk_unref(blk_b);
diff --git a/tests/unit/test-fdmon-epoll.c b/tests/unit/test-fdmon-epoll.c
deleted file mode 100644
index XXXXXXX..XXXXXXX
--- a/tests/unit/test-fdmon-epoll.c
+++ /dev/null
@@ -XXX,XX +XXX,XX @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * fdmon-epoll tests
- *
- * Copyright (c) 2020 Red Hat, Inc.
- */
-
-#include "qemu/osdep.h"
-#include "block/aio.h"
-#include "qapi/error.h"
-#include "qemu/main-loop.h"
-
-static AioContext *ctx;
-
-static void dummy_fd_handler(EventNotifier *notifier)
-{
-    event_notifier_test_and_clear(notifier);
-}
-
-static void add_event_notifiers(EventNotifier *notifiers, size_t n)
-{
-    for (size_t i = 0; i < n; i++) {
-        event_notifier_init(&notifiers[i], false);
-        aio_set_event_notifier(ctx, &notifiers[i], false,
-                               dummy_fd_handler, NULL, NULL);
-    }
-}
-
-static void remove_event_notifiers(EventNotifier *notifiers, size_t n)
-{
-    for (size_t i = 0; i < n; i++) {
-        aio_set_event_notifier(ctx, &notifiers[i], false, NULL, NULL, NULL);
-        event_notifier_cleanup(&notifiers[i]);
-    }
-}
-
-/* Check that fd handlers work when external clients are disabled */
-static void test_external_disabled(void)
-{
-    EventNotifier notifiers[100];
-
-    /* fdmon-epoll is only enabled when many fd handlers are registered */
-    add_event_notifiers(notifiers, G_N_ELEMENTS(notifiers));
-
-    event_notifier_set(&notifiers[0]);
-    assert(aio_poll(ctx, true));
-
-    aio_disable_external(ctx);
-    event_notifier_set(&notifiers[0]);
-    assert(aio_poll(ctx, true));
-    aio_enable_external(ctx);
-
-    remove_event_notifiers(notifiers, G_N_ELEMENTS(notifiers));
-}
-
-int main(int argc, char **argv)
-{
-    /*
-     * This code relies on the fact that fdmon-io_uring disables itself when
-     * the glib main loop is in use. The main loop uses fdmon-poll and upgrades
-     * to fdmon-epoll when the number of fds exceeds a threshold.
-     */
-    qemu_init_main_loop(&error_fatal);
-    ctx = qemu_get_aio_context();
-
-    while (g_main_context_iteration(NULL, false)) {
-        /* Do nothing */
-    }
-
-    g_test_init(&argc, &argv, NULL);
-    g_test_add_func("/fdmon-epoll/external-disabled", test_external_disabled);
-    return g_test_run();
-}
diff --git a/tests/unit/test-nested-aio-poll.c b/tests/unit/test-nested-aio-poll.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-nested-aio-poll.c
+++ b/tests/unit/test-nested-aio-poll.c
@@ -XXX,XX +XXX,XX @@ static void test(void)

     /* Make the event notifier active (set) right away */
     event_notifier_init(&td.poll_notifier, 1);
-    aio_set_event_notifier(td.ctx, &td.poll_notifier, false,
+    aio_set_event_notifier(td.ctx, &td.poll_notifier,
                            io_read, io_poll_true, io_poll_ready);

     /* This event notifier will be used later */
     event_notifier_init(&td.dummy_notifier, 0);
-    aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
+    aio_set_event_notifier(td.ctx, &td.dummy_notifier,
                            io_read, io_poll_false, io_poll_never_ready);

     /* Consume aio_notify() */
@@ -XXX,XX +XXX,XX @@ static void test(void)
     /* Run io_poll()/io_poll_ready() one more time to show it keeps working */
     g_assert(aio_poll(td.ctx, true));

-    aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
-                           NULL, NULL, NULL);
-    aio_set_event_notifier(td.ctx, &td.poll_notifier, false, NULL, NULL, NULL);
+    aio_set_event_notifier(td.ctx, &td.dummy_notifier, NULL, NULL, NULL);
+    aio_set_event_notifier(td.ctx, &td.poll_notifier, NULL, NULL, NULL);
     event_notifier_cleanup(&td.dummy_notifier);
     event_notifier_cleanup(&td.poll_notifier);
     aio_context_unref(td.ctx);
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)

 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
-                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         AioPollFn *io_poll,
@@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx,
         new_node->io_poll = io_poll;
         new_node->io_poll_ready = io_poll_ready;
         new_node->opaque = opaque;
-        new_node->is_external = is_external;

         if (is_new) {
             new_node->pfd.fd = fd;
@@ -XXX,XX +XXX,XX @@ static void aio_set_fd_poll(AioContext *ctx, int fd,

 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
-                            bool is_external,
                             EventNotifierHandler *io_read,
                             AioPollFn *io_poll,
                             EventNotifierHandler *io_poll_ready)
 {
-    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
                        (IOHandler *)io_read, NULL, io_poll,
                        (IOHandler *)io_poll_ready, notifier);
 }
@@ -XXX,XX +XXX,XX @@ bool aio_pending(AioContext *ctx)

         /* TODO should this check poll ready? */
         revents = node->pfd.revents & node->pfd.events;
-        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
-            aio_node_check(ctx, node->is_external)) {
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
             result = true;
             break;
         }
-        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
-            aio_node_check(ctx, node->is_external)) {
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
             result = true;
             break;
         }
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
         QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
     }
     if (!QLIST_IS_INSERTED(node, node_deleted) &&
-        poll_ready && revents == 0 &&
-        aio_node_check(ctx, node->is_external) &&
-        node->io_poll_ready) {
+        poll_ready && revents == 0 && node->io_poll_ready) {
         /*
          * Remove temporarily to avoid infinite loops when ->io_poll_ready()
          * calls aio_poll() before clearing the condition that made the poll
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)

     if (!QLIST_IS_INSERTED(node, node_deleted) &&
         (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
-        aio_node_check(ctx, node->is_external) &&
         node->io_read) {
         node->io_read(node->opaque);

@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
     }
     if (!QLIST_IS_INSERTED(node, node_deleted) &&
         (revents & (G_IO_OUT | G_IO_ERR)) &&
-        aio_node_check(ctx, node->is_external) &&
         node->io_write) {
         node->io_write(node->opaque);
         progress = true;
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers_once(AioContext *ctx,
     AioHandler *tmp;

     QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
-        if (aio_node_check(ctx, node->is_external) &&
-            node->io_poll(node->opaque)) {
+        if (node->io_poll(node->opaque)) {
             aio_add_poll_ready_handler(ready_list, node);

             node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
diff --git a/util/aio-win32.c b/util/aio-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -XXX,XX +XXX,XX @@ struct AioHandler {
     GPollFD pfd;
     int deleted;
     void *opaque;
-    bool is_external;
     QLIST_ENTRY(AioHandler) node;
 };

@@ -XXX,XX +XXX,XX @@ static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node)

 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
-                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         AioPollFn *io_poll,
@@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx,
         node->opaque = opaque;
         node->io_read = io_read;
         node->io_write = io_write;
-        node->is_external = is_external;

         if (io_read) {
             bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE;
@@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx,

 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *e,
-                            bool is_external,
                             EventNotifierHandler *io_notify,
                             AioPollFn *io_poll,
                             EventNotifierHandler *io_poll_ready)
@@ -XXX,XX +XXX,XX @@ void aio_set_event_notifier(AioContext *ctx,
         node->e = e;
         node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
         node->pfd.events = G_IO_IN;
-        node->is_external = is_external;
         QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);

         g_source_add_poll(&ctx->source, &node->pfd);
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
         /* fill fd sets */
         count = 0;
         QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-            if (!node->deleted && node->io_notify
-                && aio_node_check(ctx, node->is_external)) {
+            if (!node->deleted && node->io_notify) {
                 assert(count < MAXIMUM_WAIT_OBJECTS);
                 events[count++] = event_notifier_get_handle(node->e);
             }
diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
         g_free(bh);
     }

-    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL, NULL);
     event_notifier_cleanup(&ctx->notifier);
     qemu_rec_mutex_destroy(&ctx->lock);
     qemu_lockcnt_destroy(&ctx->list_lock);
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
     QSLIST_INIT(&ctx->scheduled_coroutines);

     aio_set_event_notifier(ctx, &ctx->notifier,
-                           false,
                            aio_context_notifier_cb,
                            aio_context_notifier_poll,
                            aio_context_notifier_poll_ready);
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
index XXXXXXX..XXXXXXX 100644
--- a/util/fdmon-epoll.c
+++ b/util/fdmon-epoll.c
@@ -XXX,XX +XXX,XX @@ static int fdmon_epoll_wait(AioContext *ctx, AioHandlerList *ready_list,
     int i, ret = 0;
     struct epoll_event events[128];

-    /* Fall back while external clients are disabled */
-    if (qatomic_read(&ctx->external_disable_cnt)) {
-        return fdmon_poll_ops.wait(ctx, ready_list, timeout);
-    }
-
     if (timeout > 0) {
         ret = qemu_poll_ns(&pfd, 1, timeout);
         if (ret > 0) {
@@ -XXX,XX +XXX,XX @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
         return false;
     }

-    /* Do not upgrade while external clients are disabled */
-    if (qatomic_read(&ctx->external_disable_cnt)) {
-        return false;
-    }
-
     if (npfd < EPOLL_ENABLE_THRESHOLD) {
         return false;
     }
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -XXX,XX +XXX,XX @@ static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
     unsigned wait_nr = 1; /* block until at least one cqe is ready */
     int ret;

-    /* Fall back while external clients are disabled */
-    if (qatomic_read(&ctx->external_disable_cnt)) {
-        return fdmon_poll_ops.wait(ctx, ready_list, timeout);
-    }
-
     if (timeout == 0) {
         wait_nr = 0; /* non-blocking */
     } else if (timeout > 0) {
@@ -XXX,XX +XXX,XX @@ static bool fdmon_io_uring_need_wait(AioContext *ctx)
         return true;
     }

-    /* Are we falling back to fdmon-poll? */
-    return qatomic_read(&ctx->external_disable_cnt);
+    return false;
 }

 static const FDMonOps fdmon_io_uring_ops = {
diff --git a/util/fdmon-poll.c b/util/fdmon-poll.c
index XXXXXXX..XXXXXXX 100644
--- a/util/fdmon-poll.c
+++ b/util/fdmon-poll.c
@@ -XXX,XX +XXX,XX @@ static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list,
     assert(npfd == 0);

     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-        if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
-            && aio_node_check(ctx, node->is_external)) {
+        if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events) {
             add_pollfd(node);
         }
     }
diff --git a/util/main-loop.c b/util/main-loop.c
index XXXXXXX..XXXXXXX 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -XXX,XX +XXX,XX @@ void qemu_set_fd_handler(int fd,
                          void *opaque)
 {
     iohandler_init();
-    aio_set_fd_handler(iohandler_ctx, fd, false,
-                       fd_read, fd_write, NULL, NULL, opaque);
+    aio_set_fd_handler(iohandler_ctx, fd, fd_read, fd_write, NULL, NULL,
+                       opaque);
 }

 void event_notifier_set_handler(EventNotifier *e,
                                 EventNotifierHandler *handler)
 {
     iohandler_init();
-    aio_set_event_notifier(iohandler_ctx, e, false,
-                           handler, NULL, NULL);
+    aio_set_event_notifier(iohandler_ctx, e, handler, NULL, NULL);
 }
diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine-io.c
+++ b/util/qemu-coroutine-io.c
@@ -XXX,XX +XXX,XX @@ typedef struct {
 static void fd_coroutine_enter(void *opaque)
 {
     FDYieldUntilData *data = opaque;
-    aio_set_fd_handler(data->ctx, data->fd, false,
-                       NULL, NULL, NULL, NULL, NULL);
+    aio_set_fd_handler(data->ctx, data->fd, NULL, NULL, NULL, NULL, NULL);
     qemu_coroutine_enter(data->co);
 }

@@ -XXX,XX +XXX,XX @@ void coroutine_fn yield_until_fd_readable(int fd)
     data.ctx = qemu_get_current_aio_context();
     data.co = qemu_coroutine_self();
     data.fd = fd;
-    aio_set_fd_handler(
-        data.ctx, fd, false, fd_coroutine_enter, NULL, NULL, NULL, &data);
+    aio_set_fd_handler(data.ctx, fd, fd_coroutine_enter, NULL, NULL, NULL,
+                       &data);
     qemu_coroutine_yield();
 }
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index XXXXXXX..XXXXXXX 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -XXX,XX +XXX,XX @@ set_watch(VuDev *vu_dev, int fd, int vu_evt,
         vu_fd_watch->fd = fd;
         vu_fd_watch->cb = cb;
         qemu_socket_set_nonblock(fd);
-        aio_set_fd_handler(server->ioc->ctx, fd, false, kick_handler,
+        aio_set_fd_handler(server->ioc->ctx, fd, kick_handler,
                            NULL, NULL, NULL, vu_fd_watch);
         vu_fd_watch->vu_dev = vu_dev;
         vu_fd_watch->pvt = pvt;
@@ -XXX,XX +XXX,XX @@ static void remove_watch(VuDev *vu_dev, int fd)
     if (!vu_fd_watch) {
         return;
     }
-    aio_set_fd_handler(server->ioc->ctx, fd, false,
-                       NULL, NULL, NULL, NULL, NULL);
+    aio_set_fd_handler(server->ioc->ctx, fd, NULL, NULL, NULL, NULL, NULL);

     QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
     g_free(vu_fd_watch);
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_stop(VuServer *server)
         VuFdWatch *vu_fd_watch;

         QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
-            aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false,
+            aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
                                NULL, NULL, NULL, NULL, vu_fd_watch);
         }

@@ -XXX,XX +XXX,XX @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
     qio_channel_attach_aio_context(server->ioc, ctx);

     QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
-        aio_set_fd_handler(ctx, vu_fd_watch->fd, false, kick_handler, NULL,
+        aio_set_fd_handler(ctx, vu_fd_watch->fd, kick_handler, NULL,
                            NULL, NULL, vu_fd_watch);
     }

@@ -XXX,XX +XXX,XX @@ void vhost_user_server_detach_aio_context(VuServer *server)
     VuFdWatch *vu_fd_watch;

     QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
-        aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false,
+        aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
                            NULL, NULL, NULL, NULL, vu_fd_watch);
     }

diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -XXX,XX +XXX,XX @@ if have_block
   if nettle.found() or gcrypt.found()
     tests += {'test-crypto-pbkdf': [io]}
   endif
-  if config_host_data.get('CONFIG_EPOLL_CREATE1')
-    tests += {'test-fdmon-epoll': [testblock]}
-  endif
 endif

 if have_system
--
2.40.1

From: Fam Zheng <famz@redhat.com>

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block_int.h |  1 -
 block/io.c                | 18 ------------------
 2 files changed, 19 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -XXX,XX +XXX,XX @@ bool blk_dev_is_tray_open(BlockBackend *blk);
 bool blk_dev_is_medium_locked(BlockBackend *blk);

 void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
-bool bdrv_requests_pending(BlockDriverState *bs);

 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
     assert(old >= 1);
 }

-/* Check if any requests are in-flight (including throttled requests) */
-bool bdrv_requests_pending(BlockDriverState *bs)
-{
-    BdrvChild *child;
-
-    if (atomic_read(&bs->in_flight)) {
-        return true;
-    }
-
-    QLIST_FOREACH(child, &bs->children, next) {
-        if (bdrv_requests_pending(child->bs)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
 typedef struct {
     Coroutine *co;
     BlockDriverState *bs;
--
2.13.6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 block/io.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
     BdrvNextIterator it;
     GSList *aio_ctxs = NULL, *ctx;

+    /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
+     * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
+     * nodes in several different AioContexts, so make sure we're in the main
+     * context. */
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
     block_job_pause_all();

     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
--
2.13.6
From: Stefan Hajnoczi <stefanha@redhat.com>

Each vhost-user-blk request runs in a coroutine. When the BlockBackend
enters a drained section we need to enter a quiescent state. Currently
any in-flight requests race with bdrv_drained_begin() because it is
unaware of vhost-user-blk requests.

When blk_co_preadv/pwritev()/etc returns it wakes the
bdrv_drained_begin() thread but vhost-user-blk request processing has
not yet finished. The request coroutine continues executing while the
main loop thread thinks it is in a drained section.

One example where this is unsafe is for blk_set_aio_context() where
bdrv_drained_begin() is called before .aio_context_detached() and
.aio_context_attach(). If request coroutines are still running after
bdrv_drained_begin(), then the AioContext could change underneath them
and they race with new requests processed in the new AioContext. This
could lead to virtqueue corruption, for example.

(This example is theoretical, I came across this while reading the
code and have not tried to reproduce it.)

It's easy to make bdrv_drained_begin() wait for in-flight requests: add
a .drained_poll() callback that checks the VuServer's in-flight counter.
VuServer just needs an API that returns true when there are requests in
flight. The in-flight counter needs to be atomic.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-7-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/qemu/vhost-user-server.h     |  4 +++-
 block/export/vhost-user-blk-server.c | 13 +++++++++++++
 util/vhost-user-server.c             | 18 ++++++++++++------
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/vhost-user-server.h
+++ b/include/qemu/vhost-user-server.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
     int max_queues;
     const VuDevIface *vu_iface;
 
+    unsigned int in_flight; /* atomic */
+
     /* Protected by ctx lock */
-    unsigned int in_flight;
     bool wait_idle;
     VuDev vu_dev;
     QIOChannel *ioc; /* The I/O channel with the client */
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_stop(VuServer *server);
 
 void vhost_user_server_inc_in_flight(VuServer *server);
 void vhost_user_server_dec_in_flight(VuServer *server);
+bool vhost_user_server_has_in_flight(VuServer *server);
 
 void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
 void vhost_user_server_detach_aio_context(VuServer *server);
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -XXX,XX +XXX,XX @@ static void vu_blk_exp_resize(void *opaque)
     vu_config_change_msg(&vexp->vu_server.vu_dev);
 }
 
+/*
+ * Ensures that bdrv_drained_begin() waits until in-flight requests complete.
+ *
+ * Called with vexp->export.ctx acquired.
+ */
+static bool vu_blk_drained_poll(void *opaque)
+{
+    VuBlkExport *vexp = opaque;
+
+    return vhost_user_server_has_in_flight(&vexp->vu_server);
+}
+
 static const BlockDevOps vu_blk_dev_ops = {
+    .drained_poll  = vu_blk_drained_poll,
     .resize_cb = vu_blk_exp_resize,
 };
 
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index XXXXXXX..XXXXXXX 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -XXX,XX +XXX,XX @@ static void panic_cb(VuDev *vu_dev, const char *buf)
 void vhost_user_server_inc_in_flight(VuServer *server)
 {
     assert(!server->wait_idle);
-    server->in_flight++;
+    qatomic_inc(&server->in_flight);
 }
 
 void vhost_user_server_dec_in_flight(VuServer *server)
 {
-    server->in_flight--;
-    if (server->wait_idle && !server->in_flight) {
-        aio_co_wake(server->co_trip);
+    if (qatomic_fetch_dec(&server->in_flight) == 1) {
+        if (server->wait_idle) {
+            aio_co_wake(server->co_trip);
+        }
     }
 }
 
+bool vhost_user_server_has_in_flight(VuServer *server)
+{
+    return qatomic_load_acquire(&server->in_flight) > 0;
+}
+
 static bool coroutine_fn
 vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
 {
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void vu_client_trip(void *opaque)
         /* Keep running */
     }
 
-    if (server->in_flight) {
+    if (vhost_user_server_has_in_flight(server)) {
         /* Wait for requests to complete before we can unmap the memory */
         server->wait_idle = true;
         qemu_coroutine_yield();
         server->wait_idle = false;
     }
-    assert(server->in_flight == 0);
+    assert(!vhost_user_server_has_in_flight(server));
 
     vu_deinit(vu_dev);
-- 
2.40.1

bdrv_drained_begin() doesn't increase bs->quiesce_counter recursively
and also doesn't notify other parent nodes of children, which both means
that the child nodes are not actually drained, and bdrv_drained_begin()
is providing useful functionality only on a single node.

To keep things consistent, we also shouldn't call the block driver
callbacks recursively.

A proper recursive drain version that provides an actually working
drained section for child nodes will be introduced later.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 block/io.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
 }
 
 /* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive)
 {
     BdrvChild *child, *tmp;
     BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
     bdrv_coroutine_enter(bs, data.co);
     BDRV_POLL_WHILE(bs, !data.done);
 
-    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
-        bdrv_drain_invoke(child->bs, begin);
+    if (recursive) {
+        QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
+            bdrv_drain_invoke(child->bs, begin, true);
+        }
     }
 }
 
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
     bdrv_parent_drained_begin(bs);
-    bdrv_drain_invoke(bs, true);
+    bdrv_drain_invoke(bs, true, false);
     bdrv_drain_recurse(bs);
 }
 
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
 
     /* Re-enable things in child-to-parent order */
-    bdrv_drain_invoke(bs, false);
+    bdrv_drain_invoke(bs, false, false);
     bdrv_parent_drained_end(bs);
     aio_enable_external(bdrv_get_aio_context(bs));
 }
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         aio_disable_external(aio_context);
         bdrv_parent_drained_begin(bs);
-        bdrv_drain_invoke(bs, true);
+        bdrv_drain_invoke(bs, true, true);
         aio_context_release(aio_context);
 
         if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
 
         /* Re-enable things in child-to-parent order */
         aio_context_acquire(aio_context);
-        bdrv_drain_invoke(bs, false);
+        bdrv_drain_invoke(bs, false, true);
         bdrv_parent_drained_end(bs);
         aio_enable_external(aio_context);
         aio_context_release(aio_context);
-- 
2.13.6
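The in-flight gate above reduces to a small lock-free pattern. A standalone model using C11 <stdatomic.h> in place of qatomic_*() (illustrative only, not QEMU code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint in_flight;

static void inc_in_flight(void)
{
    atomic_fetch_add_explicit(&in_flight, 1, memory_order_relaxed);
}

static void dec_in_flight(void)
{
    /* fetch_sub() returning 1 means we just finished the last request.
     * The release here pairs with the acquire in has_in_flight(), so a
     * poller that observes 0 also observes the requests' effects. */
    if (atomic_fetch_sub_explicit(&in_flight, 1, memory_order_release) == 1) {
        printf("idle: wake whoever is draining\n");  /* aio_co_wake() role */
    }
}

static bool has_in_flight(void)
{
    return atomic_load_explicit(&in_flight, memory_order_acquire) > 0;
}

int main(void)
{
    inc_in_flight();
    printf("drained_poll returns %d\n", has_in_flight()); /* 1: keep waiting */
    dec_in_flight();
    printf("drained_poll returns %d\n", has_in_flight()); /* 0: quiescent */
    return 0;
}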
From: Stefan Hajnoczi <stefanha@redhat.com>

The virtio-scsi Host Bus Adapter provides access to devices on a SCSI
bus. Those SCSI devices typically have a BlockBackend. When the
BlockBackend enters a drained section, the SCSI device must temporarily
stop submitting new I/O requests.

Implement this behavior by temporarily stopping virtio-scsi virtqueue
processing when one of the SCSI devices enters a drained section. The
new scsi_device_drained_begin() API allows scsi-disk to message the
virtio-scsi HBA.

scsi_device_drained_begin() uses a drain counter so that multiple SCSI
devices can have overlapping drained sections. The HBA only sees one
pair of .drained_begin/end() calls.

After this commit, virtio-scsi no longer depends on hw/virtio's
ioeventfd aio_set_event_notifier(is_external=true). This commit is a
step towards removing the aio_disable_external() API.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230516190238.8401-19-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/scsi/scsi.h          | 14 ++++++++++++
 hw/scsi/scsi-bus.c              | 40 +++++++++++++++++++++++++++++++++
 hw/scsi/scsi-disk.c             | 27 +++++++++++++++++-----
 hw/scsi/virtio-scsi-dataplane.c | 18 +++++++++------
 hw/scsi/virtio-scsi.c           | 38 +++++++++++++++++++++++++++++++
 hw/scsi/trace-events            |  2 ++
 6 files changed, 127 insertions(+), 12 deletions(-)

diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -XXX,XX +XXX,XX @@ struct SCSIBusInfo {
     void (*save_request)(QEMUFile *f, SCSIRequest *req);
     void *(*load_request)(QEMUFile *f, SCSIRequest *req);
     void (*free_request)(SCSIBus *bus, void *priv);
+
+    /*
+     * Temporarily stop submitting new requests between drained_begin() and
+     * drained_end(). Called from the main loop thread with the BQL held.
+     *
+     * Implement these callbacks if request processing is triggered by a file
+     * descriptor like an EventNotifier. Otherwise set them to NULL.
+     */
+    void (*drained_begin)(SCSIBus *bus);
+    void (*drained_end)(SCSIBus *bus);
 };
 
 #define TYPE_SCSI_BUS "SCSI"
@@ -XXX,XX +XXX,XX @@ struct SCSIBus {
 
     SCSISense unit_attention;
     const SCSIBusInfo *info;
+
+    int drain_count; /* protected by BQL */
 };
 
 /**
@@ -XXX,XX +XXX,XX @@ void scsi_req_cancel_complete(SCSIRequest *req);
 void scsi_req_cancel(SCSIRequest *req);
 void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier);
 void scsi_req_retry(SCSIRequest *req);
+void scsi_device_drained_begin(SCSIDevice *sdev);
+void scsi_device_drained_end(SCSIDevice *sdev);
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense);
 void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense);
 void scsi_device_report_change(SCSIDevice *dev, SCSISense sense);
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -XXX,XX +XXX,XX @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
     scsi_device_set_ua(sdev, sense);
 }
 
+void scsi_device_drained_begin(SCSIDevice *sdev)
+{
+    SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, sdev->qdev.parent_bus);
+    if (!bus) {
+        return;
+    }
+
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+    assert(bus->drain_count < INT_MAX);
+
+    /*
+     * Multiple BlockBackends can be on a SCSIBus and each may begin/end
+     * draining at any time. Keep a counter so HBAs only see begin/end once.
+     */
+    if (bus->drain_count++ == 0) {
+        trace_scsi_bus_drained_begin(bus, sdev);
+        if (bus->info->drained_begin) {
+            bus->info->drained_begin(bus);
+        }
+    }
+}
+
+void scsi_device_drained_end(SCSIDevice *sdev)
+{
+    SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, sdev->qdev.parent_bus);
+    if (!bus) {
+        return;
+    }
+
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+    assert(bus->drain_count > 0);
+
+    if (bus->drain_count-- == 1) {
+        trace_scsi_bus_drained_end(bus, sdev);
+        if (bus->info->drained_end) {
+            bus->info->drained_end(bus);
+        }
+    }
+}
+
 static char *scsibus_get_dev_path(DeviceState *dev)
 {
     SCSIDevice *d = SCSI_DEVICE(dev);
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -XXX,XX +XXX,XX @@ static void scsi_disk_reset(DeviceState *dev)
     s->qdev.scsi_version = s->qdev.default_scsi_version;
 }
 
+static void scsi_disk_drained_begin(void *opaque)
+{
+    SCSIDiskState *s = opaque;
+
+    scsi_device_drained_begin(&s->qdev);
+}
+
+static void scsi_disk_drained_end(void *opaque)
+{
+    SCSIDiskState *s = opaque;
+
+    scsi_device_drained_end(&s->qdev);
+}
+
 static void scsi_disk_resize_cb(void *opaque)
 {
     SCSIDiskState *s = opaque;
@@ -XXX,XX +XXX,XX @@ static bool scsi_cd_is_medium_locked(void *opaque)
 }
 
 static const BlockDevOps scsi_disk_removable_block_ops = {
-    .change_media_cb = scsi_cd_change_media_cb,
+    .change_media_cb  = scsi_cd_change_media_cb,
+    .drained_begin    = scsi_disk_drained_begin,
+    .drained_end      = scsi_disk_drained_end,
     .eject_request_cb = scsi_cd_eject_request_cb,
-    .is_tray_open = scsi_cd_is_tray_open,
     .is_medium_locked = scsi_cd_is_medium_locked,
-
-    .resize_cb = scsi_disk_resize_cb,
+    .is_tray_open     = scsi_cd_is_tray_open,
+    .resize_cb        = scsi_disk_resize_cb,
 };
 
 static const BlockDevOps scsi_disk_block_ops = {
-    .resize_cb = scsi_disk_resize_cb,
+    .drained_begin = scsi_disk_drained_begin,
+    .drained_end   = scsi_disk_drained_end,
+    .resize_cb     = scsi_disk_resize_cb,
 };
 
 static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
     s->dataplane_starting = false;
     s->dataplane_started = true;
 
-    aio_context_acquire(s->ctx);
-    virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
-    virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
+    if (s->bus.drain_count == 0) {
+        aio_context_acquire(s->ctx);
+        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
+        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
 
-    for (i = 0; i < vs->conf.num_queues; i++) {
-        virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
+        for (i = 0; i < vs->conf.num_queues; i++) {
+            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
+        }
+        aio_context_release(s->ctx);
     }
-    aio_context_release(s->ctx);
     return 0;
 
 fail_host_notifiers:
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
     }
     s->dataplane_stopping = true;
 
-    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
+    if (s->bus.drain_count == 0) {
+        aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
+    }
 
     blk_drain_all(); /* ensure there are no in-flight requests */
 
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     }
 }
 
+/* Suspend virtqueue ioeventfd processing during drain */
+static void virtio_scsi_drained_begin(SCSIBus *bus)
+{
+    VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+    uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
+                            s->parent_obj.conf.num_queues;
+
+    if (!s->dataplane_started) {
+        return;
+    }
+
+    for (uint32_t i = 0; i < total_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(vdev, i);
+        virtio_queue_aio_detach_host_notifier(vq, s->ctx);
+    }
+}
+
+/* Resume virtqueue ioeventfd processing after drain */
+static void virtio_scsi_drained_end(SCSIBus *bus)
+{
+    VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+    uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
+                            s->parent_obj.conf.num_queues;
+
+    if (!s->dataplane_started) {
+        return;
+    }
+
+    for (uint32_t i = 0; i < total_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(vdev, i);
+        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+    }
+}
+
 static struct SCSIBusInfo virtio_scsi_scsi_info = {
     .tcq = true,
     .max_channel = VIRTIO_SCSI_MAX_CHANNEL,
@@ -XXX,XX +XXX,XX @@ static struct SCSIBusInfo virtio_scsi_scsi_info = {
     .get_sg_list = virtio_scsi_get_sg_list,
     .save_request = virtio_scsi_save_request,
     .load_request = virtio_scsi_load_request,
+    .drained_begin = virtio_scsi_drained_begin,
+    .drained_end = virtio_scsi_drained_end,
 };
 
 void virtio_scsi_common_realize(DeviceState *dev,
diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/trace-events
+++ b/hw/scsi/trace-events
@@ -XXX,XX +XXX,XX @@ scsi_req_cancel(int target, int lun, int tag) "target %d lun %d tag %d"
 scsi_req_data(int target, int lun, int tag, int len) "target %d lun %d tag %d len %d"
 scsi_req_data_canceled(int target, int lun, int tag, int len) "target %d lun %d tag %d len %d"
 scsi_req_dequeue(int target, int lun, int tag) "target %d lun %d tag %d"
+scsi_bus_drained_begin(void *bus, void *sdev) "bus %p sdev %p"
+scsi_bus_drained_end(void *bus, void *sdev) "bus %p sdev %p"
 scsi_req_continue(int target, int lun, int tag) "target %d lun %d tag %d"
 scsi_req_continue_canceled(int target, int lun, int tag) "target %d lun %d tag %d"
 scsi_req_parsed(int target, int lun, int tag, int cmd, int mode, int xfer) "target %d lun %d tag %d command %d dir %d length %d"
-- 
2.40.1

The existing test is for bdrv_drain_all_begin/end() only. Generalise the
test case so that it can be run for the other variants as well. At the
moment this is only bdrv_drain_begin/end(), but in a while, we'll add
another one.

Also, add a backing file to the test node to test whether the operations
work recursively.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 69 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 62 insertions(+), 7 deletions(-)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = {
 
     .bdrv_co_drain_begin    = bdrv_test_co_drain_begin,
     .bdrv_co_drain_end      = bdrv_test_co_drain_end,
+
+    .bdrv_child_perm        = bdrv_format_default_perms,
 };
 
 static void aio_ret_cb(void *opaque, int ret)
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
     *aio_ret = ret;
 }
 
-static void test_drv_cb_drain_all(void)
+enum drain_type {
+    BDRV_DRAIN_ALL,
+    BDRV_DRAIN,
+};
+
+static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
+{
+    switch (drain_type) {
+    case BDRV_DRAIN_ALL:        bdrv_drain_all_begin(); break;
+    case BDRV_DRAIN:            bdrv_drained_begin(bs); break;
+    default:                    g_assert_not_reached();
+    }
+}
+
+static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
+{
+    switch (drain_type) {
+    case BDRV_DRAIN_ALL:        bdrv_drain_all_end(); break;
+    case BDRV_DRAIN:            bdrv_drained_end(bs); break;
+    default:                    g_assert_not_reached();
+    }
+}
+
+static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
 {
     BlockBackend *blk;
-    BlockDriverState *bs;
-    BDRVTestState *s;
+    BlockDriverState *bs, *backing;
+    BDRVTestState *s, *backing_s;
     BlockAIOCB *acb;
     int aio_ret;
 
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void)
     s = bs->opaque;
     blk_insert_bs(blk, bs, &error_abort);
 
+    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+    backing_s = backing->opaque;
+    bdrv_set_backing_hd(bs, backing, &error_abort);
+
     /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
     g_assert_cmpint(s->drain_count, ==, 0);
-    bdrv_drain_all_begin();
+    g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+    do_drain_begin(drain_type, bs);
+
     g_assert_cmpint(s->drain_count, ==, 1);
-    bdrv_drain_all_end();
+    g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
+
+    do_drain_end(drain_type, bs);
+
     g_assert_cmpint(s->drain_count, ==, 0);
+    g_assert_cmpint(backing_s->drain_count, ==, 0);
 
     /* Now do the same while a request is pending */
     aio_ret = -EINPROGRESS;
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void)
     g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
 
     g_assert_cmpint(s->drain_count, ==, 0);
-    bdrv_drain_all_begin();
+    g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+    do_drain_begin(drain_type, bs);
+
     g_assert_cmpint(aio_ret, ==, 0);
     g_assert_cmpint(s->drain_count, ==, 1);
-    bdrv_drain_all_end();
+    g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
+
+    do_drain_end(drain_type, bs);
+
     g_assert_cmpint(s->drain_count, ==, 0);
+    g_assert_cmpint(backing_s->drain_count, ==, 0);
 
+    bdrv_unref(backing);
     bdrv_unref(bs);
     blk_unref(blk);
 }
 
+static void test_drv_cb_drain_all(void)
+{
+    test_drv_cb_common(BDRV_DRAIN_ALL, true);
+}
+
+static void test_drv_cb_drain(void)
+{
+    test_drv_cb_common(BDRV_DRAIN, false);
+}
+
 int main(int argc, char **argv)
 {
     bdrv_init();
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_init(&argc, &argv, NULL);
 
     g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
+    g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
 
     return g_test_run();
 }
-- 
2.13.6
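The drain counter above is a classic edge-triggered nesting counter: only the 0->1 and 1->0 transitions reach the HBA. A standalone model (the Bus type and callbacks are invented for illustration, not QEMU code):

#include <assert.h>
#include <limits.h>
#include <stdio.h>

typedef struct {
    int drain_count;
    void (*drained_begin)(void);
    void (*drained_end)(void);
} Bus;

static void hba_begin(void) { printf("HBA: stop virtqueue processing\n"); }
static void hba_end(void)   { printf("HBA: resume virtqueue processing\n"); }

static void device_drained_begin(Bus *bus)
{
    assert(bus->drain_count < INT_MAX);
    if (bus->drain_count++ == 0 && bus->drained_begin) {
        bus->drained_begin();           /* only the outermost begin */
    }
}

static void device_drained_end(Bus *bus)
{
    assert(bus->drain_count > 0);
    if (bus->drain_count-- == 1 && bus->drained_end) {
        bus->drained_end();             /* only the last end */
    }
}

int main(void)
{
    Bus bus = { 0, hba_begin, hba_end };
    device_drained_begin(&bus);   /* disk A drains: HBA notified */
    device_drained_begin(&bus);   /* disk B drains: just counted */
    device_drained_end(&bus);     /* disk A done: bus stays drained */
    device_drained_end(&bus);     /* disk B done: HBA resumes */
    return 0;
}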
From: Stefan Hajnoczi <stefanha@redhat.com>

This is part of ongoing work to remove the aio_disable_external() API.

Use BlockDevOps .drained_begin/end/poll() instead of
aio_set_fd_handler(is_external=true).

As a side-effect the FUSE export now follows AioContext changes like the
other export types.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-16-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/export/fuse.c | 56 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/block/export/fuse.c b/block/export/fuse.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -XXX,XX +XXX,XX @@ typedef struct FuseExport {
 
     struct fuse_session *fuse_session;
     struct fuse_buf fuse_buf;
+    unsigned int in_flight; /* atomic */
     bool mounted, fd_handler_set_up;
 
     char *mountpoint;
@@ -XXX,XX +XXX,XX @@ static void read_from_fuse_export(void *opaque);
 static bool is_regular_file(const char *path, Error **errp);
 
 
+static void fuse_export_drained_begin(void *opaque)
+{
+    FuseExport *exp = opaque;
+
+    aio_set_fd_handler(exp->common.ctx,
+                       fuse_session_fd(exp->fuse_session), false,
+                       NULL, NULL, NULL, NULL, NULL);
+    exp->fd_handler_set_up = false;
+}
+
+static void fuse_export_drained_end(void *opaque)
+{
+    FuseExport *exp = opaque;
+
+    /* Refresh AioContext in case it changed */
+    exp->common.ctx = blk_get_aio_context(exp->common.blk);
+
+    aio_set_fd_handler(exp->common.ctx,
+                       fuse_session_fd(exp->fuse_session), false,
+                       read_from_fuse_export, NULL, NULL, NULL, exp);
+    exp->fd_handler_set_up = true;
+}
+
+static bool fuse_export_drained_poll(void *opaque)
+{
+    FuseExport *exp = opaque;
+
+    return qatomic_read(&exp->in_flight) > 0;
+}
+
+static const BlockDevOps fuse_export_blk_dev_ops = {
+    .drained_begin = fuse_export_drained_begin,
+    .drained_end   = fuse_export_drained_end,
+    .drained_poll  = fuse_export_drained_poll,
+};
+
 static int fuse_export_create(BlockExport *blk_exp,
                               BlockExportOptions *blk_exp_args,
                               Error **errp)
@@ -XXX,XX +XXX,XX @@ static int fuse_export_create(BlockExport *blk_exp,
         }
     }
 
+    blk_set_dev_ops(exp->common.blk, &fuse_export_blk_dev_ops, exp);
+
+    /*
+     * We handle draining ourselves using an in-flight counter and by disabling
+     * the FUSE fd handler. Do not queue BlockBackend requests, they need to
+     * complete so the in-flight counter reaches zero.
+     */
+    blk_set_disable_request_queuing(exp->common.blk, true);
+
     init_exports_table();
 
     /*
@@ -XXX,XX +XXX,XX @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
     g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
 
     aio_set_fd_handler(exp->common.ctx,
-                       fuse_session_fd(exp->fuse_session), true,
+                       fuse_session_fd(exp->fuse_session), false,
                        read_from_fuse_export, NULL, NULL, NULL, exp);
     exp->fd_handler_set_up = true;
 
@@ -XXX,XX +XXX,XX @@ static void read_from_fuse_export(void *opaque)
 
     blk_exp_ref(&exp->common);
 
+    qatomic_inc(&exp->in_flight);
+
     do {
         ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
     } while (ret == -EINTR);
@@ -XXX,XX +XXX,XX @@ static void read_from_fuse_export(void *opaque)
     fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
 
 out:
+    if (qatomic_fetch_dec(&exp->in_flight) == 1) {
+        aio_wait_kick(); /* wake AIO_WAIT_WHILE() */
+    }
+
     blk_exp_unref(&exp->common);
 }
 
@@ -XXX,XX +XXX,XX @@ static void fuse_export_shutdown(BlockExport *blk_exp)
 
     if (exp->fd_handler_set_up) {
         aio_set_fd_handler(exp->common.ctx,
-                           fuse_session_fd(exp->fuse_session), true,
+                           fuse_session_fd(exp->fuse_session), false,
                            NULL, NULL, NULL, NULL, NULL);
         exp->fd_handler_set_up = false;
     }
-- 
2.40.1

This is currently only working correctly for bdrv_drain(), not for
bdrv_drain_all(). Leave a comment for the drain_all case, we'll address
it later.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void)
     test_drv_cb_common(BDRV_DRAIN, false);
 }
 
+static void test_quiesce_common(enum drain_type drain_type, bool recursive)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs, *backing;
+
+    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+                              &error_abort);
+    blk_insert_bs(blk, bs, &error_abort);
+
+    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+    bdrv_set_backing_hd(bs, backing, &error_abort);
+
+    g_assert_cmpint(bs->quiesce_counter, ==, 0);
+    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+
+    do_drain_begin(drain_type, bs);
+
+    g_assert_cmpint(bs->quiesce_counter, ==, 1);
+    g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
+
+    do_drain_end(drain_type, bs);
+
+    g_assert_cmpint(bs->quiesce_counter, ==, 0);
+    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+
+    bdrv_unref(backing);
+    bdrv_unref(bs);
+    blk_unref(blk);
+}
+
+static void test_quiesce_drain_all(void)
+{
+    // XXX drain_all doesn't quiesce
+    //test_quiesce_common(BDRV_DRAIN_ALL, true);
+}
+
+static void test_quiesce_drain(void)
+{
+    test_quiesce_common(BDRV_DRAIN, false);
+}
+
 int main(int argc, char **argv)
 {
     bdrv_init();
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
     g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
 
+    g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
+    g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
+
     return g_test_run();
 }
-- 
2.13.6
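To see how .drained_poll() is consumed, here is a standalone model of the loop a drainer runs (illustrative only: the busy-wait stands in for AIO_WAIT_WHILE(), which really dispatches event-loop handlers between polls; all names are invented, not QEMU code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint in_flight;
static bool fd_handler_enabled = true;

static bool export_drained_poll(void)
{
    return atomic_load(&in_flight) > 0;
}

static void export_drained_begin(void)
{
    fd_handler_enabled = false;       /* no new requests from the fd */
}

static void export_drained_end(void)
{
    fd_handler_enabled = true;
}

static void drain(void)
{
    export_drained_begin();
    while (export_drained_poll()) {
        /* real code: run pending event-loop handlers here */
        atomic_fetch_sub(&in_flight, 1);  /* pretend a request completes */
    }
    printf("quiescent: fd handler %s, in flight %u\n",
           fd_handler_enabled ? "on" : "off", atomic_load(&in_flight));
    export_drained_end();
}

int main(void)
{
    atomic_fetch_add(&in_flight, 3);  /* three requests are in flight */
    drain();
    return 0;
}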
From: Stefan Hajnoczi <stefanha@redhat.com>

blk_set_aio_context() is not fully transactional because
blk_do_set_aio_context() updates blk->ctx outside the transaction. Most
of the time this goes unnoticed but a BlockDevOps.drained_end() callback
that invokes blk_get_aio_context() fails assert(ctx == blk->ctx). This
happens because blk->ctx is only assigned after
BlockDevOps.drained_end() is called and we're in an intermediate state
where BlockDriverState nodes already have the new context and the
BlockBackend still has the old context.

Making blk_set_aio_context() fully transactional solves this assertion
failure because the BlockBackend's context is updated as part of the
transaction (before BlockDevOps.drained_end() is called).

Split blk_do_set_aio_context() in order to solve this assertion failure.
This helper function actually serves two different purposes:
1. It drives blk_set_aio_context().
2. It responds to BdrvChildClass->change_aio_ctx().

Get rid of the helper function. Do #1 inside blk_set_aio_context() and
do #2 inside blk_root_set_aio_ctx_commit(). This simplifies the code.

The only drawback of the fully transactional approach is that
blk_set_aio_context() must contend with blk_root_set_aio_ctx_commit()
being invoked as part of the AioContext change propagation. This can be
solved by temporarily setting blk->allow_aio_context_change to true.

Future patches call blk_get_aio_context() from
BlockDevOps->drained_end(), so this patch will become necessary.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-2-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/block-backend.c | 61 ++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 38 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
     return blk_get_aio_context(blk_acb->blk);
 }
 
-static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
-                                  bool update_root_node, Error **errp)
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                        Error **errp)
 {
+    bool old_allow_change;
     BlockDriverState *bs = blk_bs(blk);
-    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
     int ret;
 
-    if (bs) {
-        bdrv_ref(bs);
-
-        if (update_root_node) {
-            /*
-             * update_root_node MUST be false for blk_root_set_aio_ctx_commit(),
-             * as we are already in the commit function of a transaction.
-             */
-            ret = bdrv_try_change_aio_context(bs, new_context, blk->root, errp);
-            if (ret < 0) {
-                bdrv_unref(bs);
-                return ret;
-            }
-        }
-        /*
-         * Make blk->ctx consistent with the root node before we invoke any
-         * other operations like drain that might inquire blk->ctx
-         */
-        blk->ctx = new_context;
-        if (tgm->throttle_state) {
-            bdrv_drained_begin(bs);
-            throttle_group_detach_aio_context(tgm);
-            throttle_group_attach_aio_context(tgm, new_context);
-            bdrv_drained_end(bs);
-        }
+    GLOBAL_STATE_CODE();
 
-        bdrv_unref(bs);
-    } else {
+    if (!bs) {
         blk->ctx = new_context;
+        return 0;
     }
 
-    return 0;
-}
+    bdrv_ref(bs);
 
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
-                        Error **errp)
-{
-    GLOBAL_STATE_CODE();
-    return blk_do_set_aio_context(blk, new_context, true, errp);
+    old_allow_change = blk->allow_aio_context_change;
+    blk->allow_aio_context_change = true;
+
+    ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);
+
+    blk->allow_aio_context_change = old_allow_change;
+
+    bdrv_unref(bs);
+    return ret;
 }
 
 typedef struct BdrvStateBlkRootContext {
@@ -XXX,XX +XXX,XX @@ static void blk_root_set_aio_ctx_commit(void *opaque)
 {
     BdrvStateBlkRootContext *s = opaque;
     BlockBackend *blk = s->blk;
+    AioContext *new_context = s->new_ctx;
+    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
 
-    blk_do_set_aio_context(blk, s->new_ctx, false, &error_abort);
+    blk->ctx = new_context;
+    if (tgm->throttle_state) {
+        throttle_group_detach_aio_context(tgm);
+        throttle_group_attach_aio_context(tgm, new_context);
+    }
 }
 
 static TransactionActionDrv set_blk_root_context = {
-- 
2.40.1

Block jobs already paused themselves when their main BlockBackend
entered a drained section. This is not good enough: We also want to
pause a block job and may not submit new requests if, for example, the
mirror target node should be drained.

This implements .drained_begin/end callbacks in child_job in order to
consider all block nodes related to the job, and removes the
BlockBackend callbacks which are unnecessary now because the root of the
job main BlockBackend is always referenced with a child_job, too.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockjob.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index XXXXXXX..XXXXXXX 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -XXX,XX +XXX,XX @@ static char *child_job_get_parent_desc(BdrvChild *c)
                            job->id);
 }
 
-static const BdrvChildRole child_job = {
-    .get_parent_desc    = child_job_get_parent_desc,
-    .stay_at_node       = true,
-};
-
-static void block_job_drained_begin(void *opaque)
+static void child_job_drained_begin(BdrvChild *c)
 {
-    BlockJob *job = opaque;
+    BlockJob *job = c->opaque;
     block_job_pause(job);
 }
 
-static void block_job_drained_end(void *opaque)
+static void child_job_drained_end(BdrvChild *c)
 {
-    BlockJob *job = opaque;
+    BlockJob *job = c->opaque;
     block_job_resume(job);
 }
 
-static const BlockDevOps block_job_dev_ops = {
-    .drained_begin = block_job_drained_begin,
-    .drained_end = block_job_drained_end,
+static const BdrvChildRole child_job = {
+    .get_parent_desc    = child_job_get_parent_desc,
+    .drained_begin      = child_job_drained_begin,
+    .drained_end        = child_job_drained_end,
+    .stay_at_node       = true,
 };
 
 void block_job_remove_all_bdrv(BlockJob *job)
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
     bs->job = job;
 
-    blk_set_dev_ops(blk, &block_job_dev_ops, job);
     bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
 
     QLIST_INSERT_HEAD(&block_jobs, job, job_list);
-- 
2.13.6
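The fix above hinges on updating blk->ctx inside the transaction's commit phase. A standalone model of that ordering (a toy single-action transaction; QEMU's util/transactions.c keeps a list of prepare/commit/abort actions, and all types here are invented):

#include <assert.h>
#include <stdio.h>

typedef struct { const char *name; } AioContext;
typedef struct { AioContext *ctx; } BlockBackend;

typedef struct {
    BlockBackend *blk;
    AioContext *new_ctx;
} SetCtxAction;

static void set_ctx_commit(SetCtxAction *a)
{
    a->blk->ctx = a->new_ctx;   /* updated as part of the transaction */
}

static void drained_end_cb(BlockBackend *blk, AioContext *expected)
{
    /* the assertion that used to fire when blk->ctx lagged behind */
    assert(blk->ctx == expected);
}

int main(void)
{
    AioContext main_ctx = { "main" }, iothread_ctx = { "iothread" };
    BlockBackend blk = { &main_ctx };

    SetCtxAction action = { &blk, &iothread_ctx };
    set_ctx_commit(&action);              /* commit phase runs first */
    drained_end_cb(&blk, &iothread_ctx);  /* callback sees the new context */
    printf("blk ctx is now %s\n", blk.ctx->name);
    return 0;
}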
From: Stefan Hajnoczi <stefanha@redhat.com>

Detach event channels during drained sections to stop I/O submission
from the ring. xen-block is no longer reliant on aio_disable_external()
after this patch. This will allow us to remove the
aio_disable_external() API once all other code that relies on it is
converted.

Extend xen_device_set_event_channel_context() to allow ctx=NULL. The
event channel still exists but the event loop does not monitor the file
descriptor. Event channel processing can resume by calling
xen_device_set_event_channel_context() with a non-NULL ctx.

Factor out xen_device_set_event_channel_context() calls in
hw/block/dataplane/xen-block.c into attach/detach helper functions.
Incidentally, these don't require the AioContext lock because
aio_set_fd_handler() is thread-safe.

It's safer to register BlockDevOps after the dataplane instance has been
created. The BlockDevOps .drained_begin/end() callbacks depend on the
dataplane instance, so move the blk_set_dev_ops() call after
xen_block_dataplane_create().

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-12-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/dataplane/xen-block.h |  2 ++
 hw/block/dataplane/xen-block.c | 42 +++++++++++++++++++++++++---------
 hw/block/xen-block.c           | 24 ++++++++++++++++---
 hw/xen/xen-bus.c               |  7 ++++--
 4 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/hw/block/dataplane/xen-block.h b/hw/block/dataplane/xen-block.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/xen-block.h
+++ b/hw/block/dataplane/xen-block.h
@@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
                                unsigned int protocol,
                                Error **errp);
 void xen_block_dataplane_stop(XenBlockDataPlane *dataplane);
+void xen_block_dataplane_attach(XenBlockDataPlane *dataplane);
+void xen_block_dataplane_detach(XenBlockDataPlane *dataplane);
 
 #endif /* HW_BLOCK_DATAPLANE_XEN_BLOCK_H */
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane)
     g_free(dataplane);
 }
 
+void xen_block_dataplane_detach(XenBlockDataPlane *dataplane)
+{
+    if (!dataplane || !dataplane->event_channel) {
+        return;
+    }
+
+    /* Only reason for failure is a NULL channel */
+    xen_device_set_event_channel_context(dataplane->xendev,
+                                         dataplane->event_channel,
+                                         NULL, &error_abort);
+}
+
+void xen_block_dataplane_attach(XenBlockDataPlane *dataplane)
+{
+    if (!dataplane || !dataplane->event_channel) {
+        return;
+    }
+
+    /* Only reason for failure is a NULL channel */
+    xen_device_set_event_channel_context(dataplane->xendev,
+                                         dataplane->event_channel,
+                                         dataplane->ctx, &error_abort);
+}
+
 void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
 {
     XenDevice *xendev;
@@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
 
     xendev = dataplane->xendev;
 
-    aio_context_acquire(dataplane->ctx);
-    if (dataplane->event_channel) {
-        /* Only reason for failure is a NULL channel */
-        xen_device_set_event_channel_context(xendev, dataplane->event_channel,
-                                             qemu_get_aio_context(),
-                                             &error_abort);
+    if (!blk_in_drain(dataplane->blk)) {
+        xen_block_dataplane_detach(dataplane);
     }
+
+    aio_context_acquire(dataplane->ctx);
     /* Xen doesn't have multiple users for nodes, so this can't fail */
     blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort);
     aio_context_release(dataplane->ctx);
@@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
     blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL);
     aio_context_release(old_context);
 
-    /* Only reason for failure is a NULL channel */
-    aio_context_acquire(dataplane->ctx);
-    xen_device_set_event_channel_context(xendev, dataplane->event_channel,
-                                         dataplane->ctx, &error_abort);
-    aio_context_release(dataplane->ctx);
+    if (!blk_in_drain(dataplane->blk)) {
+        xen_block_dataplane_attach(dataplane);
+    }
 
     return;
 
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -XXX,XX +XXX,XX @@ static void xen_block_resize_cb(void *opaque)
     xen_device_backend_printf(xendev, "state", "%u", state);
 }
 
+/* Suspend request handling */
+static void xen_block_drained_begin(void *opaque)
+{
+    XenBlockDevice *blockdev = opaque;
+
+    xen_block_dataplane_detach(blockdev->dataplane);
+}
+
+/* Resume request handling */
+static void xen_block_drained_end(void *opaque)
+{
+    XenBlockDevice *blockdev = opaque;
+
+    xen_block_dataplane_attach(blockdev->dataplane);
+}
+
 static const BlockDevOps xen_block_dev_ops = {
-    .resize_cb = xen_block_resize_cb,
+    .resize_cb     = xen_block_resize_cb,
+    .drained_begin = xen_block_drained_begin,
+    .drained_end   = xen_block_drained_end,
 };
 
 static void xen_block_realize(XenDevice *xendev, Error **errp)
@@ -XXX,XX +XXX,XX @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
         return;
     }
 
-    blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
-
     if (conf->discard_granularity == -1) {
         conf->discard_granularity = conf->physical_block_size;
     }
@@ -XXX,XX +XXX,XX @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
     blockdev->dataplane =
         xen_block_dataplane_create(xendev, blk, conf->logical_block_size,
                                    blockdev->props.iothread);
+
+    blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
 }
 
 static void xen_block_frontend_changed(XenDevice *xendev,
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -XXX,XX +XXX,XX @@ void xen_device_set_event_channel_context(XenDevice *xendev,
                        NULL, NULL, NULL, NULL, NULL);
 
     channel->ctx = ctx;
-    aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
-                       xen_device_event, NULL, xen_device_poll, NULL, channel);
+    if (ctx) {
+        aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
+                           true, xen_device_event, NULL, xen_device_poll, NULL,
+                           channel);
+    }
 }
 
 XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
-- 
2.40.1

Block jobs must be paused if any of the involved nodes are drained.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@
 
 #include "qemu/osdep.h"
 #include "block/block.h"
+#include "block/blockjob_int.h"
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
 
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
     test_quiesce_common(BDRV_DRAIN, false);
 }
 
+
+typedef struct TestBlockJob {
+    BlockJob common;
+    bool should_complete;
+} TestBlockJob;
+
+static void test_job_completed(BlockJob *job, void *opaque)
+{
+    block_job_completed(job, 0);
+}
+
+static void coroutine_fn test_job_start(void *opaque)
+{
+    TestBlockJob *s = opaque;
+
+    while (!s->should_complete) {
+        block_job_sleep_ns(&s->common, 100000);
+    }
+
+    block_job_defer_to_main_loop(&s->common, test_job_completed, NULL);
+}
+
+static void test_job_complete(BlockJob *job, Error **errp)
+{
+    TestBlockJob *s = container_of(job, TestBlockJob, common);
+    s->should_complete = true;
+}
+
+BlockJobDriver test_job_driver = {
+    .instance_size = sizeof(TestBlockJob),
+    .start         = test_job_start,
+    .complete      = test_job_complete,
+};
+
+static void test_blockjob_common(enum drain_type drain_type)
+{
+    BlockBackend *blk_src, *blk_target;
+    BlockDriverState *src, *target;
+    BlockJob *job;
+    int ret;
+
+    src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
+                               &error_abort);
+    blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_insert_bs(blk_src, src, &error_abort);
+
+    target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
+                                  &error_abort);
+    blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_insert_bs(blk_target, target, &error_abort);
+
+    job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0,
+                           0, NULL, NULL, &error_abort);
+    block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
+    block_job_start(job);
+
+    g_assert_cmpint(job->pause_count, ==, 0);
+    g_assert_false(job->paused);
+    g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
+
+    do_drain_begin(drain_type, src);
+
+    if (drain_type == BDRV_DRAIN_ALL) {
+        /* bdrv_drain_all() drains both src and target, and involves an
+         * additional block_job_pause_all() */
+        g_assert_cmpint(job->pause_count, ==, 3);
+    } else {
+        g_assert_cmpint(job->pause_count, ==, 1);
+    }
+    /* XXX We don't wait until the job is actually paused. Is this okay? */
+    /* g_assert_true(job->paused); */
+    g_assert_false(job->busy); /* The job is paused */
+
+    do_drain_end(drain_type, src);
+
+    g_assert_cmpint(job->pause_count, ==, 0);
+    g_assert_false(job->paused);
+    g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
+
+    do_drain_begin(drain_type, target);
+
+    if (drain_type == BDRV_DRAIN_ALL) {
+        /* bdrv_drain_all() drains both src and target, and involves an
+         * additional block_job_pause_all() */
+        g_assert_cmpint(job->pause_count, ==, 3);
+    } else {
+        g_assert_cmpint(job->pause_count, ==, 1);
+    }
+    /* XXX We don't wait until the job is actually paused. Is this okay? */
+    /* g_assert_true(job->paused); */
+    g_assert_false(job->busy); /* The job is paused */
+
+    do_drain_end(drain_type, target);
+
+    g_assert_cmpint(job->pause_count, ==, 0);
+    g_assert_false(job->paused);
+    g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
+
+    ret = block_job_complete_sync(job, &error_abort);
+    g_assert_cmpint(ret, ==, 0);
+
+    blk_unref(blk_src);
+    blk_unref(blk_target);
+    bdrv_unref(src);
+    bdrv_unref(target);
+}
+
+static void test_blockjob_drain_all(void)
+{
+    test_blockjob_common(BDRV_DRAIN_ALL);
+}
+
+static void test_blockjob_drain(void)
+{
+    test_blockjob_common(BDRV_DRAIN);
+}
+
 int main(int argc, char **argv)
 {
     bdrv_init();
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
     g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
 
+    g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
+    g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
+
     return g_test_run();
 }
-- 
2.13.6
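The ctx=NULL convention above gives a cheap suspend/resume switch for an event channel: a NULL context leaves the channel intact but unmonitored. A standalone model (the EventChannel and AioContext types here are invented, not QEMU code):

#include <stdio.h>
#include <stddef.h>

typedef struct { const char *name; } AioContext;

typedef struct {
    int fd;
    AioContext *ctx;   /* NULL: event loop ignores this fd */
} EventChannel;

static void set_event_channel_context(EventChannel *ch, AioContext *ctx)
{
    ch->ctx = ctx;
    if (ctx) {
        printf("fd %d watched in %s\n", ch->fd, ctx->name);
    } else {
        printf("fd %d not watched (drained)\n", ch->fd);
    }
}

int main(void)
{
    AioContext iothread = { "iothread" };
    EventChannel ch = { 42, NULL };

    set_event_channel_context(&ch, &iothread);  /* normal operation */
    set_event_channel_context(&ch, NULL);       /* drained_begin() */
    set_event_channel_context(&ch, &iothread);  /* drained_end() */
    return 0;
}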
From: Stefan Hajnoczi <stefanha@redhat.com>

Host notifiers can now use is_external=false since virtio-blk and
virtio-scsi no longer rely on is_external=true for drained sections.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230516190238.8401-20-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/virtio/virtio.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -XXX,XX +XXX,XX @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
 
 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
-    aio_set_event_notifier(ctx, &vq->host_notifier, true,
+    aio_set_event_notifier(ctx, &vq->host_notifier, false,
                            virtio_queue_host_notifier_read,
                            virtio_queue_host_notifier_aio_poll,
                            virtio_queue_host_notifier_aio_poll_ready);
@@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
  */
 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
 {
-    aio_set_event_notifier(ctx, &vq->host_notifier, true,
+    aio_set_event_notifier(ctx, &vq->host_notifier, false,
                            virtio_queue_host_notifier_read,
                            NULL, NULL);
 }
 
 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
-    aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
+    aio_set_event_notifier(ctx, &vq->host_notifier, false, NULL, NULL, NULL);
 }
 
 void virtio_queue_host_notifier_read(EventNotifier *n)
-- 
2.40.1

Block jobs are already paused using the BdrvChildRole drain callbacks,
so we don't need an additional block_job_pause_all() call.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/io.c              |  4 ----
 tests/test-bdrv-drain.c | 10 ++++------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
      * context. */
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 
-    block_job_pause_all();
-
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
         aio_enable_external(aio_context);
         aio_context_release(aio_context);
     }
-
-    block_job_resume_all();
 }
 
 void bdrv_drain_all(void)
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
     do_drain_begin(drain_type, src);
 
     if (drain_type == BDRV_DRAIN_ALL) {
-        /* bdrv_drain_all() drains both src and target, and involves an
-         * additional block_job_pause_all() */
-        g_assert_cmpint(job->pause_count, ==, 3);
+        /* bdrv_drain_all() drains both src and target */
+        g_assert_cmpint(job->pause_count, ==, 2);
     } else {
         g_assert_cmpint(job->pause_count, ==, 1);
     }
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
     do_drain_begin(drain_type, target);
 
     if (drain_type == BDRV_DRAIN_ALL) {
-        /* bdrv_drain_all() drains both src and target, and involves an
-         * additional block_job_pause_all() */
-        g_assert_cmpint(job->pause_count, ==, 3);
+        /* bdrv_drain_all() drains both src and target */
+        g_assert_cmpint(job->pause_count, ==, 2);
     } else {
         g_assert_cmpint(job->pause_count, ==, 1);
     }
-- 
2.13.6
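The test expectation change above follows from simple pause accounting: without the extra block_job_pause_all(), a drain_all pauses a job once per drained node it is attached to, so the expected pause_count drops from 3 to 2. A standalone model (the Job type is invented, not QEMU code):

#include <assert.h>
#include <stdio.h>

typedef struct {
    int pause_count;
} Job;

static void job_pause(Job *job)  { job->pause_count++; }
static void job_resume(Job *job) { job->pause_count--; }

static void drain_all_begin(Job *job, int attached_nodes)
{
    for (int i = 0; i < attached_nodes; i++) {
        job_pause(job);          /* one pause per drained child node */
    }
}

static void drain_all_end(Job *job, int attached_nodes)
{
    for (int i = 0; i < attached_nodes; i++) {
        job_resume(job);
    }
}

int main(void)
{
    Job job = { 0 };
    drain_all_begin(&job, 2);    /* src + target */
    printf("pause_count = %d\n", job.pause_count);
    assert(job.pause_count == 2);
    drain_all_end(&job, 2);
    assert(job.pause_count == 0);
    return 0;
}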
From: Stefan Hajnoczi <stefanha@redhat.com>

For simplicity, always run BlockDevOps .drained_begin/end/poll()
callbacks in the main loop thread. This makes it easier to implement the
callbacks and avoids extra locks.

Move the function pointer declarations from the I/O Code section to the
Global State section for BlockDevOps, BdrvChildClass, and BlockDriver.

Narrow IO_OR_GS_CODE() to GLOBAL_STATE_CODE() where appropriate.

The test-bdrv-drain test case calls bdrv_drain() from an IOThread. This
is now only allowed from coroutine context, so update the test case to
run in a coroutine.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-11-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block_int-common.h      | 72 +++++++++++++--------------
 include/sysemu/block-backend-common.h | 25 +++++-----
 block/io.c                            | 14 ++++--
 tests/unit/test-bdrv-drain.c          | 14 +++---
 4 files changed, 67 insertions(+), 58 deletions(-)

diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
     void (*bdrv_attach_aio_context)(BlockDriverState *bs,
                                     AioContext *new_context);
 
+    /**
+     * bdrv_drain_begin is called if implemented in the beginning of a
+     * drain operation to drain and stop any internal sources of requests in
+     * the driver.
+     * bdrv_drain_end is called if implemented at the end of the drain.
+     *
+     * They should be used by the driver to e.g. manage scheduled I/O
+     * requests, or toggle an internal state. After the end of the drain new
+     * requests will continue normally.
+     *
+     * Implementations of both functions must not call aio_poll().
+     */
+    void (*bdrv_drain_begin)(BlockDriverState *bs);
+    void (*bdrv_drain_end)(BlockDriverState *bs);
+
     /**
      * Try to get @bs's logical and physical block size.
      * On success, store them in @bsz and return zero.
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
     void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_unplug)(
         BlockDriverState *bs);
 
-    /**
-     * bdrv_drain_begin is called if implemented in the beginning of a
-     * drain operation to drain and stop any internal sources of requests in
-     * the driver.
-     * bdrv_drain_end is called if implemented at the end of the drain.
-     *
-     * They should be used by the driver to e.g. manage scheduled I/O
-     * requests, or toggle an internal state. After the end of the drain new
-     * requests will continue normally.
-     *
-     * Implementations of both functions must not call aio_poll().
-     */
-    void (*bdrv_drain_begin)(BlockDriverState *bs);
-    void (*bdrv_drain_end)(BlockDriverState *bs);
-
     bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
 
     bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)(
@@ -XXX,XX +XXX,XX @@ struct BdrvChildClass {
     void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child);
     void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child);
 
+    /*
+     * If this pair of functions is implemented, the parent doesn't issue new
+     * requests after returning from .drained_begin() until .drained_end() is
+     * called.
+     *
+     * These functions must not change the graph (and therefore also must not
+     * call aio_poll(), which could change the graph indirectly).
+     *
+     * Note that this can be nested. If drained_begin() was called twice, new
+     * I/O is allowed only after drained_end() was called twice, too.
+     */
+    void (*drained_begin)(BdrvChild *child);
+    void (*drained_end)(BdrvChild *child);
+
+    /*
+     * Returns whether the parent has pending requests for the child. This
+     * callback is polled after .drained_begin() has been called until all
+     * activity on the child has stopped.
+     */
+    bool (*drained_poll)(BdrvChild *child);
+
     /*
      * Notifies the parent that the filename of its child has changed (e.g.
      * because the direct child was removed from the backing chain), so that it
@@ -XXX,XX +XXX,XX @@ struct BdrvChildClass {
     const char *(*get_name)(BdrvChild *child);
 
     AioContext *(*get_parent_aio_context)(BdrvChild *child);
-
-    /*
-     * If this pair of functions is implemented, the parent doesn't issue new
-     * requests after returning from .drained_begin() until .drained_end() is
-     * called.
-     *
-     * These functions must not change the graph (and therefore also must not
-     * call aio_poll(), which could change the graph indirectly).
-     *
-     * Note that this can be nested. If drained_begin() was called twice, new
-     * I/O is allowed only after drained_end() was called twice, too.
-     */
-    void (*drained_begin)(BdrvChild *child);
-    void (*drained_end)(BdrvChild *child);
-
-    /*
-     * Returns whether the parent has pending requests for the child. This
-     * callback is polled after .drained_begin() has been called until all
-     * activity on the child has stopped.
-     */
-    bool (*drained_poll)(BdrvChild *child);
 };
 
 extern const BdrvChildClass child_of_bds;
diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/block-backend-common.h
+++ b/include/sysemu/block-backend-common.h
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps {
      */
     bool (*is_medium_locked)(void *opaque);
 
+    /*
+     * Runs when the backend receives a drain request.
+     */
+    void (*drained_begin)(void *opaque);
+    /*
+     * Runs when the backend's last drain request ends.
+     */
+    void (*drained_end)(void *opaque);
+    /*
+     * Is the device still busy?
+     */
+    bool (*drained_poll)(void *opaque);
+
     /*
      * I/O API functions. These functions are thread-safe.
      *
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps {
      * Runs when the size changed (e.g. monitor command block_resize)
      */
     void (*resize_cb)(void *opaque);
-    /*
-     * Runs when the backend receives a drain request.
-     */
-    void (*drained_begin)(void *opaque);
-    /*
-     * Runs when the backend's last drain request ends.
-     */
-    void (*drained_end)(void *opaque);
-    /*
-     * Is the device still busy?
-     */
-    bool (*drained_poll)(void *opaque);
 } BlockDevOps;
 
 /*
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
 
 void bdrv_parent_drained_end_single(BdrvChild *c)
 {
-    IO_OR_GS_CODE();
+    GLOBAL_STATE_CODE();
 
     assert(c->quiesced_parent);
     c->quiesced_parent = false;
@@ -XXX,XX +XXX,XX @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
 
 void bdrv_parent_drained_begin_single(BdrvChild *c)
 {
-    IO_OR_GS_CODE();
+    GLOBAL_STATE_CODE();
 
     assert(!c->quiesced_parent);
     c->quiesced_parent = true;
@@ -XXX,XX +XXX,XX @@ typedef struct {
 bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
                      bool ignore_bds_parents)
 {
-    IO_OR_GS_CODE();
+    GLOBAL_STATE_CODE();
 
     if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
         return true;
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
     if (ctx != co_ctx) {
         aio_context_release(ctx);
     }
-    replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
+    replay_bh_schedule_oneshot_event(qemu_get_aio_context(),
+                                     bdrv_co_drain_bh_cb, &data);
 
     qemu_coroutine_yield();
     /* If we are resumed from some other event (such as an aio completion or a
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
         return;
     }
 
+    GLOBAL_STATE_CODE();
+
     /* Stop things in parent-to-child order */
     if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
         aio_disable_external(bdrv_get_aio_context(bs));
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
 {
     int old_quiesce_counter;
 
+    IO_OR_GS_CODE();
+
     if (qemu_in_coroutine()) {
         bdrv_co_yield_to_drain(bs, false, parent, false);
         return;
     }
     assert(bs->quiesce_counter > 0);
+    GLOBAL_STATE_CODE();

bdrv_do_drained_begin() restricts the call of parent callbacks and
aio_disable_external() to the outermost drain section, but the block
driver callbacks are always called. bdrv_do_drained_end() must match
this behaviour, otherwise nodes stay drained even if begin/end calls
were balanced.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/io.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
 
 void bdrv_drained_end(BlockDriverState *bs)
 {
+    int old_quiesce_counter;
+
     if (qemu_in_coroutine()) {
         bdrv_co_yield_to_drain(bs, false);
         return;
     }
     assert(bs->quiesce_counter > 0);
return;
24
return;
233
}
25
}
234
assert(bs->quiesce_counter > 0);
26
assert(bs->quiesce_counter > 0);
235
+ GLOBAL_STATE_CODE();
27
- if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
28
- return;
29
- }
30
+ old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
236
31
237
/* Re-enable things in child-to-parent order */
32
/* Re-enable things in child-to-parent order */
238
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
33
bdrv_drain_invoke(bs, false, false);
239
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
34
- bdrv_parent_drained_end(bs);
240
index XXXXXXX..XXXXXXX 100644
35
- aio_enable_external(bdrv_get_aio_context(bs));
241
--- a/tests/unit/test-bdrv-drain.c
36
+ if (old_quiesce_counter == 1) {
242
+++ b/tests/unit/test-bdrv-drain.c
37
+ bdrv_parent_drained_end(bs);
243
@@ -XXX,XX +XXX,XX @@ struct test_iothread_data {
38
+ aio_enable_external(bdrv_get_aio_context(bs));
244
BlockDriverState *bs;
39
+ }
245
enum drain_type drain_type;
246
int *aio_ret;
247
+ bool co_done;
248
};
249
250
-static void test_iothread_drain_entry(void *opaque)
251
+static void coroutine_fn test_iothread_drain_co_entry(void *opaque)
252
{
253
struct test_iothread_data *data = opaque;
254
255
- aio_context_acquire(bdrv_get_aio_context(data->bs));
256
do_drain_begin(data->drain_type, data->bs);
257
g_assert_cmpint(*data->aio_ret, ==, 0);
258
do_drain_end(data->drain_type, data->bs);
259
- aio_context_release(bdrv_get_aio_context(data->bs));
260
261
- qemu_event_set(&done_event);
262
+ data->co_done = true;
263
+ aio_wait_kick();
264
}
40
}
265
41
266
static void test_iothread_aio_cb(void *opaque, int ret)
42
/*
267
@@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
268
BlockDriverState *bs;
269
BDRVTestState *s;
270
BlockAIOCB *acb;
271
+ Coroutine *co;
272
int aio_ret;
273
struct test_iothread_data data;
274
275
@@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
276
}
277
break;
278
case 1:
279
- aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data);
280
- qemu_event_wait(&done_event);
281
+ co = qemu_coroutine_create(test_iothread_drain_co_entry, &data);
282
+ aio_co_enter(ctx_a, co);
283
+ AIO_WAIT_WHILE_UNLOCKED(NULL, !data.co_done);
284
break;
285
default:
286
g_assert_not_reached();
287
--
43
--
288
2.40.1
44
2.13.6
45
46
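To make the BdrvChildClass drained contract above concrete: a parent that implements the pair must track nesting itself, because drained_begin() can be called several times before the matching drained_end() calls arrive. The following is a minimal sketch only — MyParent and its fields are hypothetical and not part of this series:

    /* Illustrative sketch, assuming a hypothetical MyParent stored in
     * child->opaque. Only the 0 -> 1 transition actually stops submission. */
    typedef struct MyParent {
        int quiesce_count;   /* drained_begin() calls minus drained_end() calls */
        int in_flight;       /* requests this parent has issued to the child */
    } MyParent;

    static void my_parent_drained_begin(BdrvChild *child)
    {
        MyParent *p = child->opaque;
        p->quiesce_count++;          /* no aio_poll(), no graph changes here */
    }

    static void my_parent_drained_end(BdrvChild *child)
    {
        MyParent *p = child->opaque;
        assert(p->quiesce_count > 0);
        p->quiesce_count--;          /* new I/O only once this reaches zero */
    }

    static bool my_parent_drained_poll(BdrvChild *child)
    {
        MyParent *p = child->opaque;
        return p->in_flight > 0;     /* polled until all activity has stopped */
    }
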
From: Stefan Hajnoczi <stefanha@redhat.com>

Add a helper function to check whether the device is realized without
requiring the Big QEMU Lock. The next patch adds a second caller. The
goal is to avoid spreading DeviceState field accesses throughout the
code.

Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-3-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/qdev-core.h | 17 ++++++++++++++---
 hw/scsi/scsi-bus.c     |  3 +--
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -XXX,XX +XXX,XX @@
 #ifndef QDEV_CORE_H
 #define QDEV_CORE_H
 
+#include "qemu/atomic.h"
 #include "qemu/queue.h"
 #include "qemu/bitmap.h"
 #include "qemu/rcu.h"
@@ -XXX,XX +XXX,XX @@ typedef struct {
 
 /**
  * DeviceState:
- * @realized: Indicates whether the device has been fully constructed.
- *            When accessed outside big qemu lock, must be accessed with
- *            qatomic_load_acquire()
  * @reset: ResettableState for the device; handled by Resettable interface.
  *
  * This structure should not be accessed directly. We declare it here
@@ -XXX,XX +XXX,XX @@ DeviceState *qdev_new(const char *name);
  */
 DeviceState *qdev_try_new(const char *name);
 
+/**
+ * qdev_is_realized:
+ * @dev: The device to check.
+ *
+ * May be called outside big qemu lock.
+ *
+ * Returns: %true% if the device has been fully constructed, %false% otherwise.
+ */
+static inline bool qdev_is_realized(DeviceState *dev)
+{
+    return qatomic_load_acquire(&dev->realized);
+}
+
 /**
  * qdev_realize: Realize @dev.
  * @dev: device to realize
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -XXX,XX +XXX,XX @@ static SCSIDevice *do_scsi_device_find(SCSIBus *bus,
      * the user access the device.
      */
 
-    if (retval && !include_unrealized &&
-        !qatomic_load_acquire(&retval->qdev.realized)) {
+    if (retval && !include_unrealized && !qdev_is_realized(&retval->qdev)) {
        retval = NULL;
     }
 
-- 
2.40.1

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
 enum drain_type {
     BDRV_DRAIN_ALL,
     BDRV_DRAIN,
+    DRAIN_TYPE_MAX,
 };
 
 static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
     test_quiesce_common(BDRV_DRAIN, false);
 }
 
+static void test_nested(void)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs, *backing;
+    BDRVTestState *s, *backing_s;
+    enum drain_type outer, inner;
+
+    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+                              &error_abort);
+    s = bs->opaque;
+    blk_insert_bs(blk, bs, &error_abort);
+
+    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+    backing_s = backing->opaque;
+    bdrv_set_backing_hd(bs, backing, &error_abort);
+
+    for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
+        for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
+            /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
+            int bs_quiesce      = (outer != BDRV_DRAIN_ALL) +
+                                  (inner != BDRV_DRAIN_ALL);
+            int backing_quiesce = 0;
+            int backing_cb_cnt  = (outer != BDRV_DRAIN) +
+                                  (inner != BDRV_DRAIN);
+
+            g_assert_cmpint(bs->quiesce_counter, ==, 0);
+            g_assert_cmpint(backing->quiesce_counter, ==, 0);
+            g_assert_cmpint(s->drain_count, ==, 0);
+            g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+            do_drain_begin(outer, bs);
+            do_drain_begin(inner, bs);
+
+            g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce);
+            g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
+            g_assert_cmpint(s->drain_count, ==, 2);
+            g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt);
+
+            do_drain_end(inner, bs);
+            do_drain_end(outer, bs);
+
+            g_assert_cmpint(bs->quiesce_counter, ==, 0);
+            g_assert_cmpint(backing->quiesce_counter, ==, 0);
+            g_assert_cmpint(s->drain_count, ==, 0);
+            g_assert_cmpint(backing_s->drain_count, ==, 0);
+        }
+    }
+
+    bdrv_unref(backing);
+    bdrv_unref(bs);
+    blk_unref(blk);
+}
+
 typedef struct TestBlockJob {
     BlockJob common;
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
     g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
 
+    g_test_add_func("/bdrv-drain/nested", test_nested);
+
     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
 
-- 
2.13.6
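A note on why qdev_is_realized() above uses a load-acquire: it only works because the writer publishes the flag with the matching release semantics. A hedged sketch of the pairing, using QEMU's qatomic helpers — the setter below is illustrative, not the series' actual code:

    /* Writer side (under the BQL): construct the device fully, then publish
     * the flag with release semantics so lock-free readers that observe
     * realized == true also observe the completed construction. */
    static void set_realized_sketch(DeviceState *dev, bool value)
    {
        qatomic_store_release(&dev->realized, value);
    }

    /* Reader side (any thread, no BQL), as introduced by the patch: */
    if (qdev_is_realized(dev)) {
        /* safe: the acquire load synchronizes with the release store */
    }
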
The function documentation already says that all callers must hold the
main AioContext lock, but not all of them do. This can cause assertion
failures when functions called by bdrv_open() try to drop the lock. Fix
a few more callers to take the lock before calling bdrv_open().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-4-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c                          |  3 +++
 block/block-backend.c            |  2 ++
 block/qapi-sysemu.c              |  3 +++
 blockdev.c                       | 29 +++++++++++++++++++++++------
 qemu-nbd.c                       |  4 ++++
 tests/unit/test-block-iothread.c |  3 +++
 6 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt,
         return;
     }
 
+    aio_context_acquire(qemu_get_aio_context());
+
     /* Create parameter list */
     create_opts = qemu_opts_append(create_opts, drv->create_opts);
     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
@@ -XXX,XX +XXX,XX @@ out:
     qemu_opts_del(opts);
     qemu_opts_free(create_opts);
     error_propagate(errp, local_err);
+    aio_context_release(qemu_get_aio_context());
 }
 
 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
     }
 
     blk = blk_new(qemu_get_aio_context(), perm, shared);
+    aio_context_acquire(qemu_get_aio_context());
     bs = bdrv_open(filename, reference, options, flags, errp);
+    aio_context_release(qemu_get_aio_context());
     if (!bs) {
         blk_unref(blk);
         return NULL;
diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qapi-sysemu.c
+++ b/block/qapi-sysemu.c
@@ -XXX,XX +XXX,XX @@ void qmp_blockdev_change_medium(const char *device,
         qdict_put_str(options, "driver", format);
     }
 
+    aio_context_acquire(qemu_get_aio_context());
     medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp);
+    aio_context_release(qemu_get_aio_context());
+
     if (!medium_bs) {
         goto fail;
     }
diff --git a/blockdev.c b/blockdev.c
index XXXXXXX..XXXXXXX 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -XXX,XX +XXX,XX @@ err_no_opts:
 /* Takes the ownership of bs_opts */
 BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
 {
+    BlockDriverState *bs;
     int bdrv_flags = 0;
 
     GLOBAL_STATE_CODE();
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
         bdrv_flags |= BDRV_O_INACTIVE;
     }
 
-    return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
+    aio_context_acquire(qemu_get_aio_context());
+    bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
+    aio_context_release(qemu_get_aio_context());
+
+    return bs;
 }
 
 void blockdev_close_all_bdrv_states(void)
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_action(TransactionAction *action,
         }
         qdict_put_str(options, "driver", format);
     }
+    aio_context_release(aio_context);
 
+    aio_context_acquire(qemu_get_aio_context());
     state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
                               errp);
+    aio_context_release(qemu_get_aio_context());
+
     /* We will manually add the backing_hd field to the bs later */
     if (!state->new_bs) {
-        goto out;
+        return;
     }
 
+    aio_context_acquire(aio_context);
+
     /*
      * Allow attaching a backing file to an overlay that's already in use only
      * if the parents don't assume that they are already seeing a valid image.
@@ -XXX,XX +XXX,XX @@ static void drive_backup_action(DriveBackup *backup,
     if (format) {
         qdict_put_str(options, "driver", format);
     }
+    aio_context_release(aio_context);
 
+    aio_context_acquire(qemu_get_aio_context());
     target_bs = bdrv_open(backup->target, NULL, options, flags, errp);
+    aio_context_release(qemu_get_aio_context());
+
     if (!target_bs) {
-        goto out;
+        return;
     }
 
     /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
     old_context = bdrv_get_aio_context(target_bs);
-    aio_context_release(aio_context);
     aio_context_acquire(old_context);
 
     ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
@@ -XXX,XX +XXX,XX @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     if (format) {
         qdict_put_str(options, "driver", format);
     }
+    aio_context_release(aio_context);
 
     /* Mirroring takes care of copy-on-write using the source's backing
      * file.
      */
+    aio_context_acquire(qemu_get_aio_context());
     target_bs = bdrv_open(arg->target, NULL, options, flags, errp);
+    aio_context_release(qemu_get_aio_context());
+
     if (!target_bs) {
-        goto out;
+        return;
     }
 
     zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL &&
@@ -XXX,XX +XXX,XX @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
 
     /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
     old_context = bdrv_get_aio_context(target_bs);
-    aio_context_release(aio_context);
     aio_context_acquire(old_context);
 
     ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
diff --git a/qemu-nbd.c b/qemu-nbd.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     qdict_put_str(raw_opts, "driver", "raw");
     qdict_put_str(raw_opts, "file", bs->node_name);
     qdict_put_int(raw_opts, "offset", dev_offset);
+
+    aio_context_acquire(qemu_get_aio_context());
     bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal);
+    aio_context_release(qemu_get_aio_context());
+
     blk_remove_bs(blk);
     blk_insert_bs(blk, bs, &error_fatal);
     bdrv_unref(bs);
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -XXX,XX +XXX,XX @@ static void test_attach_second_node(void)
     qdict_put_str(options, "driver", "raw");
     qdict_put_str(options, "file", "base");
 
+    /* FIXME raw_open() should take ctx's lock internally */
     aio_context_acquire(ctx);
+    aio_context_acquire(main_ctx);
     filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
+    aio_context_release(main_ctx);
     aio_context_release(ctx);
 
     g_assert(blk_get_aio_context(blk) == ctx);
-- 
2.40.1

This is in preparation for subtree drains, i.e. drained sections that
affect not only a single node, but recursively all child nodes, too.

Calling the parent callbacks for drain is pointless when we just came
from that parent node recursively and leads to multiple increases of
bs->quiesce_counter in a single drain call. Don't do it.

In order for this to work correctly, the parent callback must be called
for every bdrv_drain_begin/end() call, not only for the outermost one:

If we have a node N with two parents A and B, recursive draining of A
should cause the quiesce_counter of B to increase because its child N is
drained independently of B. If now B is recursively drained, too, A must
increase its quiesce_counter because N is drained independently of A
only now, even if N is going from quiesce_counter 1 to 2.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block.h |  4 ++--
 block.c               | 13 +++++++++----
 block/io.c            | 47 ++++++++++++++++++++++++++++++++-------------
 3 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs);
 * Begin a quiesced section of all users of @bs. This is part of
 * bdrv_drained_begin.
 */
-void bdrv_parent_drained_begin(BlockDriverState *bs);
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
 
 /**
  * bdrv_parent_drained_end:
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs);
 * End a quiesced section of all users of @bs. This is part of
 * bdrv_drained_end.
 */
-void bdrv_parent_drained_end(BlockDriverState *bs);
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
 
 /**
  * bdrv_drained_begin:
diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
                                       BlockDriverState *new_bs)
 {
     BlockDriverState *old_bs = child->bs;
+    int i;
 
     if (old_bs && new_bs) {
         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
     }
     if (old_bs) {
         if (old_bs->quiesce_counter && child->role->drained_end) {
-            child->role->drained_end(child);
+            for (i = 0; i < old_bs->quiesce_counter; i++) {
+                child->role->drained_end(child);
+            }
         }
         if (child->role->detach) {
             child->role->detach(child);
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
     if (new_bs) {
         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
         if (new_bs->quiesce_counter && child->role->drained_begin) {
-            child->role->drained_begin(child);
+            for (i = 0; i < new_bs->quiesce_counter; i++) {
+                child->role->drained_begin(child);
+            }
         }
 
         if (child->role->attach) {
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
     AioContext *ctx = bdrv_get_aio_context(bs);
 
     aio_disable_external(ctx);
-    bdrv_parent_drained_begin(bs);
+    bdrv_parent_drained_begin(bs, NULL);
     bdrv_drain(bs); /* ensure there are no in-flight requests */
 
     while (aio_poll(ctx, false)) {
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
      */
     aio_context_acquire(new_context);
     bdrv_attach_aio_context(bs, new_context);
-    bdrv_parent_drained_end(bs);
+    bdrv_parent_drained_end(bs, NULL);
     aio_enable_external(ctx);
     aio_context_release(new_context);
 }
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int bytes, BdrvRequestFlags flags);
 
-void bdrv_parent_drained_begin(BlockDriverState *bs)
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
 {
     BdrvChild *c, *next;
 
     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
+        if (c == ignore) {
+            continue;
+        }
         if (c->role->drained_begin) {
             c->role->drained_begin(c);
         }
     }
 }
 
-void bdrv_parent_drained_end(BlockDriverState *bs)
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
 {
     BdrvChild *c, *next;
 
     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
+        if (c == ignore) {
+            continue;
+        }
         if (c->role->drained_end) {
             c->role->drained_end(c);
         }
     }
@@ -XXX,XX +XXX,XX @@ typedef struct {
     BlockDriverState *bs;
     bool done;
     bool begin;
+    BdrvChild *parent;
 } BdrvCoDrainData;
 
 static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
     return waited;
 }
 
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent);
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
+
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
     BdrvCoDrainData *data = opaque;
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
 
     bdrv_dec_in_flight(bs);
     if (data->begin) {
-        bdrv_drained_begin(bs);
+        bdrv_do_drained_begin(bs, data->parent);
     } else {
-        bdrv_drained_end(bs);
+        bdrv_do_drained_end(bs, data->parent);
     }
 
     data->done = true;
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
 }
 
 static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
-                                                bool begin)
+                                                bool begin, BdrvChild *parent)
 {
     BdrvCoDrainData data;
 
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
         .bs = bs,
         .done = false,
         .begin = begin,
+        .parent = parent,
     };
     bdrv_inc_in_flight(bs);
     aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
     assert(data.done);
 }
 
-void bdrv_drained_begin(BlockDriverState *bs)
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
 {
     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs, true);
+        bdrv_co_yield_to_drain(bs, true, parent);
         return;
     }
 
     /* Stop things in parent-to-child order */
     if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
         aio_disable_external(bdrv_get_aio_context(bs));
-        bdrv_parent_drained_begin(bs);
     }
 
+    bdrv_parent_drained_begin(bs, parent);
     bdrv_drain_invoke(bs, true, false);
     bdrv_drain_recurse(bs);
 }
 
-void bdrv_drained_end(BlockDriverState *bs)
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+    bdrv_do_drained_begin(bs, NULL);
+}
+
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
 {
     int old_quiesce_counter;
 
     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs, false);
+        bdrv_co_yield_to_drain(bs, false, parent);
         return;
     }
     assert(bs->quiesce_counter > 0);
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
 
     /* Re-enable things in child-to-parent order */
     bdrv_drain_invoke(bs, false, false);
+    bdrv_parent_drained_end(bs, parent);
     if (old_quiesce_counter == 1) {
-        bdrv_parent_drained_end(bs);
         aio_enable_external(bdrv_get_aio_context(bs));
     }
 }
 
+void bdrv_drained_end(BlockDriverState *bs)
+{
+    bdrv_do_drained_end(bs, NULL);
+}
+
 /*
  * Wait for pending requests to complete on a single BlockDriverState subtree,
  * and suspend block driver's internal I/O until next request arrives.
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
     /* Stop things in parent-to-child order */
     aio_context_acquire(aio_context);
     aio_disable_external(aio_context);
-    bdrv_parent_drained_begin(bs);
+    bdrv_parent_drained_begin(bs, NULL);
     bdrv_drain_invoke(bs, true, true);
     aio_context_release(aio_context);
 
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
     /* Re-enable things in child-to-parent order */
     aio_context_acquire(aio_context);
     bdrv_drain_invoke(bs, false, true);
-    bdrv_parent_drained_end(bs);
+    bdrv_parent_drained_end(bs, NULL);
     aio_enable_external(aio_context);
     aio_context_release(aio_context);
 }
-- 
2.13.6
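The recurring pattern in the patch above is simply to bracket bdrv_open() with the main loop's AioContext lock, because the new node is created in the main context and callees may temporarily drop that lock. A condensed sketch of the rule, with error handling elided:

    /* Sketch of the locking rule this patch enforces for callers. */
    AioContext *main_ctx = qemu_get_aio_context();

    aio_context_acquire(main_ctx);
    bs = bdrv_open(filename, NULL, options, flags, errp);
    aio_context_release(main_ctx);

    if (!bs) {
        return NULL;   /* errp was already set by bdrv_open() */
    }
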
From: Stefan Hajnoczi <stefanha@redhat.com>

The FUSE export calls blk_exp_ref/unref() without the AioContext lock.
Instead of fixing the FUSE export, adjust blk_exp_ref/unref() so they
work without the AioContext lock. This way it's less error-prone.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-15-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/export.h   |  2 ++
 block/export/export.c    | 13 ++++++-------
 block/export/vduse-blk.c |  4 ----
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/include/block/export.h b/include/block/export.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/export.h
+++ b/include/block/export.h
@@ -XXX,XX +XXX,XX @@ struct BlockExport {
      * Reference count for this block export. This includes strong references
      * both from the owner (qemu-nbd or the monitor) and clients connected to
      * the export.
+     *
+     * Use atomics to access this field.
      */
     int refcount;
 
diff --git a/block/export/export.c b/block/export/export.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -XXX,XX +XXX,XX @@ fail:
     return NULL;
 }
 
-/* Callers must hold exp->ctx lock */
 void blk_exp_ref(BlockExport *exp)
 {
-    assert(exp->refcount > 0);
-    exp->refcount++;
+    assert(qatomic_read(&exp->refcount) > 0);
+    qatomic_inc(&exp->refcount);
 }
 
 /* Runs in the main thread */
@@ -XXX,XX +XXX,XX @@ static void blk_exp_delete_bh(void *opaque)
     aio_context_release(aio_context);
 }
 
-/* Callers must hold exp->ctx lock */
 void blk_exp_unref(BlockExport *exp)
 {
-    assert(exp->refcount > 0);
-    if (--exp->refcount == 0) {
+    assert(qatomic_read(&exp->refcount) > 0);
+    if (qatomic_fetch_dec(&exp->refcount) == 1) {
         /* Touch the block_exports list only in the main thread */
         aio_bh_schedule_oneshot(qemu_get_aio_context(), blk_exp_delete_bh,
                                 exp);
@@ -XXX,XX +XXX,XX @@ void qmp_block_export_del(const char *id,
     if (!has_mode) {
         mode = BLOCK_EXPORT_REMOVE_MODE_SAFE;
     }
-    if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE && exp->refcount > 1) {
+    if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE &&
+        qatomic_read(&exp->refcount) > 1) {
         error_setg(errp, "export '%s' still in use", exp->id);
         error_append_hint(errp, "Use mode='hard' to force client "
                           "disconnect\n");
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
 {
     if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) {
         /* Prevent export from being deleted */
-        aio_context_acquire(vblk_exp->export.ctx);
         blk_exp_ref(&vblk_exp->export);
-        aio_context_release(vblk_exp->export.ctx);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
     aio_wait_kick();
 
     /* Now the export can be deleted */
-    aio_context_acquire(vblk_exp->export.ctx);
     blk_exp_unref(&vblk_exp->export);
-    aio_context_release(vblk_exp->export.ctx);
 }
 
-- 
2.40.1

bdrv_drained_begin() waits for the completion of requests in the whole
subtree, but it only actually keeps its immediate bs parameter quiesced
until bdrv_drained_end().

Add a version that keeps the whole subtree drained. As of this commit,
graph changes cannot be allowed during a subtree drained section, but
this will be fixed soon.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block.h | 13 +++++++++++++
 block/io.c            | 54 ++++++++++++++++++++++++++++++++++-----------
 2 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
 void bdrv_drained_begin(BlockDriverState *bs);
 
 /**
+ * Like bdrv_drained_begin, but recursively begins a quiesced section for
+ * exclusive access to all child nodes as well.
+ *
+ * Graph changes are not allowed during a subtree drain section.
+ */
+void bdrv_subtree_drained_begin(BlockDriverState *bs);
+
+/**
  * bdrv_drained_end:
  *
  * End a quiescent section started by bdrv_drained_begin().
  */
 void bdrv_drained_end(BlockDriverState *bs);
 
+/**
+ * End a quiescent section started by bdrv_subtree_drained_begin().
+ */
+void bdrv_subtree_drained_end(BlockDriverState *bs);
+
 void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
                     Error **errp);
 void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ typedef struct {
     BlockDriverState *bs;
     bool done;
     bool begin;
+    bool recursive;
     BdrvChild *parent;
 } BdrvCoDrainData;
 
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
     return waited;
 }
 
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent);
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+                                  BdrvChild *parent);
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+                                BdrvChild *parent);
 
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
 
     bdrv_dec_in_flight(bs);
     if (data->begin) {
-        bdrv_do_drained_begin(bs, data->parent);
+        bdrv_do_drained_begin(bs, data->recursive, data->parent);
     } else {
-        bdrv_do_drained_end(bs, data->parent);
+        bdrv_do_drained_end(bs, data->recursive, data->parent);
     }
 
     data->done = true;
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
 }
 
 static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
-                                                bool begin, BdrvChild *parent)
+                                                bool begin, bool recursive,
+                                                BdrvChild *parent)
 {
     BdrvCoDrainData data;
 
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
         .bs = bs,
         .done = false,
         .begin = begin,
+        .recursive = recursive,
         .parent = parent,
     };
     bdrv_inc_in_flight(bs);
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
     assert(data.done);
 }
 
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+                                  BdrvChild *parent)
 {
+    BdrvChild *child, *next;
+
     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs, true, parent);
+        bdrv_co_yield_to_drain(bs, true, recursive, parent);
         return;
     }
 
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
     bdrv_parent_drained_begin(bs, parent);
     bdrv_drain_invoke(bs, true, false);
     bdrv_drain_recurse(bs);
+
+    if (recursive) {
+        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+            bdrv_do_drained_begin(child->bs, true, child);
+        }
+    }
 }
 
 void bdrv_drained_begin(BlockDriverState *bs)
 {
-    bdrv_do_drained_begin(bs, NULL);
+    bdrv_do_drained_begin(bs, false, NULL);
+}
+
+void bdrv_subtree_drained_begin(BlockDriverState *bs)
+{
+    bdrv_do_drained_begin(bs, true, NULL);
 }
 
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+                                BdrvChild *parent)
 {
+    BdrvChild *child, *next;
     int old_quiesce_counter;
 
     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs, false, parent);
+        bdrv_co_yield_to_drain(bs, false, recursive, parent);
         return;
     }
     assert(bs->quiesce_counter > 0);
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
     if (old_quiesce_counter == 1) {
         aio_enable_external(bdrv_get_aio_context(bs));
     }
+
+    if (recursive) {
+        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+            bdrv_do_drained_end(child->bs, true, child);
+        }
+    }
 }
 
 void bdrv_drained_end(BlockDriverState *bs)
 {
-    bdrv_do_drained_end(bs, NULL);
+    bdrv_do_drained_end(bs, false, NULL);
+}
+
+void bdrv_subtree_drained_end(BlockDriverState *bs)
+{
+    bdrv_do_drained_end(bs, true, NULL);
 }
 
 /*
-- 
2.13.6
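The refcount conversion above follows the usual lock-free idiom: increments require an already-held reference, and the thread whose fetch-and-decrement returns 1 dropped the last reference and performs (or schedules) the cleanup exactly once. A minimal generic sketch of the same idiom using QEMU's qatomic helpers — Obj is hypothetical:

    /* Generic sketch of the qatomic refcount idiom used by blk_exp_ref/unref(). */
    typedef struct Obj {
        int refcount;   /* accessed only with qatomic_*() */
    } Obj;

    static void obj_ref(Obj *obj)
    {
        /* Callers must already hold a reference, so the count is > 0. */
        assert(qatomic_read(&obj->refcount) > 0);
        qatomic_inc(&obj->refcount);
    }

    static void obj_unref(Obj *obj)
    {
        assert(qatomic_read(&obj->refcount) > 0);
        if (qatomic_fetch_dec(&obj->refcount) == 1) {
            /* Exactly one thread observes the 1 -> 0 transition. */
            g_free(obj);
        }
    }
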
From: Stefan Hajnoczi <stefanha@redhat.com>

vhost-user activity must be suspended during bdrv_drained_begin/end().
This prevents new requests from interfering with whatever is happening
in the drained section.

Previously this was done using aio_set_fd_handler()'s is_external
argument. In a multi-queue block layer world the aio_disable_external()
API cannot be used since multiple AioContext may be processing I/O, not
just one.

Switch to BlockDevOps->drained_begin/end() callbacks.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-8-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/export/vhost-user-blk-server.c | 28 ++++++++++++++++++++++++++--
 util/vhost-user-server.c             | 10 +++++-----
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
 {
     VuBlkExport *vexp = opaque;
 
+    /*
+     * The actual attach will happen in vu_blk_drained_end() and we just
+     * restore ctx here.
+     */
     vexp->export.ctx = ctx;
-    vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
 }
 
 static void blk_aio_detach(void *opaque)
 {
     VuBlkExport *vexp = opaque;
 
-    vhost_user_server_detach_aio_context(&vexp->vu_server);
+    /*
+     * The actual detach already happened in vu_blk_drained_begin() but from
+     * this point on we must not access ctx anymore.
+     */
     vexp->export.ctx = NULL;
 }
 
@@ -XXX,XX +XXX,XX @@ static void vu_blk_exp_resize(void *opaque)
     vu_config_change_msg(&vexp->vu_server.vu_dev);
 }
 
+/* Called with vexp->export.ctx acquired */
+static void vu_blk_drained_begin(void *opaque)
+{
+    VuBlkExport *vexp = opaque;
+
+    vhost_user_server_detach_aio_context(&vexp->vu_server);
+}
+
+/* Called with vexp->export.blk AioContext acquired */
+static void vu_blk_drained_end(void *opaque)
+{
+    VuBlkExport *vexp = opaque;
+
+    vhost_user_server_attach_aio_context(&vexp->vu_server, vexp->export.ctx);
+}
+
 /*
  * Ensures that bdrv_drained_begin() waits until in-flight requests complete.
  *
@@ -XXX,XX +XXX,XX @@ static bool vu_blk_drained_poll(void *opaque)
 }
 
 static const BlockDevOps vu_blk_dev_ops = {
+    .drained_begin = vu_blk_drained_begin,
+    .drained_end   = vu_blk_drained_end,
     .drained_poll  = vu_blk_drained_poll,
     .resize_cb = vu_blk_exp_resize,
 };
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index XXXXXXX..XXXXXXX 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -XXX,XX +XXX,XX @@ set_watch(VuDev *vu_dev, int fd, int vu_evt,
         vu_fd_watch->fd = fd;
         vu_fd_watch->cb = cb;
         qemu_socket_set_nonblock(fd);
-        aio_set_fd_handler(server->ioc->ctx, fd, true, kick_handler,
+        aio_set_fd_handler(server->ioc->ctx, fd, false, kick_handler,
                            NULL, NULL, NULL, vu_fd_watch);
         vu_fd_watch->vu_dev = vu_dev;
         vu_fd_watch->pvt = pvt;
@@ -XXX,XX +XXX,XX @@ static void remove_watch(VuDev *vu_dev, int fd)
     if (!vu_fd_watch) {
         return;
     }
-    aio_set_fd_handler(server->ioc->ctx, fd, true,
+    aio_set_fd_handler(server->ioc->ctx, fd, false,
                        NULL, NULL, NULL, NULL, NULL);
 
     QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_stop(VuServer *server)
     VuFdWatch *vu_fd_watch;
 
     QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
-        aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
+        aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false,
                            NULL, NULL, NULL, NULL, vu_fd_watch);
     }
 
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
     qio_channel_attach_aio_context(server->ioc, ctx);
 
     QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
-        aio_set_fd_handler(ctx, vu_fd_watch->fd, true, kick_handler, NULL,
+        aio_set_fd_handler(ctx, vu_fd_watch->fd, false, kick_handler, NULL,
                            NULL, NULL, vu_fd_watch);
     }
 
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_detach_aio_context(VuServer *server)
     VuFdWatch *vu_fd_watch;
 
     QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
-        aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
+        aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false,
                            NULL, NULL, NULL, NULL, vu_fd_watch);
     }
 
-- 
2.40.1

Add a subtree drain version to the existing test cases.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
 enum drain_type {
     BDRV_DRAIN_ALL,
     BDRV_DRAIN,
+    BDRV_SUBTREE_DRAIN,
     DRAIN_TYPE_MAX,
 };
 
@@ -XXX,XX +XXX,XX @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
     switch (drain_type) {
     case BDRV_DRAIN_ALL:        bdrv_drain_all_begin(); break;
     case BDRV_DRAIN:            bdrv_drained_begin(bs); break;
+    case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_begin(bs); break;
     default:                    g_assert_not_reached();
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
     switch (drain_type) {
     case BDRV_DRAIN_ALL:        bdrv_drain_all_end(); break;
     case BDRV_DRAIN:            bdrv_drained_end(bs); break;
+    case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_end(bs); break;
     default:                    g_assert_not_reached();
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void)
     test_drv_cb_common(BDRV_DRAIN, false);
 }
 
+static void test_drv_cb_drain_subtree(void)
+{
+    test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
+}
+
 static void test_quiesce_common(enum drain_type drain_type, bool recursive)
 {
     BlockBackend *blk;
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
     test_quiesce_common(BDRV_DRAIN, false);
 }
 
+static void test_quiesce_drain_subtree(void)
+{
+    test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
+}
+
 static void test_nested(void)
 {
     BlockBackend *blk;
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)
             /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
             int bs_quiesce      = (outer != BDRV_DRAIN_ALL) +
                                   (inner != BDRV_DRAIN_ALL);
-            int backing_quiesce = 0;
+            int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) +
+                                  (inner == BDRV_SUBTREE_DRAIN);
             int backing_cb_cnt  = (outer != BDRV_DRAIN) +
                                   (inner != BDRV_DRAIN);
 
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain(void)
     test_blockjob_common(BDRV_DRAIN);
 }
 
+static void test_blockjob_drain_subtree(void)
+{
+    test_blockjob_common(BDRV_SUBTREE_DRAIN);
+}
+
 int main(int argc, char **argv)
 {
     bdrv_init();
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
     g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
+    g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
+                    test_drv_cb_drain_subtree);
 
     g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
     g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
+    g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
+                    test_quiesce_drain_subtree);
 
     g_test_add_func("/bdrv-drain/nested", test_nested);
 
     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
+    g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
+                    test_blockjob_drain_subtree);
 
     return g_test_run();
 }
-- 
2.13.6
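For any other export or device model that wants the same treatment as the vhost-user-blk server above, the wiring is just a BlockDevOps with the drained callbacks registered on its BlockBackend. A hedged sketch — the my_export_* names and helpers are hypothetical, only blk_set_dev_ops() is the real API:

    /* Hypothetical export following the pattern of this patch: stop the
     * guest-facing event source in drained_begin(), restart it in
     * drained_end(), so no new requests arrive during the drained section. */
    static void my_export_drained_begin(void *opaque)
    {
        MyExport *exp = opaque;
        my_export_detach_fd_handlers(exp);   /* hypothetical helper */
    }

    static void my_export_drained_end(void *opaque)
    {
        MyExport *exp = opaque;
        my_export_attach_fd_handlers(exp);   /* hypothetical helper */
    }

    static const BlockDevOps my_export_dev_ops = {
        .drained_begin = my_export_drained_begin,
        .drained_end   = my_export_drained_end,
    };

    /* Registered once at export creation time: */
    blk_set_dev_ops(exp->blk, &my_export_dev_ops, exp);
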
From: Stefan Hajnoczi <stefanha@redhat.com>

vduse_blk_detach_ctx() waits for in-flight requests using
AIO_WAIT_WHILE(). This is not allowed according to a comment in
bdrv_set_aio_context_commit():

  /*
   * Take the old AioContex when detaching it from bs.
   * At this point, new_context lock is already acquired, and we are now
   * also taking old_context. This is safe as long as bdrv_detach_aio_context
   * does not call AIO_POLL_WHILE().
   */

Use this opportunity to rewrite the drain code in vduse-blk:

- Use the BlockExport refcount so that vduse_blk_exp_delete() is only
  called when there are no more requests in flight.

- Implement .drained_poll() so in-flight request coroutines are stopped
  by the time .bdrv_detach_aio_context() is called.

- Remove AIO_WAIT_WHILE() from vduse_blk_detach_ctx() to solve the
  .bdrv_detach_aio_context() constraint violation. It's no longer
  needed due to the previous changes.

- Always handle the VDUSE file descriptor, even in drained sections. The
  VDUSE file descriptor doesn't submit I/O, so it's safe to handle it in
  drained sections. This ensures that the VDUSE kernel code gets a fast
  response.

- Suspend virtqueue fd handlers in .drained_begin() and resume them in
  .drained_end(). This eliminates the need for the
  aio_set_fd_handler(is_external=true) flag, which is being removed from
  QEMU.

This is a long list but splitting it into individual commits would
probably lead to git bisect failures - the changes are all related.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-14-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/export/vduse-blk.c | 132 +++++++++++++++++++++++++++------------
 1 file changed, 93 insertions(+), 39 deletions(-)

diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -XXX,XX +XXX,XX @@ typedef struct VduseBlkExport {
     VduseDev *dev;
     uint16_t num_queues;
     char *recon_file;
-    unsigned int inflight;
+    unsigned int inflight; /* atomic */
+    bool vqs_started;
 } VduseBlkExport;
 
 typedef struct VduseBlkReq {
@@ -XXX,XX +XXX,XX @@ typedef struct VduseBlkReq {
 
 static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
 {
-    vblk_exp->inflight++;
+    if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) {
+        /* Prevent export from being deleted */
+        aio_context_acquire(vblk_exp->export.ctx);
+        blk_exp_ref(&vblk_exp->export);
+        aio_context_release(vblk_exp->export.ctx);
+    }
 }
 
 static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
 {
-    if (--vblk_exp->inflight == 0) {
+    if (qatomic_fetch_dec(&vblk_exp->inflight) == 1) {
+        /* Wake AIO_WAIT_WHILE() */
         aio_wait_kick();
+
+        /* Now the export can be deleted */
+        aio_context_acquire(vblk_exp->export.ctx);
+        blk_exp_unref(&vblk_exp->export);
+        aio_context_release(vblk_exp->export.ctx);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
 {
     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
 
+    if (!vblk_exp->vqs_started) {
+        return; /* vduse_blk_drained_end() will start vqs later */
+    }
+
     aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
-                       true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
+                       false, on_vduse_vq_kick, NULL, NULL, NULL, vq);
     /* Make sure we don't miss any kick afer reconnecting */
     eventfd_write(vduse_queue_get_fd(vq), 1);
 }
 
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
 static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
 {
     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
+    int fd = vduse_queue_get_fd(vq);
 
-    aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
-                       true, NULL, NULL, NULL, NULL, NULL);
+    if (fd < 0) {
+        return;
+    }
+
+    aio_set_fd_handler(vblk_exp->export.ctx, fd, false,
+                       NULL, NULL, NULL, NULL, NULL);
 }
 
 static const VduseOps vduse_blk_ops = {
@@ -XXX,XX +XXX,XX @@ static void on_vduse_dev_kick(void *opaque)
 
 static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
 {
-    int i;
-
     aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
-                       true, on_vduse_dev_kick, NULL, NULL, NULL,
+                       false, on_vduse_dev_kick, NULL, NULL, NULL,
                        vblk_exp->dev);
 
-    for (i = 0; i < vblk_exp->num_queues; i++) {
-        VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
-        int fd = vduse_queue_get_fd(vq);
-
-        if (fd < 0) {
-            continue;
-        }
-        aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
-                           on_vduse_vq_kick, NULL, NULL, NULL, vq);
-    }
+    /* Virtqueues are handled by vduse_blk_drained_end() */
 }
 
 static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
 {
-    int i;
-
-    for (i = 0; i < vblk_exp->num_queues; i++) {
-        VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
-        int fd = vduse_queue_get_fd(vq);
-
-        if (fd < 0) {
-            continue;
-        }
-        aio_set_fd_handler(vblk_exp->export.ctx, fd,
-                           true, NULL, NULL, NULL, NULL, NULL);
-    }
     aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
-                       true, NULL, NULL, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL, NULL);
 
-    AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
+    /* Virtqueues are handled by vduse_blk_drained_begin() */
 }
 
 
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_resize(void *opaque)
                            (char *)&config.capacity);
 }
 
+static void vduse_blk_stop_virtqueues(VduseBlkExport *vblk_exp)
+{
+    for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
+        VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
+        vduse_blk_disable_queue(vblk_exp->dev, vq);
+    }
+
+    vblk_exp->vqs_started = false;
+}
+
+static void vduse_blk_start_virtqueues(VduseBlkExport *vblk_exp)
+{
+    vblk_exp->vqs_started = true;
+
+    for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
+        VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
+        vduse_blk_enable_queue(vblk_exp->dev, vq);
+    }
+}
+
+static void vduse_blk_drained_begin(void *opaque)
+{
+    BlockExport *exp = opaque;
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+
+    vduse_blk_stop_virtqueues(vblk_exp);
+}
+
+static void vduse_blk_drained_end(void *opaque)
+{
+    BlockExport *exp = opaque;
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+
+    vduse_blk_start_virtqueues(vblk_exp);
+}
+
+static bool vduse_blk_drained_poll(void *opaque)
+{
+    BlockExport *exp = opaque;
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+
+    return qatomic_read(&vblk_exp->inflight) > 0;
+}
+
 static const BlockDevOps vduse_block_ops = {
-    .resize_cb = vduse_blk_resize,
+    .resize_cb = vduse_blk_resize,
+    .drained_begin = vduse_blk_drained_begin,
+    .drained_end = vduse_blk_drained_end,
+    .drained_poll = vduse_blk_drained_poll,
 };
 
 static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
@@ -XXX,XX +XXX,XX @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
     vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
     vblk_exp->handler.logical_block_size = logical_block_size;
     vblk_exp->handler.writable = opts->writable;
+    vblk_exp->vqs_started = true;
 
     config.capacity =
             cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
@@ -XXX,XX +XXX,XX @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
         vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
     }
 
-    aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
+    aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), false,
                        on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
 
     blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                  vblk_exp);
-
     blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
 
+    /*
+     * We handle draining ourselves using an in-flight counter and by disabling
+     * virtqueue fd handlers. Do not queue BlockBackend requests, they need to
+     * complete so the in-flight counter reaches zero.
+     */
+    blk_set_disable_request_queuing(exp->blk, true);
+
     return 0;
 err:
     vduse_dev_destroy(vblk_exp->dev);
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_exp_delete(BlockExport *exp)
     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
     int ret;
 
+    assert(qatomic_read(&vblk_exp->inflight) == 0);
+
+    vduse_blk_detach_ctx(vblk_exp);
+
     blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                     vblk_exp);
     ret = vduse_dev_destroy(vblk_exp->dev);
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_exp_delete(BlockExport *exp)
     g_free(vblk_exp->handler.serial);
 }
 
+/* Called with exp->ctx acquired */
 static void vduse_blk_exp_request_shutdown(BlockExport *exp)
 {
     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
 
-    aio_context_acquire(vblk_exp->export.ctx);
-    vduse_blk_detach_ctx(vblk_exp);
-    aio_context_release(vblk_exp->export.ctx);
+    vduse_blk_stop_virtqueues(vblk_exp);
 }
 
 const BlockExportDriver blk_exp_vduse_blk = {
-- 
2.40.1

If bdrv_do_drained_begin/end() are called in coroutine context, they
first use a BH to get out of the coroutine context. Call some existing
tests again from a coroutine to cover this code path.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
     *aio_ret = ret;
 }
 
+typedef struct CallInCoroutineData {
+    void (*entry)(void);
+    bool done;
+} CallInCoroutineData;
+
+static coroutine_fn void call_in_coroutine_entry(void *opaque)
+{
+    CallInCoroutineData *data = opaque;
+
+    data->entry();
+    data->done = true;
+}
+
+static void call_in_coroutine(void (*entry)(void))
+{
+    Coroutine *co;
+    CallInCoroutineData data = {
+        .entry = entry,
+        .done = false,
+    };
+
+    co = qemu_coroutine_create(call_in_coroutine_entry, &data);
+    qemu_coroutine_enter(co);
+    while (!data.done) {
+        aio_poll(qemu_get_aio_context(), true);
+    }
+}
+
 enum drain_type {
     BDRV_DRAIN_ALL,
     BDRV_DRAIN,
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void)
     test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
 }
 
+static void test_drv_cb_co_drain(void)
+{
+    call_in_coroutine(test_drv_cb_drain);
+}
+
+static void test_drv_cb_co_drain_subtree(void)
+{
+    call_in_coroutine(test_drv_cb_drain_subtree);
+}
+
 static void test_quiesce_common(enum drain_type drain_type, bool recursive)
 {
     BlockBackend *blk;
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void)
     test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
 }
 
+static void test_quiesce_co_drain(void)
+{
+    call_in_coroutine(test_quiesce_drain);
+}
+
+static void test_quiesce_co_drain_subtree(void)
+{
+    call_in_coroutine(test_quiesce_drain_subtree);
+}
+
 static void test_nested(void)
 {
     BlockBackend *blk;
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
                     test_drv_cb_drain_subtree);
 
+    // XXX bdrv_drain_all() doesn't work in coroutine context
+    g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
+    g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
+                    test_drv_cb_co_drain_subtree);
+
     g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
     g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
     g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
                     test_quiesce_drain_subtree);
 
+    // XXX bdrv_drain_all() doesn't work in coroutine context
+    g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
+    g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
+                    test_quiesce_co_drain_subtree);
+
     g_test_add_func("/bdrv-drain/nested", test_nested);
 
     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
-- 
2.13.6
diff view generated by jsdifflib
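The vduse-blk conversion above is the general template for an export that manages its own draining: count requests in flight with atomic operations, stop the virtqueue fd handlers in .drained_begin, and let .drained_poll report whether requests are still pending. A minimal sketch of that pattern follows (illustrative only; the MyExport type and the my_export_* helpers are made-up names, not part of the patch, and QEMU-internal headers such as qemu/atomic.h are assumed):

    typedef struct MyExport {
        unsigned int inflight;  /* accessed with qatomic_*() */
    } MyExport;

    /* Call before submitting each request */
    static void my_export_inc_inflight(MyExport *exp)
    {
        qatomic_inc(&exp->inflight);
    }

    /* Call when a request completes */
    static void my_export_dec_inflight(MyExport *exp)
    {
        qatomic_dec(&exp->inflight);
    }

    /* BlockDevOps.drained_poll: the drain loop polls until this returns false */
    static bool my_export_drained_poll(void *opaque)
    {
        MyExport *exp = opaque;
        return qatomic_read(&exp->inflight) > 0;
    }

Disabling request queuing with blk_set_disable_request_queuing() is what lets in-flight requests complete inside the drained section, so the counter can actually reach zero.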
From: Stefan Hajnoczi <stefanha@redhat.com>

Detach ioeventfds during drained sections to stop I/O submission from
the guest. virtio-blk is no longer reliant on aio_disable_external()
after this patch. This will allow us to remove the
aio_disable_external() API once all other code that relies on it is
converted.

Take extra care to avoid attaching/detaching ioeventfds if the data
plane is started/stopped during a drained section. This should be rare,
but maybe the mirror block job can trigger it.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230516190238.8401-18-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 16 ++++++++------
 hw/block/virtio-blk.c           | 38 ++++++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     }
 
     /* Get this show started by hooking up our callbacks */
-    aio_context_acquire(s->ctx);
-    for (i = 0; i < nvqs; i++) {
-        VirtQueue *vq = virtio_get_queue(s->vdev, i);
+    if (!blk_in_drain(s->conf->conf.blk)) {
+        aio_context_acquire(s->ctx);
+        for (i = 0; i < nvqs; i++) {
+            VirtQueue *vq = virtio_get_queue(s->vdev, i);
 
-        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+        }
+        aio_context_release(s->ctx);
     }
-    aio_context_release(s->ctx);
     return 0;
 
 fail_aio_context:
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
     s->stopping = true;
     trace_virtio_blk_data_plane_stop(s);
 
-    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
+    if (!blk_in_drain(s->conf->conf.blk)) {
+        aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
+    }
 
     aio_context_acquire(s->ctx);
 
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_resize(void *opaque)
     aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
 }
 
+/* Suspend virtqueue ioeventfd processing during drain */
+static void virtio_blk_drained_begin(void *opaque)
+{
+    VirtIOBlock *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    AioContext *ctx = blk_get_aio_context(s->conf.conf.blk);
+
+    if (!s->dataplane || !s->dataplane_started) {
+        return;
+    }
+
+    for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(vdev, i);
+        virtio_queue_aio_detach_host_notifier(vq, ctx);
+    }
+}
+
+/* Resume virtqueue ioeventfd processing after drain */
+static void virtio_blk_drained_end(void *opaque)
+{
+    VirtIOBlock *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    AioContext *ctx = blk_get_aio_context(s->conf.conf.blk);
+
+    if (!s->dataplane || !s->dataplane_started) {
+        return;
+    }
+
+    for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(vdev, i);
+        virtio_queue_aio_attach_host_notifier(vq, ctx);
+    }
+}
+
 static const BlockDevOps virtio_block_ops = {
-    .resize_cb = virtio_blk_resize,
+    .resize_cb = virtio_blk_resize,
+    .drained_begin = virtio_blk_drained_begin,
+    .drained_end = virtio_blk_drained_end,
 };
 
 static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
-- 
2.40.1

Test that drain sections are correctly propagated through the graph.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)
     blk_unref(blk);
 }
 
+static void test_multiparent(void)
+{
+    BlockBackend *blk_a, *blk_b;
+    BlockDriverState *bs_a, *bs_b, *backing;
+    BDRVTestState *a_s, *b_s, *backing_s;
+
+    blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
+                                &error_abort);
+    a_s = bs_a->opaque;
+    blk_insert_bs(blk_a, bs_a, &error_abort);
+
+    blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
+                                &error_abort);
+    b_s = bs_b->opaque;
+    blk_insert_bs(blk_b, bs_b, &error_abort);
+
+    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+    backing_s = backing->opaque;
+    bdrv_set_backing_hd(bs_a, backing, &error_abort);
+    bdrv_set_backing_hd(bs_b, backing, &error_abort);
+
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+    g_assert_cmpint(a_s->drain_count, ==, 0);
+    g_assert_cmpint(b_s->drain_count, ==, 0);
+    g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
+    g_assert_cmpint(backing->quiesce_counter, ==, 1);
+    g_assert_cmpint(a_s->drain_count, ==, 1);
+    g_assert_cmpint(b_s->drain_count, ==, 1);
+    g_assert_cmpint(backing_s->drain_count, ==, 1);
+
+    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
+
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
+    g_assert_cmpint(backing->quiesce_counter, ==, 2);
+    g_assert_cmpint(a_s->drain_count, ==, 2);
+    g_assert_cmpint(b_s->drain_count, ==, 2);
+    g_assert_cmpint(backing_s->drain_count, ==, 2);
+
+    do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
+
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
+    g_assert_cmpint(backing->quiesce_counter, ==, 1);
+    g_assert_cmpint(a_s->drain_count, ==, 1);
+    g_assert_cmpint(b_s->drain_count, ==, 1);
+    g_assert_cmpint(backing_s->drain_count, ==, 1);
+
+    do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+    g_assert_cmpint(a_s->drain_count, ==, 0);
+    g_assert_cmpint(b_s->drain_count, ==, 0);
+    g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+    bdrv_unref(backing);
+    bdrv_unref(bs_a);
+    bdrv_unref(bs_b);
+    blk_unref(blk_a);
+    blk_unref(blk_b);
+}
+
 
 typedef struct TestBlockJob {
     BlockJob common;
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
                     test_quiesce_co_drain_subtree);
 
     g_test_add_func("/bdrv-drain/nested", test_nested);
+    g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
 
     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
-- 
2.13.6
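The drained_begin/drained_end pairing shown for virtio-blk can be read as a general recipe: stop the event source that submits new I/O when the drain starts, and restart it when the drain ends. A condensed sketch of the pattern, under the assumption of a hypothetical MyDevice with nvqs virtqueues (not a drop-in implementation; only the QEMU notifier helpers used in the patch are real):

    #include "hw/virtio/virtio.h"        /* virtio_get_queue(), notifier helpers */
    #include "sysemu/block-backend.h"    /* blk_get_aio_context() */

    typedef struct MyDevice {            /* hypothetical device state */
        VirtIODevice vdev;
        BlockBackend *blk;
        uint16_t nvqs;
    } MyDevice;

    static void my_dev_drained_begin(void *opaque)
    {
        MyDevice *s = opaque;
        AioContext *ctx = blk_get_aio_context(s->blk);

        /* Stop ioeventfd processing so the guest cannot submit new requests */
        for (uint16_t i = 0; i < s->nvqs; i++) {
            virtio_queue_aio_detach_host_notifier(virtio_get_queue(&s->vdev, i), ctx);
        }
    }

    static void my_dev_drained_end(void *opaque)
    {
        MyDevice *s = opaque;
        AioContext *ctx = blk_get_aio_context(s->blk);

        /* Resume ioeventfd processing now that the drained section is over */
        for (uint16_t i = 0; i < s->nvqs; i++) {
            virtio_queue_aio_attach_host_notifier(virtio_get_queue(&s->vdev, i), ctx);
        }
    }

The guard on s->dataplane_started in the real patch matters because start/stop can race with a drain; detaching a notifier that was never attached would be an error.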
These functions specify that the caller must hold the "@filename
AioContext lock". This doesn't make sense, file names don't have an
AioContext. New BlockDriverStates always start in the main AioContext,
so this is what we really need here.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-3-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ out:
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
 *
- * The caller must always hold @filename AioContext lock, because this
- * function eventually calls bdrv_refresh_total_sectors() which polls
- * when called from non-coroutine context.
+ * The caller must always hold the main AioContext lock.
 */
 static BlockDriverState * no_coroutine_fn
 bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
@@ -XXX,XX +XXX,XX @@ close_and_fail:
     return NULL;
 }
 
-/*
- * The caller must always hold @filename AioContext lock, because this
- * function eventually calls bdrv_refresh_total_sectors() which polls
- * when called from non-coroutine context.
- */
+/* The caller must always hold the main AioContext lock. */
 BlockDriverState *bdrv_open(const char *filename, const char *reference,
                             QDict *options, int flags, Error **errp)
 {
-- 
2.40.1

We need to remember how many of the drain sections in which a node is
were recursive (i.e. subtree drain rather than node drain), so that they
can be correctly applied when children are added or removed during the
drained section.

With this change, it is safe to modify the graph even inside a
bdrv_subtree_drained_begin/end() section.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block.h     |  2 --
 include/block/block_int.h |  5 +++++
 block.c                   | 32 +++++++++++++++++++++++++++++---
 block/io.c                | 28 ++++++++++++++++++++++++----
 4 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs);
 /**
  * Like bdrv_drained_begin, but recursively begins a quiesced section for
  * exclusive access to all child nodes as well.
- *
- * Graph changes are not allowed during a subtree drain section.
  */
 void bdrv_subtree_drained_begin(BlockDriverState *bs);
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
 
     /* Accessed with atomic ops. */
     int quiesce_counter;
+    int recursive_quiesce_counter;
+
     unsigned int write_gen;               /* Current data generation */
 
     /* Protected by reqs_lock. */
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
     BdrvRequestFlags flags);
 
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
+
 int get_tmp_filename(char *filename, int size);
 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                             const char *filename);
diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_end(BdrvChild *child)
     bdrv_drained_end(bs);
 }
 
+static void bdrv_child_cb_attach(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    bdrv_apply_subtree_drain(child, bs);
+}
+
+static void bdrv_child_cb_detach(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    bdrv_unapply_subtree_drain(child, bs);
+}
+
 static int bdrv_child_cb_inactivate(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = {
     .inherit_options = bdrv_inherited_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
     .drained_end     = bdrv_child_cb_drained_end,
+    .attach          = bdrv_child_cb_attach,
+    .detach          = bdrv_child_cb_detach,
     .inactivate      = bdrv_child_cb_inactivate,
 };
 
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = {
     .inherit_options = bdrv_inherited_fmt_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
     .drained_end     = bdrv_child_cb_drained_end,
+    .attach          = bdrv_child_cb_attach,
+    .detach          = bdrv_child_cb_detach,
     .inactivate      = bdrv_child_cb_inactivate,
 };
 
@@ -XXX,XX +XXX,XX @@ static void bdrv_backing_attach(BdrvChild *c)
                     parent->backing_blocker);
     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
                     parent->backing_blocker);
+
+    bdrv_child_cb_attach(c);
 }
 
 static void bdrv_backing_detach(BdrvChild *c)
@@ -XXX,XX +XXX,XX @@ static void bdrv_backing_detach(BdrvChild *c)
     bdrv_op_unblock_all(c->bs, parent->backing_blocker);
     error_free(parent->backing_blocker);
     parent->backing_blocker = NULL;
+
+    bdrv_child_cb_detach(c);
 }
 
 /*
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
     }
     if (old_bs) {
+        /* Detach first so that the recursive drain sections coming from @child
+         * are already gone and we only end the drain sections that came from
+         * elsewhere. */
+        if (child->role->detach) {
+            child->role->detach(child);
+        }
         if (old_bs->quiesce_counter && child->role->drained_end) {
             for (i = 0; i < old_bs->quiesce_counter; i++) {
                 child->role->drained_end(child);
             }
         }
-        if (child->role->detach) {
-            child->role->detach(child);
-        }
         QLIST_REMOVE(child, next_parent);
     }
 
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
         }
     }
 
+    /* Attach only after starting new drained sections, so that recursive
+     * drain sections coming from @child don't get an extra .drained_begin
+     * callback. */
     if (child->role->attach) {
         child->role->attach(child);
     }
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
     assert(data.done);
 }
 
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
-                                  BdrvChild *parent)
+void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+                           BdrvChild *parent)
 {
     BdrvChild *child, *next;
 
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
     bdrv_drain_recurse(bs);
 
     if (recursive) {
+        bs->recursive_quiesce_counter++;
         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
             bdrv_do_drained_begin(child->bs, true, child);
         }
@@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_begin(BlockDriverState *bs)
     bdrv_do_drained_begin(bs, true, NULL);
 }
 
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
-                                BdrvChild *parent)
+void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+                         BdrvChild *parent)
 {
     BdrvChild *child, *next;
     int old_quiesce_counter;
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
     }
 
     if (recursive) {
+        bs->recursive_quiesce_counter--;
         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
             bdrv_do_drained_end(child->bs, true, child);
         }
@@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_end(BlockDriverState *bs)
     bdrv_do_drained_end(bs, true, NULL);
 }
 
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
+{
+    int i;
+
+    for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
+        bdrv_do_drained_begin(child->bs, true, child);
+    }
+}
+
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
+{
+    int i;
+
+    for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
+        bdrv_do_drained_end(child->bs, true, child);
+    }
+}
+
 /*
  * Wait for pending requests to complete on a single BlockDriverState subtree,
  * and suspend block driver's internal I/O until next request arrives.
-- 
2.13.6
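To see why the recursive_quiesce_counter added above is needed, consider attaching a new child below a node that currently sits inside two nested subtree drains. A short worked example of the bookkeeping, using the functions introduced by the patch (the example_* wrapper is made up for illustration):

    /* Hypothetical helper mirroring what bdrv_child_cb_attach() triggers */
    static void example_attach_under_drained_parent(BlockDriverState *parent_bs,
                                                    BdrvChild *child)
    {
        /* Suppose parent_bs->recursive_quiesce_counter == 2 because the
         * parent is inside two nested bdrv_subtree_drained_begin()
         * sections. */
        bdrv_apply_subtree_drain(child, parent_bs);

        /* child->bs->quiesce_counter has now been raised twice, so the new
         * child honours both enclosing subtree drains. On detach,
         * bdrv_unapply_subtree_drain() ends exactly those two sections. */
    }

Without the counter, a node moved into a drained subtree would keep accepting requests, and a node moved out would end drain sections it never entered.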
From: Stefan Hajnoczi <stefanha@redhat.com>

The BlockBackend quiesce_counter is greater than zero during drained
sections. Add an API to check whether the BlockBackend is in a drained
section.

The next patch will use this API.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-10-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/sysemu/block-backend-global-state.h | 1 +
 block/block-backend.c                       | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -XXX,XX +XXX,XX @@ void blk_activate(BlockBackend *blk, Error **errp);
 int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
 void blk_aio_cancel(BlockAIOCB *acb);
 int blk_commit_all(void);
+bool blk_in_drain(BlockBackend *blk);
 void blk_drain(BlockBackend *blk);
 void blk_drain_all(void);
 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
     return 0;
 }
 
+/* Are we currently in a drained section? */
+bool blk_in_drain(BlockBackend *blk)
+{
+    GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */
+    return qatomic_read(&blk->quiesce_counter);
+}
+
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
 {
-- 
2.40.1

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void test_multiparent(void)
     blk_unref(blk_b);
 }
 
+static void test_graph_change(void)
+{
+    BlockBackend *blk_a, *blk_b;
+    BlockDriverState *bs_a, *bs_b, *backing;
+    BDRVTestState *a_s, *b_s, *backing_s;
+
+    blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
+                                &error_abort);
+    a_s = bs_a->opaque;
+    blk_insert_bs(blk_a, bs_a, &error_abort);
+
+    blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
+                                &error_abort);
+    b_s = bs_b->opaque;
+    blk_insert_bs(blk_b, bs_b, &error_abort);
+
+    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+    backing_s = backing->opaque;
+    bdrv_set_backing_hd(bs_a, backing, &error_abort);
+
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+    g_assert_cmpint(a_s->drain_count, ==, 0);
+    g_assert_cmpint(b_s->drain_count, ==, 0);
+    g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
+    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
+
+    bdrv_set_backing_hd(bs_b, backing, &error_abort);
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
+    g_assert_cmpint(backing->quiesce_counter, ==, 5);
+    g_assert_cmpint(a_s->drain_count, ==, 5);
+    g_assert_cmpint(b_s->drain_count, ==, 5);
+    g_assert_cmpint(backing_s->drain_count, ==, 5);
+
+    bdrv_set_backing_hd(bs_b, NULL, &error_abort);
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
+    g_assert_cmpint(backing->quiesce_counter, ==, 3);
+    g_assert_cmpint(a_s->drain_count, ==, 3);
+    g_assert_cmpint(b_s->drain_count, ==, 2);
+    g_assert_cmpint(backing_s->drain_count, ==, 3);
+
+    bdrv_set_backing_hd(bs_b, backing, &error_abort);
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
+    g_assert_cmpint(backing->quiesce_counter, ==, 5);
+    g_assert_cmpint(a_s->drain_count, ==, 5);
+    g_assert_cmpint(b_s->drain_count, ==, 5);
+    g_assert_cmpint(backing_s->drain_count, ==, 5);
+
+    do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
+    do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
+    do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+    do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+    do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+
+    g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+    g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+    g_assert_cmpint(a_s->drain_count, ==, 0);
+    g_assert_cmpint(b_s->drain_count, ==, 0);
+    g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+    bdrv_unref(backing);
+    bdrv_unref(bs_a);
+    bdrv_unref(bs_b);
+    blk_unref(blk_a);
+    blk_unref(blk_b);
+}
+
 
 typedef struct TestBlockJob {
     BlockJob common;
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
 
     g_test_add_func("/bdrv-drain/nested", test_nested);
     g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
+    g_test_add_func("/bdrv-drain/graph-change", test_graph_change);
 
     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
-- 
2.13.6
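A typical consumer of the new blk_in_drain() helper is device start/stop code that must not touch ioeventfds while a drained section is active; the virtio-blk data plane patch earlier in this series uses it exactly that way. A minimal usage sketch (my_data_plane_start() and my_start_ioeventfds() are hypothetical stand-ins for device-specific code):

    void my_data_plane_start(BlockBackend *blk)
    {
        /*
         * If the device is started inside a drained section, leave the
         * notifiers detached; the BlockDevOps.drained_end callback will
         * attach them once the drain finishes.
         */
        if (blk_in_drain(blk)) {
            return;
        }
        my_start_ioeventfds();  /* hypothetical device-specific helper */
    }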
All of the functions that currently take a BlockDriverState, BdrvChild
or BlockBackend as their first parameter expect the associated
AioContext to be locked when they are called. In the case of
no_co_wrappers, they are called from bottom halves directly in the main
loop, so no other caller can be expected to take the lock for them. This
can result in assertion failures because a lock that isn't taken is
released in nested event loops.

Looking at the first parameter is already done by co_wrappers to decide
where the coroutine should run, so doing the same in no_co_wrappers is
only consistent. Take the lock in the generated bottom halves to fix the
problem.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-2-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block-common.h       |  3 +++
 block/block-backend.c              |  7 ++++++-
 scripts/block-coroutine-wrapper.py | 25 +++++++++++++++----------
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/include/block/block-common.h b/include/block/block-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -XXX,XX +XXX,XX @@
 * scheduling a BH in the bottom half that runs the respective non-coroutine
 * function. The coroutine yields after scheduling the BH and is reentered when
 * the wrapped function returns.
+ *
+ * If the first parameter of the function is a BlockDriverState, BdrvChild or
+ * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
 */
 #define no_co_wrapper
 
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
-    BlockDriverState *bs = blk_bs(blk);
+    BlockDriverState *bs;
     IO_CODE();
 
+    if (!blk) {
+        return qemu_get_aio_context();
+    }
+
+    bs = blk_bs(blk);
     if (bs) {
         AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
         assert(ctx == blk->ctx);
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
index XXXXXXX..XXXXXXX 100644
--- a/scripts/block-coroutine-wrapper.py
+++ b/scripts/block-coroutine-wrapper.py
@@ -XXX,XX +XXX,XX @@ def __init__(self, wrapper_type: str, return_type: str, name: str,
                 raise ValueError(f"no_co function can't be rdlock: {self.name}")
             self.target_name = f'{subsystem}_{subname}'
 
-        t = self.args[0].type
-        if t == 'BlockDriverState *':
-            ctx = 'bdrv_get_aio_context(bs)'
-        elif t == 'BdrvChild *':
-            ctx = 'bdrv_get_aio_context(child->bs)'
-        elif t == 'BlockBackend *':
-            ctx = 'blk_get_aio_context(blk)'
-        else:
-            ctx = 'qemu_get_aio_context()'
-        self.ctx = ctx
+        self.ctx = self.gen_ctx()
 
         self.get_result = 's->ret = '
         self.ret = 'return s.ret;'
@@ -XXX,XX +XXX,XX @@ def __init__(self, wrapper_type: str, return_type: str, name: str,
             self.co_ret = ''
             self.return_field = ''
 
+    def gen_ctx(self, prefix: str = '') -> str:
+        t = self.args[0].type
+        if t == 'BlockDriverState *':
+            return f'bdrv_get_aio_context({prefix}bs)'
+        elif t == 'BdrvChild *':
+            return f'bdrv_get_aio_context({prefix}child->bs)'
+        elif t == 'BlockBackend *':
+            return f'blk_get_aio_context({prefix}blk)'
+        else:
+            return 'qemu_get_aio_context()'
+
     def gen_list(self, format: str) -> str:
         return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
 
@@ -XXX,XX +XXX,XX @@ def gen_no_co_wrapper(func: FuncDecl) -> str:
 static void {name}_bh(void *opaque)
 {{
     {struct_name} *s = opaque;
+    AioContext *ctx = {func.gen_ctx('s->')};
 
+    aio_context_acquire(ctx);
     {func.get_result}{name}({ func.gen_list('s->{name}') });
+    aio_context_release(ctx);
 
     aio_co_wake(s->co);
 }}
-- 
2.40.1

Since commit bde70715, base is the only node that is reopened in
commit_start(). This means that the code, which still involves an
explicit BlockReopenQueue, can now be simplified by using bdrv_reopen().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 block/commit.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index XXXXXXX..XXXXXXX 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
                   const char *filter_node_name, Error **errp)
 {
     CommitBlockJob *s;
-    BlockReopenQueue *reopen_queue = NULL;
     int orig_base_flags;
     BlockDriverState *iter;
     BlockDriverState *commit_top_bs = NULL;
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
     /* convert base to r/w, if necessary */
     orig_base_flags = bdrv_get_flags(base);
     if (!(orig_base_flags & BDRV_O_RDWR)) {
-        reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
-                                         orig_base_flags | BDRV_O_RDWR);
-    }
-
-    if (reopen_queue) {
-        bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
+        bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err);
         if (local_err != NULL) {
             error_propagate(errp, local_err);
             goto fail;
-- 
2.13.6
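After the no_co_wrappers change, a generated bottom half looks roughly like the following expansion of the template above, shown here for a hypothetical no_co_wrapper function example_fn(BlockDriverState *bs); the real code is emitted by scripts/block-coroutine-wrapper.py and the ExampleFnState struct name is illustrative:

    static void example_fn_bh(void *opaque)
    {
        ExampleFnState *s = opaque;
        AioContext *ctx = bdrv_get_aio_context(s->bs);

        /* Take the lock the wrapped function expects its caller to hold */
        aio_context_acquire(ctx);
        s->ret = example_fn(s->bs);
        aio_context_release(ctx);

        /* Re-enter the coroutine that scheduled this BH */
        aio_co_wake(s->co);
    }

Deriving the AioContext from the first parameter keeps no_co_wrappers consistent with how co_wrappers already pick the coroutine's home context.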
bdrv_open_backing_file() calls bdrv_open_inherit(), so all callers must
hold the main AioContext lock.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-6-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c        | 2 ++
 block/mirror.c | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
+ * The caller must hold the main AioContext lock.
+ *
 * TODO Can this be unified with bdrv_open_image()?
 */
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job)
     bool abort = job->ret < 0;
     int ret = 0;
 
+    GLOBAL_STATE_CODE();
+
     if (s->prepared) {
         return 0;
     }
     s->prepared = true;
 
+    aio_context_acquire(qemu_get_aio_context());
+
     mirror_top_bs = s->mirror_top_bs;
     bs_opaque = mirror_top_bs->opaque;
     src = mirror_top_bs->backing->bs;
@@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job)
     bdrv_unref(mirror_top_bs);
     bdrv_unref(src);
 
+    aio_context_release(qemu_get_aio_context());
+
     return ret;
 }
 
-- 
2.40.1

The bdrv_reopen*() implementation doesn't like it if the graph is
changed between queuing nodes for reopen and actually reopening them
(one of the reasons is that queuing can be recursive).

So instead of draining the device only in bdrv_reopen_multiple(),
require that callers already drained all affected nodes, and assert this
in bdrv_reopen_queue().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 block.c             | 23 ++++++++++++++++-------
 block/replication.c |  6 ++++++
 qemu-io-cmds.c      |  3 +++
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
 * returns a pointer to bs_queue, which is either the newly allocated
 * bs_queue, or the existing bs_queue being used.
 *
+ * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
 */
 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
                                                  BlockDriverState *bs,
@@ -XXX,XX +XXX,XX @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
     BdrvChild *child;
     QDict *old_options, *explicit_options;
 
+    /* Make sure that the caller remembered to use a drained section. This is
+     * important to avoid graph changes between the recursive queuing here and
+     * bdrv_reopen_multiple(). */
+    assert(bs->quiesce_counter > 0);
+
     if (bs_queue == NULL) {
         bs_queue = g_new0(BlockReopenQueue, 1);
         QSIMPLEQ_INIT(bs_queue);
@@ -XXX,XX +XXX,XX @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
 * If all devices prepare successfully, then the changes are committed
 * to all devices.
 *
+ * All affected nodes must be drained between bdrv_reopen_queue() and
+ * bdrv_reopen_multiple().
 */
 int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
 {
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er
 
     assert(bs_queue != NULL);
 
-    aio_context_release(ctx);
-    bdrv_drain_all_begin();
-    aio_context_acquire(ctx);
-
     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+        assert(bs_entry->state.bs->quiesce_counter > 0);
         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
             error_propagate(errp, local_err);
             goto cleanup;
@@ -XXX,XX +XXX,XX @@ cleanup:
     }
     g_free(bs_queue);
 
-    bdrv_drain_all_end();
-
     return ret;
 }
 
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
 {
     int ret = -1;
     Error *local_err = NULL;
-    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
+    BlockReopenQueue *queue;
 
+    bdrv_subtree_drained_begin(bs);
+
+    queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
     ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
     }
+
+    bdrv_subtree_drained_end(bs);
+
     return ret;
 }
 
diff --git a/block/replication.c b/block/replication.c
index XXXXXXX..XXXXXXX 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
         new_secondary_flags = s->orig_secondary_flags;
     }
 
+    bdrv_subtree_drained_begin(s->hidden_disk->bs);
+    bdrv_subtree_drained_begin(s->secondary_disk->bs);
+
     if (orig_hidden_flags != new_hidden_flags) {
         reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL,
                                          new_hidden_flags);
@@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
                              reopen_queue, &local_err);
         error_propagate(errp, local_err);
     }
+
+    bdrv_subtree_drained_end(s->hidden_disk->bs);
+    bdrv_subtree_drained_end(s->secondary_disk->bs);
 }
 
 static void backup_job_cleanup(BlockDriverState *bs)
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -XXX,XX +XXX,XX @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
     opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL;
     qemu_opts_reset(&reopen_opts);
 
+    bdrv_subtree_drained_begin(bs);
     brq = bdrv_reopen_queue(NULL, bs, opts, flags);
     bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err);
+    bdrv_subtree_drained_end(bs);
+
     if (local_err) {
         error_report_err(local_err);
     } else {
-- 
2.13.6
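The new calling convention for reopen can be summed up in one pattern that all three call sites above follow. A condensed sketch, with error handling trimmed (the wrapper name reopen_with_new_flags is made up; the bdrv_* calls are the ones used in the patch):

    static int reopen_with_new_flags(BlockDriverState *bs, int flags, Error **errp)
    {
        BlockReopenQueue *queue;
        int ret;

        /* Keep the subtree quiesced across queuing *and* reopening so the
         * graph cannot change in between; bdrv_reopen_queue() asserts this. */
        bdrv_subtree_drained_begin(bs);

        queue = bdrv_reopen_queue(NULL, bs, NULL, flags);
        ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, errp);

        bdrv_subtree_drained_end(bs);
        return ret;
    }

Moving the drain out of bdrv_reopen_multiple() and into the callers is what makes the assertion in bdrv_reopen_queue() possible: the queue is built and consumed under one uninterrupted drained section.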