The following changes since commit aa9bbd865502ed517624ab6fe7d4b5d89ca95e43:

  Merge tag 'pull-ppc-20230528' of https://gitlab.com/danielhb/qemu into staging (2023-05-29 14:31:52 -0700)

are available in the Git repository at:

  https://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 60f782b6b78211c125970768be726c9f380dbd61:

  aio: remove aio_disable_external() API (2023-05-30 17:37:26 +0200)

----------------------------------------------------------------
Block layer patches

- Fix blockdev-create with iothreads
- Remove aio_disable_external() API

----------------------------------------------------------------
Kevin Wolf (12):
      block-coroutine-wrapper: Take AioContext lock in no_co_wrappers
      block: Clarify locking rules for bdrv_open(_inherit)()
      block: Take main AioContext lock when calling bdrv_open()
      block-backend: Fix blk_new_open() for iothreads
      mirror: Hold main AioContext lock for calling bdrv_open_backing_file()
      qcow2: Fix open with 'file' in iothread
      raw-format: Fix open with 'file' in iothread
      copy-before-write: Fix open with child in iothread
      block: Take AioContext lock in bdrv_open_driver()
      block: Fix AioContext locking in bdrv_insert_node()
      iotests: Make verify_virtio_scsi_pci_or_ccw() public
      iotests: Test blockdev-create in iothread

Stefan Hajnoczi (20):
      block-backend: split blk_do_set_aio_context()
      hw/qdev: introduce qdev_is_realized() helper
      virtio-scsi: avoid race between unplug and transport event
      virtio-scsi: stop using aio_disable_external() during unplug
      util/vhost-user-server: rename refcount to in_flight counter
      block/export: wait for vhost-user-blk requests when draining
      block/export: stop using is_external in vhost-user-blk server
      hw/xen: do not use aio_set_fd_handler(is_external=true) in xen_xenstore
      block: add blk_in_drain() API
      block: drain from main loop thread in bdrv_co_yield_to_drain()
      xen-block: implement BlockDevOps->drained_begin()
      hw/xen: do not set is_external=true on evtchn fds
      block/export: rewrite vduse-blk drain code
      block/export: don't require AioContext lock around blk_exp_ref/unref()
      block/fuse: do not set is_external=true on FUSE fd
      virtio: make it possible to detach host notifier from any thread
      virtio-blk: implement BlockDevOps->drained_begin()
      virtio-scsi: implement BlockDevOps->drained_begin()
      virtio: do not set is_external=true on host notifiers
      aio: remove aio_disable_external() API

 hw/block/dataplane/xen-block.h | 2 +
 include/block/aio.h | 57 ------------
 include/block/block-common.h | 3 +
 include/block/block_int-common.h | 72 +++++++--------
 include/block/export.h | 2 +
 include/hw/qdev-core.h | 17 +++-
 include/hw/scsi/scsi.h | 14 +++
 include/qemu/vhost-user-server.h | 8 +-
 include/sysemu/block-backend-common.h | 25 ++---
 include/sysemu/block-backend-global-state.h | 1 +
 util/aio-posix.h | 1 -
 block.c | 46 ++++++---
 block/blkio.c | 15 +--
 block/block-backend.c | 104 ++++++++++++---------
 block/copy-before-write.c | 21 ++++-
 block/curl.c | 10 +-
 block/export/export.c | 13 ++-
 block/export/fuse.c | 56 ++++++++++-
 block/export/vduse-blk.c | 128 ++++++++++++++++++--------
 block/export/vhost-user-blk-server.c | 52 +++++++++--
 block/io.c | 16 ++--
 block/io_uring.c | 4 +-
 block/iscsi.c | 3 +-
 block/linux-aio.c | 4 +-
 block/mirror.c | 6 ++
 block/nfs.c | 5 +-
 block/nvme.c | 8 +-
 block/qapi-sysemu.c | 3 +
 block/qcow2.c | 8 +-
 block/raw-format.c | 5 +
 block/ssh.c | 4 +-
 block/win32-aio.c | 6 +-
 blockdev.c | 29 ++++--
 hw/block/dataplane/virtio-blk.c | 23 +++--
 hw/block/dataplane/xen-block.c | 42 ++++++---
 hw/block/virtio-blk.c | 38 +++++++-
 hw/block/xen-block.c | 24 ++++-
 hw/i386/kvm/xen_xenstore.c | 2 +-
 hw/scsi/scsi-bus.c | 46 ++++++++-
 hw/scsi/scsi-disk.c | 27 +++++-
 hw/scsi/virtio-scsi-dataplane.c | 32 +++++--
 hw/scsi/virtio-scsi.c | 127 +++++++++++++++++++------
 hw/virtio/virtio.c | 9 +-
 hw/xen/xen-bus.c | 11 ++-
 io/channel-command.c | 6 +-
 io/channel-file.c | 3 +-
 io/channel-socket.c | 3 +-
 migration/rdma.c | 16 ++--
 qemu-nbd.c | 4 +
 tests/unit/test-aio.c | 27 +----
 tests/unit/test-bdrv-drain.c | 15 +--
 tests/unit/test-block-iothread.c | 4 +-
 tests/unit/test-fdmon-epoll.c | 73 ---------------
 tests/unit/test-nested-aio-poll.c | 9 +-
 util/aio-posix.c | 20 +---
 util/aio-win32.c | 8 +-
 util/async.c | 3 +-
 util/fdmon-epoll.c | 10 --
 util/fdmon-io_uring.c | 8 +-
 util/fdmon-poll.c | 3 +-
 util/main-loop.c | 7 +-
 util/qemu-coroutine-io.c | 7 +-
 util/vhost-user-server.c | 33 ++++---
 scripts/block-coroutine-wrapper.py | 25 +++--
 tests/qemu-iotests/iotests.py | 2 +-
 hw/scsi/trace-events | 2 +
 tests/qemu-iotests/256 | 2 +-
 tests/qemu-iotests/tests/iothreads-create | 67 ++++++++++++++
 tests/qemu-iotests/tests/iothreads-create.out | 4 +
 tests/unit/meson.build | 3 -
 70 files changed, 931 insertions(+), 562 deletions(-)
 delete mode 100644 tests/unit/test-fdmon-epoll.c
 create mode 100755 tests/qemu-iotests/tests/iothreads-create
 create mode 100644 tests/qemu-iotests/tests/iothreads-create.out

The following changes since commit 281f327487c9c9b1599f93c589a408bbf4a651b8:

  Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.12-pull-request' into staging (2017-12-22 00:11:36 +0000)

are available in the git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 1a63a907507fbbcfaee3f622907ec244b7eabda8:

  block: Keep nodes drained between reopen_queue/multiple (2017-12-22 15:05:32 +0100)

----------------------------------------------------------------
Block layer patches

----------------------------------------------------------------
Doug Gale (1):
      nvme: Add tracing

Edgar Kaziakhmedov (1):
      qcow2: get rid of qcow2_backing_read1 routine

Fam Zheng (2):
      block: Open backing image in force share mode for size probe
      block: Remove unused bdrv_requests_pending

John Snow (1):
      iotests: fix 197 for vpc

Kevin Wolf (27):
      block: Formats don't need CONSISTENT_READ with NO_IO
      block: Make bdrv_drain_invoke() recursive
      block: Call .drain_begin only once in bdrv_drain_all_begin()
      test-bdrv-drain: Test BlockDriver callbacks for drain
      block: bdrv_drain_recurse(): Remove unused begin parameter
      block: Don't wait for requests in bdrv_drain*_end()
      block: Unify order in drain functions
      block: Don't acquire AioContext in hmp_qemu_io()
      block: Document that x-blockdev-change breaks quorum children list
      block: Assert drain_all is only called from main AioContext
      block: Make bdrv_drain() driver callbacks non-recursive
      test-bdrv-drain: Test callback for bdrv_drain
      test-bdrv-drain: Test bs->quiesce_counter
      blockjob: Pause job on draining any job BDS
      test-bdrv-drain: Test drain vs. block jobs
      block: Don't block_job_pause_all() in bdrv_drain_all()
      block: Nested drain_end must still call callbacks
      test-bdrv-drain: Test nested drain sections
      block: Don't notify parents in drain call chain
      block: Add bdrv_subtree_drained_begin/end()
      test-bdrv-drain: Tests for bdrv_subtree_drain
      test-bdrv-drain: Test behaviour in coroutine context
      test-bdrv-drain: Recursive draining with multiple parents
      block: Allow graph changes in subtree drained section
      test-bdrv-drain: Test graph changes in drained section
      commit: Simplify reopen of base
      block: Keep nodes drained between reopen_queue/multiple

Thomas Huth (3):
      block: Remove the obsolete -drive boot=on|off parameter
      block: Remove the deprecated -hdachs option
      block: Mention -drive cyls/heads/secs/trans/serial/addr in deprecation chapter

 qapi/block-core.json | 4 +
 block/qcow2.h | 3 -
 include/block/block.h | 15 +-
 include/block/block_int.h | 6 +-
 block.c | 75 ++++-
 block/commit.c | 8 +-
 block/io.c | 164 +++++++---
 block/qcow2.c | 51 +--
 block/replication.c | 6 +
 blockdev.c | 11 -
 blockjob.c | 22 +-
 hmp.c | 6 -
 hw/block/nvme.c | 349 +++++++++++++++++----
 qemu-io-cmds.c | 3 +
 tests/test-bdrv-drain.c | 651 +++++++++++++++++++++++++++++++++++++++
 vl.c | 86 +-----
 hw/block/trace-events | 93 ++++++
 qemu-doc.texi | 29 +-
 qemu-options.hx | 19 +-
 tests/Makefile.include | 2 +
 tests/qemu-iotests/197 | 4 +
 tests/qemu-iotests/common.filter | 3 +-
 22 files changed, 1294 insertions(+), 316 deletions(-)
 create mode 100644 tests/test-bdrv-drain.c
While calling bdrv_new_open_driver_opts(), the main AioContext lock must
be held, not the lock of the AioContext of the block subtree it will be
added to afterwards.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-11-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_delete(BlockDriverState *bs)
  * empty set of options. The reference to the QDict belongs to the block layer
  * after the call (even on failure), so if the caller intends to reuse the
  * dictionary, it needs to use qobject_ref() before calling bdrv_open.
+ *
+ * The caller holds the AioContext lock for @bs. It must make sure that @bs
+ * stays in the same AioContext, i.e. @options must not refer to nodes in a
+ * different AioContext.
  */
 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
                                    int flags, Error **errp)
 {
     ERRP_GUARD();
     int ret;
+    AioContext *ctx = bdrv_get_aio_context(bs);
     BlockDriverState *new_node_bs = NULL;
     const char *drvname, *node_name;
     BlockDriver *drv;
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
 
     GLOBAL_STATE_CODE();
 
+    aio_context_release(ctx);
+    aio_context_acquire(qemu_get_aio_context());
     new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
                                             errp);
+    aio_context_release(qemu_get_aio_context());
+    aio_context_acquire(ctx);
+    assert(bdrv_get_aio_context(bs) == ctx);
+
     options = NULL; /* bdrv_new_open_driver() eats options */
     if (!new_node_bs) {
         error_prepend(errp, "Could not create node: ");
-- 
2.40.1

Commit 1f4ad7d fixed 'qemu-img info' for raw images that are currently
in use as a mirror target. It is not enough for image formats, though,
as these still unconditionally request BLK_PERM_CONSISTENT_READ.

As this permission is geared towards whether the guest-visible data is
consistent, and has no impact on whether the metadata is sane, and
'qemu-img info' does not read guest-visible data (except for the raw
format), it makes sense to not require BLK_PERM_CONSISTENT_READ if there
is not going to be any guest I/O performed, regardless of image format.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
     assert(role == &child_backing || role == &child_file);
 
     if (!backing) {
+        int flags = bdrv_reopen_get_flags(reopen_queue, bs);
+
         /* Apart from the modifications below, the same permissions are
          * forwarded and left alone as for filters */
         bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
 
         /* bs->file always needs to be consistent because of the metadata. We
          * can never allow other users to resize or write to it. */
-        perm |= BLK_PERM_CONSISTENT_READ;
+        if (!(flags & BDRV_O_NO_IO)) {
+            perm |= BLK_PERM_CONSISTENT_READ;
+        }
         shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
     } else {
         /* We want consistent read from backing files if the parent needs it.
-- 
2.13.6
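For context on the BDRV_O_NO_IO case above: metadata-only users such as
'qemu-img info' open the image with that flag set, which is what makes
dropping BLK_PERM_CONSISTENT_READ safe. A minimal sketch of such a caller
(the filename and error handling are placeholders, not the actual
qemu-img code):

    Error *local_err = NULL;

    /* BDRV_O_NO_IO promises that no guest I/O will be issued, so with
     * the change above the format driver no longer claims
     * BLK_PERM_CONSISTENT_READ on its bs->file child. */
    BlockBackend *blk = blk_new_open("test.qcow2", NULL, NULL,
                                     BDRV_O_NO_IO, &local_err);
    if (!blk) {
        error_report_err(local_err);
    }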
The AioContext lock must not be held for bdrv_open_child(), but it is
necessary for the following operations, in particular those using nested
event loops in coroutine wrappers.

Temporarily dropping the main AioContext lock is not necessary because
we know we run in the main thread.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-9-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/copy-before-write.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index XXXXXXX..XXXXXXX 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     int64_t cluster_size;
     g_autoptr(BlockdevOptions) full_opts = NULL;
     BlockdevOptionsCbw *opts;
+    AioContext *ctx;
     int ret;
 
     full_opts = cbw_parse_options(options, errp);
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+
     if (opts->bitmap) {
         bitmap = block_dirty_bitmap_lookup(opts->bitmap->node,
                                            opts->bitmap->name, NULL, errp);
         if (!bitmap) {
-            return -EINVAL;
+            ret = -EINVAL;
+            goto out;
         }
     }
     s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error :
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
     if (!s->bcs) {
         error_prepend(errp, "Cannot create block-copy-state: ");
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
 
     cluster_size = block_copy_cluster_size(s->bcs);
 
     s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
     if (!s->done_bitmap) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
     bdrv_disable_dirty_bitmap(s->done_bitmap);
 
     /* s->access_bitmap starts equal to bcs bitmap */
     s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
     if (!s->access_bitmap) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
     bdrv_disable_dirty_bitmap(s->access_bitmap);
     bdrv_dirty_bitmap_merge_internal(s->access_bitmap,
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_co_mutex_init(&s->lock);
     QLIST_INIT(&s->frozen_read_reqs);
 
-    return 0;
+    ret = 0;
+out:
+    aio_context_release(ctx);
+    return ret;
 }
 
 static void cbw_close(BlockDriverState *bs)
-- 
2.40.1

From: John Snow <jsnow@redhat.com>

VPC has some difficulty creating geometries of particular size.
However, we can indeed force it to use a literal one, so let's
do that for the sake of test 197, which is testing some specific
offsets.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Lukáš Doktor <ldoktor@redhat.com>
---
 tests/qemu-iotests/197 | 4 ++++
 tests/qemu-iotests/common.filter | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/197
+++ b/tests/qemu-iotests/197
@@ -XXX,XX +XXX,XX @@ echo '=== Copy-on-read ==='
 echo
 
 # Prep the images
+# VPC rounds image sizes to a specific geometry, force a specific size.
+if [ "$IMGFMT" = "vpc" ]; then
+    IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
+fi
 _make_test_img 4G
 $QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
 IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -XXX,XX +XXX,XX @@ _filter_img_create()
         -e "s# log_size=[0-9]\\+##g" \
         -e "s# refcount_bits=[0-9]\\+##g" \
         -e "s# key-secret=[a-zA-Z0-9]\\+##g" \
-        -e "s# iter-time=[0-9]\\+##g"
+        -e "s# iter-time=[0-9]\\+##g" \
+        -e "s# force_size=\\(on\\|off\\)##g"
 }
 
 _filter_img_info
-- 
2.13.6
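As an aside, the force_size option that the test toggles through IMGOPTS
is a regular vpc creation option, so the behaviour can also be checked by
hand (illustrative invocation, output not shown):

    $ qemu-img create -f vpc -o force_size=on test.vpc 4G

With force_size=on the driver stores the requested size literally instead
of rounding it up to the next CHS geometry, which is what lets test 197
rely on exact offsets.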
From: Stefan Hajnoczi <stefanha@redhat.com>

Only report a transport reset event to the guest after the SCSIDevice
has been unrealized by qdev_simple_device_unplug_cb().

qdev_simple_device_unplug_cb() sets the SCSIDevice's qdev.realized field
to false so that scsi_device_find/get() no longer see it.

scsi_target_emulate_report_luns() also needs to be updated to filter out
SCSIDevices that are unrealized.

Change virtio_scsi_push_event() to take event information as an argument
instead of the SCSIDevice. This allows virtio_scsi_hotunplug() to emit a
VIRTIO_SCSI_T_TRANSPORT_RESET event after the SCSIDevice has already
been unrealized.

These changes ensure that the guest driver does not see the SCSIDevice
that's being unplugged if it responds very quickly to the transport
reset event.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230516190238.8401-4-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/scsi-bus.c | 3 +-
 hw/scsi/virtio-scsi.c | 86 ++++++++++++++++++++++++++++++-------------
 2 files changed, 63 insertions(+), 26 deletions(-)

diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -XXX,XX +XXX,XX @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
         DeviceState *qdev = kid->child;
         SCSIDevice *dev = SCSI_DEVICE(qdev);
 
-        if (dev->channel == channel && dev->id == id && dev->lun != 0) {
+        if (dev->channel == channel && dev->id == id && dev->lun != 0 &&
+            qdev_is_realized(&dev->qdev)) {
             store_lun(tmp, dev->lun);
             g_byte_array_append(buf, tmp, 8);
             len += 8;
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)
     s->events_dropped = false;
 }
 
-static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
-                                   uint32_t event, uint32_t reason)
+typedef struct {
+    uint32_t event;
+    uint32_t reason;
+    union {
+        /* Used by messages specific to a device */
+        struct {
+            uint32_t id;
+            uint32_t lun;
+        } address;
+    };
+} VirtIOSCSIEventInfo;
+
+static void virtio_scsi_push_event(VirtIOSCSI *s,
+                                   const VirtIOSCSIEventInfo *info)
 {
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
     VirtIOSCSIReq *req;
     VirtIOSCSIEvent *evt;
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
+    uint32_t event = info->event;
+    uint32_t reason = info->reason;
 
     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
         return;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
     memset(evt, 0, sizeof(VirtIOSCSIEvent));
     evt->event = virtio_tswap32(vdev, event);
     evt->reason = virtio_tswap32(vdev, reason);
-    if (!dev) {
-        assert(event == VIRTIO_SCSI_T_EVENTS_MISSED);
-    } else {
+    if (event != VIRTIO_SCSI_T_EVENTS_MISSED) {
         evt->lun[0] = 1;
-        evt->lun[1] = dev->id;
+        evt->lun[1] = info->address.id;
 
         /* Linux wants us to keep the same encoding we use for REPORT LUNS. */
-        if (dev->lun >= 256) {
-            evt->lun[2] = (dev->lun >> 8) | 0x40;
+        if (info->address.lun >= 256) {
+            evt->lun[2] = (info->address.lun >> 8) | 0x40;
         }
-        evt->lun[3] = dev->lun & 0xFF;
+        evt->lun[3] = info->address.lun & 0xFF;
     }
     trace_virtio_scsi_event(virtio_scsi_get_lun(evt->lun), event, reason);
-
+
     virtio_scsi_complete_req(req);
 }
 
 static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
     if (s->events_dropped) {
-        virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
+        VirtIOSCSIEventInfo info = {
+            .event = VIRTIO_SCSI_T_NO_EVENT,
+        };
+        virtio_scsi_push_event(s, &info);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_CHANGE) &&
         dev->type != TYPE_ROM) {
+        VirtIOSCSIEventInfo info = {
+            .event = VIRTIO_SCSI_T_PARAM_CHANGE,
+            .reason = sense.asc | (sense.ascq << 8),
+            .address = {
+                .id = dev->id,
+                .lun = dev->lun,
+            },
+        };
+
         virtio_scsi_acquire(s);
-        virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
-                               sense.asc | (sense.ascq << 8));
+        virtio_scsi_push_event(s, &info);
         virtio_scsi_release(s);
     }
 }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     }
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
+        VirtIOSCSIEventInfo info = {
+            .event = VIRTIO_SCSI_T_TRANSPORT_RESET,
+            .reason = VIRTIO_SCSI_EVT_RESET_RESCAN,
+            .address = {
+                .id = sd->id,
+                .lun = sd->lun,
+            },
+        };
+
         virtio_scsi_acquire(s);
-        virtio_scsi_push_event(s, sd,
-                               VIRTIO_SCSI_T_TRANSPORT_RESET,
-                               VIRTIO_SCSI_EVT_RESET_RESCAN);
+        virtio_scsi_push_event(s, &info);
         scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
         virtio_scsi_release(s);
     }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
     SCSIDevice *sd = SCSI_DEVICE(dev);
     AioContext *ctx = s->ctx ?: qemu_get_aio_context();
-
-    if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
-        virtio_scsi_acquire(s);
-        virtio_scsi_push_event(s, sd,
-                               VIRTIO_SCSI_T_TRANSPORT_RESET,
-                               VIRTIO_SCSI_EVT_RESET_REMOVED);
-        scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
-        virtio_scsi_release(s);
-    }
+    VirtIOSCSIEventInfo info = {
+        .event = VIRTIO_SCSI_T_TRANSPORT_RESET,
+        .reason = VIRTIO_SCSI_EVT_RESET_REMOVED,
+        .address = {
+            .id = sd->id,
+            .lun = sd->lun,
+        },
+    };
 
     aio_disable_external(ctx);
     qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
         blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
         virtio_scsi_release(s);
     }
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
+        virtio_scsi_acquire(s);
+        virtio_scsi_push_event(s, &info);
+        scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
+        virtio_scsi_release(s);
+    }
 }
 
 static struct SCSIBusInfo virtio_scsi_scsi_info = {
-- 
2.40.1

This change separates bdrv_drain_invoke(), which calls the BlockDriver
drain callbacks, from bdrv_drain_recurse(). Instead, the function
performs its own recursion now.

One reason for this is that bdrv_drain_recurse() can be called multiple
times by bdrv_drain_all_begin(), but the callbacks may only be called
once. The separation is necessary to fix this bug.

The other reason is that we intend to go to a model where we call all
driver callbacks first, and only then start polling. This is not fully
achieved yet with this patch, as bdrv_drain_invoke() contains a
BDRV_POLL_WHILE() loop for the block driver callbacks, which can still
call callbacks for any unrelated event. It's a step in this direction
anyway.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
     bdrv_wakeup(bs);
 }
 
+/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
 static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
 {
+    BdrvChild *child, *tmp;
     BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
 
     if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
     data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
     bdrv_coroutine_enter(bs, data.co);
     BDRV_POLL_WHILE(bs, !data.done);
+
+    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
+        bdrv_drain_invoke(child->bs, begin);
+    }
 }
 
 static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
     BdrvChild *child, *tmp;
     bool waited;
 
-    /* Ensure any pending metadata writes are submitted to bs->file. */
-    bdrv_drain_invoke(bs, begin);
-
     /* Wait for drained requests to finish */
     waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
 
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
         bdrv_parent_drained_begin(bs);
     }
 
+    bdrv_drain_invoke(bs, true);
     bdrv_drain_recurse(bs, true);
 }
 
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
     }
 
     bdrv_parent_drained_end(bs);
+    bdrv_drain_invoke(bs, false);
     bdrv_drain_recurse(bs, false);
     aio_enable_external(bdrv_get_aio_context(bs));
 }
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
             if (aio_context == bdrv_get_aio_context(bs)) {
+                /* FIXME Calling this multiple times is wrong */
+                bdrv_drain_invoke(bs, true);
                 waited |= bdrv_drain_recurse(bs, true);
             }
         }
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
         aio_context_acquire(aio_context);
         aio_enable_external(aio_context);
         bdrv_parent_drained_end(bs);
+        bdrv_drain_invoke(bs, false);
         bdrv_drain_recurse(bs, false);
         aio_context_release(aio_context);
     }
-- 
2.13.6
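The LUN bytes filled in by virtio_scsi_push_event() above follow the
REPORT LUNS encoding that the code comment refers to. Pulled out into a
standalone helper it would look roughly like this (a sketch mirroring the
patch, not code from the tree):

    /* Single level LUN structure for lun < 256; flat space addressing
     * (0x40 in the top bits of byte 2) for larger LUNs, matching what
     * REPORT LUNS returns to the guest. */
    static void encode_report_luns_lun(uint8_t out[8], uint32_t id,
                                       uint32_t lun)
    {
        memset(out, 0, 8);
        out[0] = 1;
        out[1] = id;
        if (lun >= 256) {
            out[2] = (lun >> 8) | 0x40;
        }
        out[3] = lun & 0xFF;
    }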
bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver
callback inside its polling loop. This means that how many times it got
called for each node depended on how long it had to poll the event loop.

This is obviously not right and results in nodes that stay drained even
after bdrv_drain_all_end(), which calls .bdrv_co_drain_end() once per
node.

Fix bdrv_drain_all_begin() to call the callback only once, too.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         bdrv_parent_drained_begin(bs);
         aio_disable_external(aio_context);
+        bdrv_drain_invoke(bs, true);
         aio_context_release(aio_context);
 
         if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
             if (aio_context == bdrv_get_aio_context(bs)) {
-                /* FIXME Calling this multiple times is wrong */
-                bdrv_drain_invoke(bs, true);
                 waited |= bdrv_drain_recurse(bs, true);
             }
         }
-- 
2.13.6
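The invariant this restores is that each node sees exactly one
.bdrv_co_drain_begin() per drained section, balanced by exactly one
.bdrv_co_drain_end(). A driver that simply counts sections, as the
test-bdrv-drain patch below does, makes the old bug visible: with the
callback inside the polling loop, drain_count could be left above zero
after bdrv_drain_all_end(), keeping the node drained forever.

    /* Minimal driver callbacks that expose the imbalance: after every
     * drained section, drain_count must be back at 0. */
    static void coroutine_fn drv_co_drain_begin(BlockDriverState *bs)
    {
        BDRVTestState *s = bs->opaque;
        s->drain_count++;
    }

    static void coroutine_fn drv_co_drain_end(BlockDriverState *bs)
    {
        BDRVTestState *s = bs->opaque;
        s->drain_count--;
    }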
If blockdev-create references an existing node in an iothread (e.g. as
its 'file' child), then suddenly all of the image creation code must
run in that AioContext, too. Test that this actually works.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-13-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/tests/iothreads-create | 67 +++++++++++++++++++
 tests/qemu-iotests/tests/iothreads-create.out | 4 ++
 2 files changed, 71 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/iothreads-create
 create mode 100644 tests/qemu-iotests/tests/iothreads-create.out

diff --git a/tests/qemu-iotests/tests/iothreads-create b/tests/qemu-iotests/tests/iothreads-create
new file mode 100755
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/tests/iothreads-create
@@ -XXX,XX +XXX,XX @@
+#!/usr/bin/env python3
+# group: rw quick
+#
+# Copyright (C) 2023 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
+
+import asyncio
+import iotests
+
+iotests.script_initialize(supported_fmts=['qcow2', 'qcow', 'qed', 'vdi',
+                                          'vmdk', 'parallels'])
+iotests.verify_virtio_scsi_pci_or_ccw()
+
+with iotests.FilePath('disk.img') as img_path, \
+     iotests.VM() as vm:
+
+    iotests.qemu_img_create('-f', 'raw', img_path, '0')
+
+    vm.add_object('iothread,id=iothread0')
+    vm.add_blockdev(f'file,node-name=img-file,read-only=on,'
+                    f'filename={img_path}')
+    vm.add_device('virtio-scsi,iothread=iothread0')
+    vm.add_device('scsi-hd,drive=img-file,share-rw=on')
+
+    vm.launch()
+
+    iotests.log(vm.qmp(
+        'blockdev-reopen',
+        options=[{
+            'driver': 'file',
+            'filename': img_path,
+            'node-name': 'img-file',
+            'read-only': False,
+        }],
+    ))
+    iotests.log(vm.qmp(
+        'blockdev-create',
+        job_id='job0',
+        options={
+            'driver': iotests.imgfmt,
+            'file': 'img-file',
+            'size': 1024 * 1024,
+        },
+    ))
+
+    # Should succeed and not time out
+    try:
+        vm.run_job('job0', wait=5.0)
+        vm.shutdown()
+    except asyncio.TimeoutError:
+        # VM may be stuck, kill it
+        vm.kill()
+        raise
diff --git a/tests/qemu-iotests/tests/iothreads-create.out b/tests/qemu-iotests/tests/iothreads-create.out
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/tests/iothreads-create.out
@@ -XXX,XX +XXX,XX @@
+{"return": {}}
+{"return": {}}
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
+{"return": {}}
-- 
2.40.1

This adds a test case that the BlockDriver callbacks for drain are
called in bdrv_drain_all_begin/end(), and that both of them are called
exactly once.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 tests/test-bdrv-drain.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++
 tests/Makefile.include | 2 +
 2 files changed, 139 insertions(+)
 create mode 100644 tests/test-bdrv-drain.c

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * Block node draining tests
+ *
+ * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+
+typedef struct BDRVTestState {
+    int drain_count;
+} BDRVTestState;
+
+static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
+{
+    BDRVTestState *s = bs->opaque;
+    s->drain_count++;
+}
+
+static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
+{
+    BDRVTestState *s = bs->opaque;
+    s->drain_count--;
+}
+
+static void bdrv_test_close(BlockDriverState *bs)
+{
+    BDRVTestState *s = bs->opaque;
+    g_assert_cmpint(s->drain_count, >, 0);
+}
+
+static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
+                                            uint64_t offset, uint64_t bytes,
+                                            QEMUIOVector *qiov, int flags)
+{
+    /* We want this request to stay until the polling loop in drain waits for
+     * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
+     * first and polls its result, too, but it shouldn't accidentally complete
+     * this request yet. */
+    qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+
+    return 0;
+}
+
+static BlockDriver bdrv_test = {
+    .format_name            = "test",
+    .instance_size          = sizeof(BDRVTestState),
+
+    .bdrv_close             = bdrv_test_close,
+    .bdrv_co_preadv         = bdrv_test_co_preadv,
+
+    .bdrv_co_drain_begin    = bdrv_test_co_drain_begin,
+    .bdrv_co_drain_end      = bdrv_test_co_drain_end,
+};
+
+static void aio_ret_cb(void *opaque, int ret)
+{
+    int *aio_ret = opaque;
+    *aio_ret = ret;
+}
+
+static void test_drv_cb_drain_all(void)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    BDRVTestState *s;
+    BlockAIOCB *acb;
+    int aio_ret;
+
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = NULL,
+        .iov_len = 0,
+    };
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+                              &error_abort);
+    s = bs->opaque;
+    blk_insert_bs(blk, bs, &error_abort);
+
+    /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
+    g_assert_cmpint(s->drain_count, ==, 0);
+    bdrv_drain_all_begin();
+    g_assert_cmpint(s->drain_count, ==, 1);
+    bdrv_drain_all_end();
+    g_assert_cmpint(s->drain_count, ==, 0);
+
+    /* Now do the same while a request is pending */
+    aio_ret = -EINPROGRESS;
+    acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
+    g_assert(acb != NULL);
+    g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
+
+    g_assert_cmpint(s->drain_count, ==, 0);
+    bdrv_drain_all_begin();
+    g_assert_cmpint(aio_ret, ==, 0);
+    g_assert_cmpint(s->drain_count, ==, 1);
+    bdrv_drain_all_end();
+    g_assert_cmpint(s->drain_count, ==, 0);
+
+    bdrv_unref(bs);
+    blk_unref(blk);
+}
+
+int main(int argc, char **argv)
+{
+    bdrv_init();
+    qemu_init_main_loop(&error_abort);
+
+    g_test_init(&argc, &argv, NULL);
+
+    g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
+
+    return g_test_run();
+}
diff --git a/tests/Makefile.include b/tests/Makefile.include
index XXXXXXX..XXXXXXX 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -XXX,XX +XXX,XX @@ gcov-files-test-thread-pool-y = thread-pool.c
 gcov-files-test-hbitmap-y = util/hbitmap.c
 check-unit-y += tests/test-hbitmap$(EXESUF)
 gcov-files-test-hbitmap-y = blockjob.c
+check-unit-y += tests/test-bdrv-drain$(EXESUF)
 check-unit-y += tests/test-blockjob$(EXESUF)
 check-unit-y += tests/test-blockjob-txn$(EXESUF)
 check-unit-y += tests/test-x86-cpuid$(EXESUF)
@@ -XXX,XX +XXX,XX @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
 tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
 tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
 tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
+tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y)
 tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
 tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
-- 
2.13.6
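Both of the tests above plug into the usual harnesses. Assuming a
standard build tree, invocations along these lines should work (the exact
paths depend on the build setup):

    # the qemu-iotests case, e.g. against qcow2
    $ cd tests/qemu-iotests && ./check -qcow2 tests/iothreads-create

    # the drain unit test, directly or via 'make check-unit'
    $ ./tests/test-bdrv-drain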
From: Stefan Hajnoczi <stefanha@redhat.com>

virtio_queue_aio_detach_host_notifier() does two things:
1. It removes the fd handler from the event loop.
2. It processes the virtqueue one last time.

The first step can be performed by any thread and without taking the
AioContext lock.

The second step may need the AioContext lock (depending on the device
implementation) and runs in the thread where request processing takes
place. virtio-blk and virtio-scsi therefore call
virtio_queue_aio_detach_host_notifier() from a BH that is scheduled in
AioContext.

The next patch will introduce a .drained_begin() function that needs to
call virtio_queue_aio_detach_host_notifier(). .drained_begin() functions
cannot call aio_poll() to wait synchronously for the BH. It is possible
for a .drained_poll() callback to asynchronously wait for the BH, but
that is more complex than necessary here.

Move the virtqueue processing out to the callers of
virtio_queue_aio_detach_host_notifier() so that the function can be
called from any thread. This is in preparation for the next patch.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230516190238.8401-17-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 7 +++++++
 hw/scsi/virtio-scsi-dataplane.c | 14 ++++++++++++++
 hw/virtio/virtio.c | 3 ---
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_data_plane_stop_bh(void *opaque)
 
     for (i = 0; i < s->conf->num_queues; i++) {
         VirtQueue *vq = virtio_get_queue(s->vdev, i);
+        EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq);
 
         virtio_queue_aio_detach_host_notifier(vq, s->ctx);
+
+        /*
+         * Test and clear notifier after disabling event, in case poll callback
+         * didn't have time to run.
+         */
+        virtio_queue_host_notifier_read(host_notifier);
     }
 }
 
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_dataplane_stop_bh(void *opaque)
 {
     VirtIOSCSI *s = opaque;
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    EventNotifier *host_notifier;
     int i;
 
     virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx);
+    host_notifier = virtio_queue_get_host_notifier(vs->ctrl_vq);
+
+    /*
+     * Test and clear notifier after disabling event, in case poll callback
+     * didn't have time to run.
+     */
+    virtio_queue_host_notifier_read(host_notifier);
+
     virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx);
+    host_notifier = virtio_queue_get_host_notifier(vs->event_vq);
+    virtio_queue_host_notifier_read(host_notifier);
+
     for (i = 0; i < vs->conf.num_queues; i++) {
         virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx);
+        host_notifier = virtio_queue_get_host_notifier(vs->cmd_vqs[i]);
+        virtio_queue_host_notifier_read(host_notifier);
     }
 }
 
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct
 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
     aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
-    /* Test and clear notifier before after disabling event,
-     * in case poll callback didn't have time to run. */
-    virtio_queue_host_notifier_read(&vq->host_notifier);
 }
 
 void virtio_queue_host_notifier_read(EventNotifier *n)
-- 
2.40.1

Now that the bdrv_drain_invoke() calls are pulled up to the callers of
bdrv_drain_recurse(), the 'begin' parameter isn't needed any more.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
     }
 }
 
-static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
+static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
     BdrvChild *child, *tmp;
     bool waited;
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
          */
         bdrv_ref(bs);
     }
-    waited |= bdrv_drain_recurse(bs, begin);
+    waited |= bdrv_drain_recurse(bs);
     if (in_main_loop) {
         bdrv_unref(bs);
     }
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
     }
 
     bdrv_drain_invoke(bs, true);
-    bdrv_drain_recurse(bs, true);
+    bdrv_drain_recurse(bs);
 }
 
 void bdrv_drained_end(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
 
     bdrv_parent_drained_end(bs);
     bdrv_drain_invoke(bs, false);
-    bdrv_drain_recurse(bs, false);
+    bdrv_drain_recurse(bs);
     aio_enable_external(bdrv_get_aio_context(bs));
 }
 
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
             if (aio_context == bdrv_get_aio_context(bs)) {
-                waited |= bdrv_drain_recurse(bs, true);
+                waited |= bdrv_drain_recurse(bs);
             }
         }
         aio_context_release(aio_context);
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
         aio_enable_external(aio_context);
         bdrv_parent_drained_end(bs);
         bdrv_drain_invoke(bs, false);
-        bdrv_drain_recurse(bs, false);
+        bdrv_drain_recurse(bs);
         aio_context_release(aio_context);
     }
-- 
2.13.6
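For orientation: the stop_bh functions touched above already run in the
right thread. The device's stop path schedules them in the dataplane
AioContext and waits for them, along the lines of the existing call
(sketch of the calling convention, not new code):

    /*
     * Called from the main loop thread; runs the BH inside the
     * IOThread's AioContext and blocks until it has finished, so the
     * final virtio_queue_host_notifier_read() happens in the same
     * thread that normally processes the virtqueue.
     */
    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);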
When opening the 'file' child moves bs to an iothread, we need to hold
its AioContext lock before we can call raw_apply_options() (and
more specifically, bdrv_getlength() inside of it).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230525124713.401149-8-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/raw-format.c | 5 +++++
 tests/unit/test-block-iothread.c | 3 ---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/block/raw-format.c b/block/raw-format.c
index XXXXXXX..XXXXXXX 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
     BDRVRawState *s = bs->opaque;
+    AioContext *ctx;
     bool has_size;
     uint64_t offset, size;
     BdrvChildRole file_role;
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                      bs->file->bs->filename);
     }
 
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     ret = raw_apply_options(bs, s, offset, has_size, size, errp);
+    aio_context_release(ctx);
+
     if (ret < 0) {
         return ret;
     }
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -XXX,XX +XXX,XX @@ static void test_attach_second_node(void)
     qdict_put_str(options, "driver", "raw");
     qdict_put_str(options, "file", "base");
 
-    /* FIXME raw_open() should take ctx's lock internally */
-    aio_context_acquire(ctx);
     aio_context_acquire(main_ctx);
     filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
     aio_context_release(main_ctx);
-    aio_context_release(ctx);
 
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs) == ctx);
-- 
2.40.1

The device is drained, so there is no point in waiting for requests at
the end of the drained section. Remove the bdrv_drain_recurse() calls
there.

The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e
in order to call the .bdrv_co_drain_end() driver callback. This is now
done by a separate bdrv_drain_invoke() call.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
 
     bdrv_parent_drained_end(bs);
     bdrv_drain_invoke(bs, false);
-    bdrv_drain_recurse(bs);
     aio_enable_external(bdrv_get_aio_context(bs));
 }
 
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
         aio_enable_external(aio_context);
         bdrv_parent_drained_end(bs);
         bdrv_drain_invoke(bs, false);
-        bdrv_drain_recurse(bs);
         aio_context_release(aio_context);
     }
 
-- 
2.13.6
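A note on why the lock is needed for bdrv_getlength(): it is one of the
generated coroutine wrappers mentioned in the commit message. Called
outside coroutine context, it spawns a coroutine and spins a nested
event loop until the result is ready, roughly like this (a hand-written
simplification; the state struct and entry function names here are
illustrative, not the generated ones):

    typedef struct {
        BlockDriverState *bs;
        bool in_progress;
        int64_t ret;
    } GetLengthState;

    static void coroutine_fn getlength_entry(void *opaque)
    {
        GetLengthState *s = opaque;
        s->ret = bdrv_co_getlength(s->bs);  /* the coroutine version */
        s->in_progress = false;
    }

    int64_t bdrv_getlength(BlockDriverState *bs)
    {
        GetLengthState s = { .bs = bs, .in_progress = true };
        Coroutine *co = qemu_coroutine_create(getlength_entry, &s);

        bdrv_coroutine_enter(bs, co);
        /* Nested event loop; the caller must hold bs's AioContext lock */
        BDRV_POLL_WHILE(bs, s.in_progress);
        return s.ret;
    }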
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | Drain requests are propagated to child nodes, parent nodes and directly |
---|---|---|---|
2 | to the AioContext. The order in which this happened was different | ||
3 | between all combinations of drain/drain_all and begin/end. | ||
2 | 4 | ||
3 | The VuServer object has a refcount field and ref/unref APIs. The name is | 5 | The correct order is to keep children only drained when their parents |
4 | confusing because it's actually an in-flight request counter instead of | 6 | are also drained. This means that at the start of a drained section, the |
5 | a refcount. | 7 | AioContext needs to be drained first, the parents second and only then |
8 | the children. The correct order for the end of a drained section is the | ||
9 | opposite. | ||
6 | 10 | ||
7 | Normally a refcount destroys the object upon reaching zero. The VuServer | 11 | This patch changes the three other functions to follow the example of |
8 | counter is used to wake up the vhost-user coroutine when there are no | 12 | bdrv_drained_begin(), which is the only one that got it right. |
9 | more requests. | ||
10 | 13 | ||
11 | Avoid confusing by renaming refcount and ref/unref to in_flight and | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | inc/dec. | 15 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
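
To make the renamed pattern concrete, here is a minimal standalone sketch (illustrative only, not QEMU code; the Server type and function names are invented): a counter that, unlike a refcount, destroys nothing at zero and instead wakes a waiter once the last in-flight request completes.

```c
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    unsigned int in_flight;  /* outstanding requests, not a refcount */
    bool wait_idle;          /* a waiter wants in_flight to reach 0 */
} Server;

static void inc_in_flight(Server *s)
{
    s->in_flight++;
}

static void dec_in_flight(Server *s)
{
    s->in_flight--;
    if (s->wait_idle && !s->in_flight) {
        /* QEMU wakes the yielded coroutine with aio_co_wake() here */
        printf("idle, waking waiter\n");
    }
}

int main(void)
{
    Server s = { 0, false };

    inc_in_flight(&s);
    inc_in_flight(&s);
    s.wait_idle = true;   /* stands in for the coroutine yielding */
    dec_in_flight(&s);
    dec_in_flight(&s);    /* the last completion triggers the wake-up */
    return 0;
}
```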
16 | --- | ||
17 | block/io.c | 12 ++++++++---- | ||
18 | 1 file changed, 8 insertions(+), 4 deletions(-) | ||
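
The ordering rule in the drain change above (AioContext first, then parents, then children, with drained_end reversing it exactly) can be sketched with a toy node tree. This is illustrative only, not QEMU code: it just shows that the invariant "a child is only drained while its parent is drained" holds at every step.

```c
#include <stdio.h>

typedef struct Node {
    const char *name;
    struct Node *child;     /* a single child keeps the sketch short */
    int quiesce_counter;
} Node;

static void drained_begin(Node *n)
{
    if (!n) {
        return;
    }
    n->quiesce_counter++;       /* quiesce the parent first... */
    printf("begin %s\n", n->name);
    drained_begin(n->child);    /* ...then its children */
}

static void drained_end(Node *n)
{
    if (!n) {
        return;
    }
    drained_end(n->child);      /* resume the children first... */
    n->quiesce_counter--;       /* ...then the parent */
    printf("end %s\n", n->name);
}

int main(void)
{
    Node child = { "child", NULL, 0 };
    Node parent = { "parent", &child, 0 };

    drained_begin(&parent);     /* prints: begin parent, begin child */
    drained_end(&parent);       /* prints: end child, end parent */
    return 0;
}
```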
13 | 19 | ||
14 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | 20 | diff --git a/block/io.c b/block/io.c |
15 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
16 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
17 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
18 | Message-Id: <20230516190238.8401-6-stefanha@redhat.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
20 | --- | ||
21 | include/qemu/vhost-user-server.h | 6 +++--- | ||
22 | block/export/vhost-user-blk-server.c | 11 +++++++---- | ||
23 | util/vhost-user-server.c | 14 +++++++------- | ||
24 | 3 files changed, 17 insertions(+), 14 deletions(-) | ||
25 | |||
26 | diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h | ||
27 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/include/qemu/vhost-user-server.h | 22 | --- a/block/io.c |
29 | +++ b/include/qemu/vhost-user-server.h | 23 | +++ b/block/io.c |
30 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 24 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
31 | const VuDevIface *vu_iface; | ||
32 | |||
33 | /* Protected by ctx lock */ | ||
34 | - unsigned int refcount; | ||
35 | + unsigned int in_flight; | ||
36 | bool wait_idle; | ||
37 | VuDev vu_dev; | ||
38 | QIOChannel *ioc; /* The I/O channel with the client */ | ||
39 | @@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server, | ||
40 | |||
41 | void vhost_user_server_stop(VuServer *server); | ||
42 | |||
43 | -void vhost_user_server_ref(VuServer *server); | ||
44 | -void vhost_user_server_unref(VuServer *server); | ||
45 | +void vhost_user_server_inc_in_flight(VuServer *server); | ||
46 | +void vhost_user_server_dec_in_flight(VuServer *server); | ||
47 | |||
48 | void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx); | ||
49 | void vhost_user_server_detach_aio_context(VuServer *server); | ||
50 | diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/block/export/vhost-user-blk-server.c | ||
53 | +++ b/block/export/vhost-user-blk-server.c | ||
54 | @@ -XXX,XX +XXX,XX @@ static void vu_blk_req_complete(VuBlkReq *req, size_t in_len) | ||
55 | free(req); | ||
56 | } | ||
57 | |||
58 | -/* Called with server refcount increased, must decrease before returning */ | ||
59 | +/* | ||
60 | + * Called with server in_flight counter increased, must decrease before | ||
61 | + * returning. | ||
62 | + */ | ||
63 | static void coroutine_fn vu_blk_virtio_process_req(void *opaque) | ||
64 | { | ||
65 | VuBlkReq *req = opaque; | ||
66 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) | ||
67 | in_num, out_num); | ||
68 | if (in_len < 0) { | ||
69 | free(req); | ||
70 | - vhost_user_server_unref(server); | ||
71 | + vhost_user_server_dec_in_flight(server); | ||
72 | return; | 25 | return; |
73 | } | 26 | } |
74 | 27 | ||
75 | vu_blk_req_complete(req, in_len); | 28 | + /* Stop things in parent-to-child order */ |
76 | - vhost_user_server_unref(server); | 29 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { |
77 | + vhost_user_server_dec_in_flight(server); | 30 | aio_disable_external(bdrv_get_aio_context(bs)); |
31 | bdrv_parent_drained_begin(bs); | ||
32 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | ||
33 | return; | ||
34 | } | ||
35 | |||
36 | - bdrv_parent_drained_end(bs); | ||
37 | + /* Re-enable things in child-to-parent order */ | ||
38 | bdrv_drain_invoke(bs, false); | ||
39 | + bdrv_parent_drained_end(bs); | ||
40 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
78 | } | 41 | } |
79 | 42 | ||
80 | static void vu_blk_process_vq(VuDev *vu_dev, int idx) | 43 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
81 | @@ -XXX,XX +XXX,XX @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) | 44 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
82 | Coroutine *co = | 45 | AioContext *aio_context = bdrv_get_aio_context(bs); |
83 | qemu_coroutine_create(vu_blk_virtio_process_req, req); | 46 | |
84 | 47 | + /* Stop things in parent-to-child order */ | |
85 | - vhost_user_server_ref(server); | 48 | aio_context_acquire(aio_context); |
86 | + vhost_user_server_inc_in_flight(server); | 49 | - bdrv_parent_drained_begin(bs); |
87 | qemu_coroutine_enter(co); | 50 | aio_disable_external(aio_context); |
51 | + bdrv_parent_drained_begin(bs); | ||
52 | bdrv_drain_invoke(bs, true); | ||
53 | aio_context_release(aio_context); | ||
54 | |||
55 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
56 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
57 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
58 | |||
59 | + /* Re-enable things in child-to-parent order */ | ||
60 | aio_context_acquire(aio_context); | ||
61 | - aio_enable_external(aio_context); | ||
62 | - bdrv_parent_drained_end(bs); | ||
63 | bdrv_drain_invoke(bs, false); | ||
64 | + bdrv_parent_drained_end(bs); | ||
65 | + aio_enable_external(aio_context); | ||
66 | aio_context_release(aio_context); | ||
88 | } | 67 | } |
89 | } | ||
90 | diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/util/vhost-user-server.c | ||
93 | +++ b/util/vhost-user-server.c | ||
94 | @@ -XXX,XX +XXX,XX @@ static void panic_cb(VuDev *vu_dev, const char *buf) | ||
95 | error_report("vu_panic: %s", buf); | ||
96 | } | ||
97 | |||
98 | -void vhost_user_server_ref(VuServer *server) | ||
99 | +void vhost_user_server_inc_in_flight(VuServer *server) | ||
100 | { | ||
101 | assert(!server->wait_idle); | ||
102 | - server->refcount++; | ||
103 | + server->in_flight++; | ||
104 | } | ||
105 | |||
106 | -void vhost_user_server_unref(VuServer *server) | ||
107 | +void vhost_user_server_dec_in_flight(VuServer *server) | ||
108 | { | ||
109 | - server->refcount--; | ||
110 | - if (server->wait_idle && !server->refcount) { | ||
111 | + server->in_flight--; | ||
112 | + if (server->wait_idle && !server->in_flight) { | ||
113 | aio_co_wake(server->co_trip); | ||
114 | } | ||
115 | } | ||
116 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn void vu_client_trip(void *opaque) | ||
117 | /* Keep running */ | ||
118 | } | ||
119 | |||
120 | - if (server->refcount) { | ||
121 | + if (server->in_flight) { | ||
122 | /* Wait for requests to complete before we can unmap the memory */ | ||
123 | server->wait_idle = true; | ||
124 | qemu_coroutine_yield(); | ||
125 | server->wait_idle = false; | ||
126 | } | ||
127 | - assert(server->refcount == 0); | ||
128 | + assert(server->in_flight == 0); | ||
129 | |||
130 | vu_deinit(vu_dev); | ||
131 | 68 | ||
132 | -- | 69 | -- |
133 | 2.40.1 | 70 | 2.13.6 |
134 | 71 | ||
1 | This fixes blk_new_open() to not assume that bs is in the main context. | 1 | Commit 15afd94a047 added code to acquire and release the AioContext in |
---|---|---|---|
2 | qemuio_command(). This means that the lock is taken twice now in the | ||
3 | call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for | ||
4 | any requests issued to nodes in a non-mainloop AioContext: the polling loop releases the lock only once, so the AioContext stays locked and the request can never complete. | ||
2 | 5 | ||
3 | In particular, the BlockBackend must be created with the right | 6 | Dropping the first locking from hmp_qemu_io() fixes the problem. |
4 | AioContext because it will refuse to move to a different context | ||
5 | afterwards. (blk->allow_aio_context_change is false.) | ||
6 | |||
7 | Use this opportunity to use blk_insert_bs() instead of duplicating the | ||
8 | bdrv_root_attach_child() call. This is consistent with what | ||
9 | blk_new_with_bs() does. Add comments to document the locking rules. | ||
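
Condensed from the diff below (not a standalone program; error handling and the perm/shared setup are omitted), the shape of the fix is: open the node first, then create the BlockBackend in whatever AioContext the node ended up in.

```c
bs = bdrv_open(filename, reference, options, flags, errp);

/* bdrv_open() may have moved bs to an iothread's AioContext */
ctx = bdrv_get_aio_context(bs);
blk = blk_new(ctx, perm, shared);    /* create blk in that context */

aio_context_acquire(ctx);
blk_insert_bs(blk, bs, errp);        /* attach bs under the ctx lock */
bdrv_unref(bs);
aio_context_release(ctx);
```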
10 | 7 | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | Message-Id: <20230525124713.401149-5-kwolf@redhat.com> | ||
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
15 | --- | 10 | --- |
16 | block/block-backend.c | 27 +++++++++++++++++++++------ | 11 | hmp.c | 6 ------ |
17 | 1 file changed, 21 insertions(+), 6 deletions(-) | 12 | 1 file changed, 6 deletions(-) |
18 | 13 | ||
19 | diff --git a/block/block-backend.c b/block/block-backend.c | 14 | diff --git a/hmp.c b/hmp.c |
20 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/block-backend.c | 16 | --- a/hmp.c |
22 | +++ b/block/block-backend.c | 17 | +++ b/hmp.c |
23 | @@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) | 18 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
24 | * Both sets of permissions can be changed later using blk_set_perm(). | ||
25 | * | ||
26 | * Return the new BlockBackend on success, null on failure. | ||
27 | + * | ||
28 | + * Callers must hold the AioContext lock of @bs. | ||
29 | */ | ||
30 | BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, | ||
31 | uint64_t shared_perm, Error **errp) | ||
32 | @@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, | ||
33 | |||
34 | /* | ||
35 | * Creates a new BlockBackend, opens a new BlockDriverState, and connects both. | ||
36 | - * The new BlockBackend is in the main AioContext. | ||
37 | + * By default, the new BlockBackend is in the main AioContext, but if the | ||
38 | + * parameters connect it with any existing node in a different AioContext, it | ||
39 | + * may end up there instead. | ||
40 | * | ||
41 | * Just as with bdrv_open(), after having called this function the reference to | ||
42 | * @options belongs to the block layer (even on failure). | ||
43 | * | ||
44 | + * Called without holding an AioContext lock. | ||
45 | + * | ||
46 | * TODO: Remove @filename and @flags; it should be possible to specify a whole | ||
47 | * BDS tree just by specifying the @options QDict (or @reference, | ||
48 | * alternatively). At the time of adding this function, this is not possible, | ||
49 | @@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_open(const char *filename, const char *reference, | ||
50 | { | 19 | { |
51 | BlockBackend *blk; | 20 | BlockBackend *blk; |
52 | BlockDriverState *bs; | 21 | BlockBackend *local_blk = NULL; |
53 | + AioContext *ctx; | 22 | - AioContext *aio_context; |
54 | uint64_t perm = 0; | 23 | const char* device = qdict_get_str(qdict, "device"); |
55 | uint64_t shared = BLK_PERM_ALL; | 24 | const char* command = qdict_get_str(qdict, "command"); |
56 | 25 | Error *err = NULL; | |
57 | @@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_open(const char *filename, const char *reference, | 26 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
58 | shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; | 27 | } |
59 | } | 28 | } |
60 | 29 | ||
61 | - blk = blk_new(qemu_get_aio_context(), perm, shared); | 30 | - aio_context = blk_get_aio_context(blk); |
62 | aio_context_acquire(qemu_get_aio_context()); | 31 | - aio_context_acquire(aio_context); |
63 | bs = bdrv_open(filename, reference, options, flags, errp); | 32 | - |
64 | aio_context_release(qemu_get_aio_context()); | 33 | /* |
65 | if (!bs) { | 34 | * Notably absent: Proper permission management. This is sad, but it seems |
66 | - blk_unref(blk); | 35 | * almost impossible to achieve without changing the semantics and thereby |
67 | return NULL; | 36 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
68 | } | 37 | */ |
69 | 38 | qemuio_command(blk, command); | |
70 | - blk->root = bdrv_root_attach_child(bs, "root", &child_root, | 39 | |
71 | - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, | 40 | - aio_context_release(aio_context); |
72 | - perm, shared, blk, errp); | 41 | - |
73 | + /* bdrv_open() could have moved bs to a different AioContext */ | 42 | fail: |
74 | + ctx = bdrv_get_aio_context(bs); | 43 | blk_unref(local_blk); |
75 | + blk = blk_new(bdrv_get_aio_context(bs), perm, shared); | 44 | hmp_handle_error(mon, &err); |
76 | + blk->perm = perm; | ||
77 | + blk->shared_perm = shared; | ||
78 | + | ||
79 | + aio_context_acquire(ctx); | ||
80 | + blk_insert_bs(blk, bs, errp); | ||
81 | + bdrv_unref(bs); | ||
82 | + aio_context_release(ctx); | ||
83 | + | ||
84 | if (!blk->root) { | ||
85 | blk_unref(blk); | ||
86 | return NULL; | ||
87 | @@ -XXX,XX +XXX,XX @@ void blk_remove_bs(BlockBackend *blk) | ||
88 | |||
89 | /* | ||
90 | * Associates a new BlockDriverState with @blk. | ||
91 | + * | ||
92 | + * Callers must hold the AioContext lock of @bs. | ||
93 | */ | ||
94 | int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) | ||
95 | { | ||
96 | -- | 45 | -- |
97 | 2.40.1 | 46 | 2.13.6 |
47 | |||
1 | qcow2_open() doesn't work correctly when opening the 'file' child moves | 1 | From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> |
---|---|---|---|
2 | bs to an iothread, for several reasons: | ||
3 | 2 | ||
4 | - It uses BDRV_POLL_WHILE() to wait for the qcow2_open_entry() | 3 | Since bdrv_co_preadv does all necessary checks, including |
5 | coroutine, which involves dropping the AioContext lock for bs when it | 4 | reading after the end of the backing file, avoid duplicating |
6 | is not in the main context - but we don't hold it, so this crashes. | 5 | the verification before the bdrv_co_preadv call. |
7 | 6 | ||
8 | - It runs the qcow2_open_entry() coroutine in the current thread instead | 7 | Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> |
9 | of the new AioContext of bs. | 8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
10 | 9 | Reviewed-by: Eric Blake <eblake@redhat.com> | |
11 | - qcow2_open_entry() doesn't notify the main loop when it's done. | ||
12 | |||
13 | This patch fixes these issues around delegating work to a coroutine. | ||
14 | Temporarily dropping the main AioContext lock is not necessary because | ||
15 | we know we run in the main thread. | ||
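
Condensed from the diff below (not standalone code), the resulting pattern is: enter the coroutine in the AioContext that bs now lives in, then poll from the unlocked main loop; qcow2_open_entry() calls aio_wait_kick() on completion so the wait terminates.

```c
aio_co_enter(bdrv_get_aio_context(bs),
             qemu_coroutine_create(qcow2_open_entry, &qoc));
AIO_WAIT_WHILE_UNLOCKED(NULL, qoc.ret == -EINPROGRESS);
```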
16 | |||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
18 | Message-Id: <20230525124713.401149-7-kwolf@redhat.com> | ||
19 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
20 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
21 | --- | 11 | --- |
22 | block.c | 6 ++++++ | 12 | block/qcow2.h | 3 --- |
23 | block/qcow2.c | 8 ++++++-- | 13 | block/qcow2.c | 51 ++++++++------------------------------------------- |
24 | 2 files changed, 12 insertions(+), 2 deletions(-) | 14 | 2 files changed, 8 insertions(+), 46 deletions(-) |
25 | 15 | ||
26 | diff --git a/block.c b/block.c | 16 | diff --git a/block/qcow2.h b/block/qcow2.h |
27 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/block.c | 18 | --- a/block/qcow2.h |
29 | +++ b/block.c | 19 | +++ b/block/qcow2.h |
30 | @@ -XXX,XX +XXX,XX @@ done: | 20 | @@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) |
31 | * BlockdevRef. | 21 | } |
32 | * | 22 | |
33 | * The BlockdevRef will be removed from the options QDict. | 23 | /* qcow2.c functions */ |
34 | + * | 24 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, |
35 | + * @parent can move to a different AioContext in this function. Callers must | 25 | - int64_t sector_num, int nb_sectors); |
36 | + * make sure that their AioContext locking is still correct after this. | 26 | - |
37 | */ | 27 | int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, |
38 | BdrvChild *bdrv_open_child(const char *filename, | 28 | int refcount_order, bool generous_increase, |
39 | QDict *options, const char *bdref_key, | 29 | uint64_t *refblock_count); |
40 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_open_child(const char *filename, | ||
41 | |||
42 | /* | ||
43 | * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. | ||
44 | + * | ||
45 | + * @parent can move to a different AioContext in this function. Callers must | ||
46 | + * make sure that their AioContext locking is still correct after this. | ||
47 | */ | ||
48 | int bdrv_open_file_child(const char *filename, | ||
49 | QDict *options, const char *bdref_key, | ||
50 | diff --git a/block/qcow2.c b/block/qcow2.c | 30 | diff --git a/block/qcow2.c b/block/qcow2.c |
51 | index XXXXXXX..XXXXXXX 100644 | 31 | index XXXXXXX..XXXXXXX 100644 |
52 | --- a/block/qcow2.c | 32 | --- a/block/qcow2.c |
53 | +++ b/block/qcow2.c | 33 | +++ b/block/qcow2.c |
54 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn qcow2_open_entry(void *opaque) | 34 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, |
55 | qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true, | 35 | return status; |
56 | qoc->errp); | ||
57 | qemu_co_mutex_unlock(&s->lock); | ||
58 | + | ||
59 | + aio_wait_kick(); | ||
60 | } | 36 | } |
61 | 37 | ||
62 | static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, | 38 | -/* handle reading after the end of the backing file */ |
63 | @@ -XXX,XX +XXX,XX @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, | 39 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, |
64 | 40 | - int64_t offset, int bytes) | |
65 | assert(!qemu_in_coroutine()); | 41 | -{ |
66 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | 42 | - uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE; |
67 | - qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc)); | 43 | - int n1; |
68 | - BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); | 44 | - |
69 | + | 45 | - if ((offset + bytes) <= bs_size) { |
70 | + aio_co_enter(bdrv_get_aio_context(bs), | 46 | - return bytes; |
71 | + qemu_coroutine_create(qcow2_open_entry, &qoc)); | 47 | - } |
72 | + AIO_WAIT_WHILE_UNLOCKED(NULL, qoc.ret == -EINPROGRESS); | 48 | - |
73 | 49 | - if (offset >= bs_size) { | |
74 | return qoc.ret; | 50 | - n1 = 0; |
75 | } | 51 | - } else { |
52 | - n1 = bs_size - offset; | ||
53 | - } | ||
54 | - | ||
55 | - qemu_iovec_memset(qiov, n1, 0, bytes - n1); | ||
56 | - | ||
57 | - return n1; | ||
58 | -} | ||
59 | - | ||
60 | static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
61 | uint64_t bytes, QEMUIOVector *qiov, | ||
62 | int flags) | ||
63 | { | ||
64 | BDRVQcow2State *s = bs->opaque; | ||
65 | - int offset_in_cluster, n1; | ||
66 | + int offset_in_cluster; | ||
67 | int ret; | ||
68 | unsigned int cur_bytes; /* number of bytes in current iteration */ | ||
69 | uint64_t cluster_offset = 0; | ||
70 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
71 | case QCOW2_CLUSTER_UNALLOCATED: | ||
72 | |||
73 | if (bs->backing) { | ||
74 | - /* read from the base image */ | ||
75 | - n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov, | ||
76 | - offset, cur_bytes); | ||
77 | - if (n1 > 0) { | ||
78 | - QEMUIOVector local_qiov; | ||
79 | - | ||
80 | - qemu_iovec_init(&local_qiov, hd_qiov.niov); | ||
81 | - qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1); | ||
82 | - | ||
83 | - BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | ||
84 | - qemu_co_mutex_unlock(&s->lock); | ||
85 | - ret = bdrv_co_preadv(bs->backing, offset, n1, | ||
86 | - &local_qiov, 0); | ||
87 | - qemu_co_mutex_lock(&s->lock); | ||
88 | - | ||
89 | - qemu_iovec_destroy(&local_qiov); | ||
90 | - | ||
91 | - if (ret < 0) { | ||
92 | - goto fail; | ||
93 | - } | ||
94 | + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | ||
95 | + qemu_co_mutex_unlock(&s->lock); | ||
96 | + ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, | ||
97 | + &hd_qiov, 0); | ||
98 | + qemu_co_mutex_lock(&s->lock); | ||
99 | + if (ret < 0) { | ||
100 | + goto fail; | ||
101 | } | ||
102 | } else { | ||
103 | /* Note: in this case, no need to wait */ | ||
76 | -- | 104 | -- |
77 | 2.40.1 | 105 | 2.13.6 |
106 | |||
New patch | |||
---|---|---|---|
1 | Removing a quorum child node with x-blockdev-change results in a quorum | ||
2 | driver state that cannot be recreated with create options because it | ||
3 | would require a list with gaps. This causes trouble in at least | ||
4 | .bdrv_refresh_filename(). | ||
1 | 5 | ||
6 | Document this problem so that we won't accidentally mark the command | ||
7 | stable without having addressed it. | ||
8 | |||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
11 | --- | ||
12 | qapi/block-core.json | 4 ++++ | ||
13 | 1 file changed, 4 insertions(+) | ||
14 | |||
15 | diff --git a/qapi/block-core.json b/qapi/block-core.json | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/qapi/block-core.json | ||
18 | +++ b/qapi/block-core.json | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | # does not support all kinds of operations, all kinds of children, nor | ||
21 | # all block drivers. | ||
22 | # | ||
23 | +# FIXME Removing children from a quorum node means introducing gaps in the | ||
24 | +# child indices. This cannot be represented in the 'children' list of | ||
25 | +# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename(). | ||
26 | +# | ||
27 | # Warning: The data in a new quorum child MUST be consistent with that of | ||
28 | # the rest of the array. | ||
29 | # | ||
30 | -- | ||
31 | 2.13.6 | ||
32 | |||
1 | It has no internal callers, so its only use is being called from | 1 | From: Doug Gale <doug16k@gmail.com> |
---|---|---|---|
2 | individual test cases. If the name starts with an underscore, it is | ||
3 | considered private and linters warn against calling it. 256 only gets | ||
4 | away with it currently because it's on the exception list for linters. | ||
5 | 2 | ||
6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Add trace output for commands, errors, and undefined behavior. |
7 | Message-Id: <20230525124713.401149-12-kwolf@redhat.com> | 4 | Add guest error log output for undefined behavior. |
5 | Report invalid undefined accesses to MMIO. | ||
6 | Annotate unlikely error checks with unlikely. | ||
7 | |||
8 | Signed-off-by: Doug Gale <doug16k@gmail.com> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 12 | --- |
11 | tests/qemu-iotests/iotests.py | 2 +- | 13 | hw/block/nvme.c | 349 ++++++++++++++++++++++++++++++++++++++++++-------- |
12 | tests/qemu-iotests/256 | 2 +- | 14 | hw/block/trace-events | 93 ++++++++++++++ |
13 | 2 files changed, 2 insertions(+), 2 deletions(-) | 15 | 2 files changed, 390 insertions(+), 52 deletions(-) |
14 | 16 | ||
15 | diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py | 17 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c |
16 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tests/qemu-iotests/iotests.py | 19 | --- a/hw/block/nvme.c |
18 | +++ b/tests/qemu-iotests/iotests.py | 20 | +++ b/hw/block/nvme.c |
19 | @@ -XXX,XX +XXX,XX @@ def _verify_virtio_blk() -> None: | 21 | @@ -XXX,XX +XXX,XX @@ |
20 | if 'virtio-blk' not in out: | 22 | #include "qapi/visitor.h" |
21 | notrun('Missing virtio-blk in QEMU binary') | 23 | #include "sysemu/block-backend.h" |
22 | 24 | ||
23 | -def _verify_virtio_scsi_pci_or_ccw() -> None: | 25 | +#include "qemu/log.h" |
24 | +def verify_virtio_scsi_pci_or_ccw() -> None: | 26 | +#include "trace.h" |
25 | out = qemu_pipe('-M', 'none', '-device', 'help') | 27 | #include "nvme.h" |
26 | if 'virtio-scsi-pci' not in out and 'virtio-scsi-ccw' not in out: | 28 | |
27 | notrun('Missing virtio-scsi-pci or virtio-scsi-ccw in QEMU binary') | 29 | +#define NVME_GUEST_ERR(trace, fmt, ...) \ |
28 | diff --git a/tests/qemu-iotests/256 b/tests/qemu-iotests/256 | 30 | + do { \ |
29 | index XXXXXXX..XXXXXXX 100755 | 31 | + (trace_##trace)(__VA_ARGS__); \ |
30 | --- a/tests/qemu-iotests/256 | 32 | + qemu_log_mask(LOG_GUEST_ERROR, #trace \ |
31 | +++ b/tests/qemu-iotests/256 | 33 | + " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ |
32 | @@ -XXX,XX +XXX,XX @@ import os | 34 | + } while (0) |
33 | import iotests | 35 | + |
34 | from iotests import log | 36 | static void nvme_process_sq(void *opaque); |
35 | 37 | ||
36 | -iotests._verify_virtio_scsi_pci_or_ccw() | 38 | static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) |
37 | +iotests.verify_virtio_scsi_pci_or_ccw() | 39 | @@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) |
38 | 40 | { | |
39 | iotests.script_initialize(supported_fmts=['qcow2']) | 41 | if (cq->irq_enabled) { |
40 | size = 64 * 1024 * 1024 | 42 | if (msix_enabled(&(n->parent_obj))) { |
43 | + trace_nvme_irq_msix(cq->vector); | ||
44 | msix_notify(&(n->parent_obj), cq->vector); | ||
45 | } else { | ||
46 | + trace_nvme_irq_pin(); | ||
47 | pci_irq_pulse(&n->parent_obj); | ||
48 | } | ||
49 | + } else { | ||
50 | + trace_nvme_irq_masked(); | ||
51 | } | ||
52 | } | ||
53 | |||
54 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
55 | trans_len = MIN(len, trans_len); | ||
56 | int num_prps = (len >> n->page_bits) + 1; | ||
57 | |||
58 | - if (!prp1) { | ||
59 | + if (unlikely(!prp1)) { | ||
60 | + trace_nvme_err_invalid_prp(); | ||
61 | return NVME_INVALID_FIELD | NVME_DNR; | ||
62 | } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && | ||
63 | prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
65 | } | ||
66 | len -= trans_len; | ||
67 | if (len) { | ||
68 | - if (!prp2) { | ||
69 | + if (unlikely(!prp2)) { | ||
70 | + trace_nvme_err_invalid_prp2_missing(); | ||
71 | goto unmap; | ||
72 | } | ||
73 | if (len > n->page_size) { | ||
74 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
75 | uint64_t prp_ent = le64_to_cpu(prp_list[i]); | ||
76 | |||
77 | if (i == n->max_prp_ents - 1 && len > n->page_size) { | ||
78 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { | ||
79 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { | ||
80 | + trace_nvme_err_invalid_prplist_ent(prp_ent); | ||
81 | goto unmap; | ||
82 | } | ||
83 | |||
84 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
85 | prp_ent = le64_to_cpu(prp_list[i]); | ||
86 | } | ||
87 | |||
88 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { | ||
89 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { | ||
90 | + trace_nvme_err_invalid_prplist_ent(prp_ent); | ||
91 | goto unmap; | ||
92 | } | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
95 | i++; | ||
96 | } | ||
97 | } else { | ||
98 | - if (prp2 & (n->page_size - 1)) { | ||
99 | + if (unlikely(prp2 & (n->page_size - 1))) { | ||
100 | + trace_nvme_err_invalid_prp2_align(prp2); | ||
101 | goto unmap; | ||
102 | } | ||
103 | if (qsg->nsg) { | ||
104 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, | ||
105 | QEMUIOVector iov; | ||
106 | uint16_t status = NVME_SUCCESS; | ||
107 | |||
108 | + trace_nvme_dma_read(prp1, prp2); | ||
109 | + | ||
110 | if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { | ||
111 | return NVME_INVALID_FIELD | NVME_DNR; | ||
112 | } | ||
113 | if (qsg.nsg > 0) { | ||
114 | - if (dma_buf_read(ptr, len, &qsg)) { | ||
115 | + if (unlikely(dma_buf_read(ptr, len, &qsg))) { | ||
116 | + trace_nvme_err_invalid_dma(); | ||
117 | status = NVME_INVALID_FIELD | NVME_DNR; | ||
118 | } | ||
119 | qemu_sglist_destroy(&qsg); | ||
120 | } else { | ||
121 | - if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { | ||
122 | + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { | ||
123 | + trace_nvme_err_invalid_dma(); | ||
124 | status = NVME_INVALID_FIELD | NVME_DNR; | ||
125 | } | ||
126 | qemu_iovec_destroy(&iov); | ||
127 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, | ||
128 | uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); | ||
129 | uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS); | ||
130 | |||
131 | - if (slba + nlb > ns->id_ns.nsze) { | ||
132 | + if (unlikely(slba + nlb > ns->id_ns.nsze)) { | ||
133 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); | ||
134 | return NVME_LBA_RANGE | NVME_DNR; | ||
135 | } | ||
136 | |||
137 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, | ||
138 | int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; | ||
139 | enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; | ||
140 | |||
141 | - if ((slba + nlb) > ns->id_ns.nsze) { | ||
142 | + trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); | ||
143 | + | ||
144 | + if (unlikely((slba + nlb) > ns->id_ns.nsze)) { | ||
145 | block_acct_invalid(blk_get_stats(n->conf.blk), acct); | ||
146 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); | ||
147 | return NVME_LBA_RANGE | NVME_DNR; | ||
148 | } | ||
149 | |||
150 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
151 | NvmeNamespace *ns; | ||
152 | uint32_t nsid = le32_to_cpu(cmd->nsid); | ||
153 | |||
154 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
155 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
156 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
157 | return NVME_INVALID_NSID | NVME_DNR; | ||
158 | } | ||
159 | |||
160 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
161 | case NVME_CMD_READ: | ||
162 | return nvme_rw(n, ns, cmd, req); | ||
163 | default: | ||
164 | + trace_nvme_err_invalid_opc(cmd->opcode); | ||
165 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
166 | } | ||
167 | } | ||
168 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
169 | NvmeCQueue *cq; | ||
170 | uint16_t qid = le16_to_cpu(c->qid); | ||
171 | |||
172 | - if (!qid || nvme_check_sqid(n, qid)) { | ||
173 | + if (unlikely(!qid || nvme_check_sqid(n, qid))) { | ||
174 | + trace_nvme_err_invalid_del_sq(qid); | ||
175 | return NVME_INVALID_QID | NVME_DNR; | ||
176 | } | ||
177 | |||
178 | + trace_nvme_del_sq(qid); | ||
179 | + | ||
180 | sq = n->sq[qid]; | ||
181 | while (!QTAILQ_EMPTY(&sq->out_req_list)) { | ||
182 | req = QTAILQ_FIRST(&sq->out_req_list); | ||
183 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
184 | uint16_t qflags = le16_to_cpu(c->sq_flags); | ||
185 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
186 | |||
187 | - if (!cqid || nvme_check_cqid(n, cqid)) { | ||
188 | + trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); | ||
189 | + | ||
190 | + if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { | ||
191 | + trace_nvme_err_invalid_create_sq_cqid(cqid); | ||
192 | return NVME_INVALID_CQID | NVME_DNR; | ||
193 | } | ||
194 | - if (!sqid || !nvme_check_sqid(n, sqid)) { | ||
195 | + if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) { | ||
196 | + trace_nvme_err_invalid_create_sq_sqid(sqid); | ||
197 | return NVME_INVALID_QID | NVME_DNR; | ||
198 | } | ||
199 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
200 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
201 | + trace_nvme_err_invalid_create_sq_size(qsize); | ||
202 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
203 | } | ||
204 | - if (!prp1 || prp1 & (n->page_size - 1)) { | ||
205 | + if (unlikely(!prp1 || prp1 & (n->page_size - 1))) { | ||
206 | + trace_nvme_err_invalid_create_sq_addr(prp1); | ||
207 | return NVME_INVALID_FIELD | NVME_DNR; | ||
208 | } | ||
209 | - if (!(NVME_SQ_FLAGS_PC(qflags))) { | ||
210 | + if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { | ||
211 | + trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); | ||
212 | return NVME_INVALID_FIELD | NVME_DNR; | ||
213 | } | ||
214 | sq = g_malloc0(sizeof(*sq)); | ||
215 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
216 | NvmeCQueue *cq; | ||
217 | uint16_t qid = le16_to_cpu(c->qid); | ||
218 | |||
219 | - if (!qid || nvme_check_cqid(n, qid)) { | ||
220 | + if (unlikely(!qid || nvme_check_cqid(n, qid))) { | ||
221 | + trace_nvme_err_invalid_del_cq_cqid(qid); | ||
222 | return NVME_INVALID_CQID | NVME_DNR; | ||
223 | } | ||
224 | |||
225 | cq = n->cq[qid]; | ||
226 | - if (!QTAILQ_EMPTY(&cq->sq_list)) { | ||
227 | + if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { | ||
228 | + trace_nvme_err_invalid_del_cq_notempty(qid); | ||
229 | return NVME_INVALID_QUEUE_DEL; | ||
230 | } | ||
231 | + trace_nvme_del_cq(qid); | ||
232 | nvme_free_cq(cq, n); | ||
233 | return NVME_SUCCESS; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
236 | uint16_t qflags = le16_to_cpu(c->cq_flags); | ||
237 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
238 | |||
239 | - if (!cqid || !nvme_check_cqid(n, cqid)) { | ||
240 | + trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags, | ||
241 | + NVME_CQ_FLAGS_IEN(qflags) != 0); | ||
242 | + | ||
243 | + if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) { | ||
244 | + trace_nvme_err_invalid_create_cq_cqid(cqid); | ||
245 | return NVME_INVALID_CQID | NVME_DNR; | ||
246 | } | ||
247 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
248 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
249 | + trace_nvme_err_invalid_create_cq_size(qsize); | ||
250 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
251 | } | ||
252 | - if (!prp1) { | ||
253 | + if (unlikely(!prp1)) { | ||
254 | + trace_nvme_err_invalid_create_cq_addr(prp1); | ||
255 | return NVME_INVALID_FIELD | NVME_DNR; | ||
256 | } | ||
257 | - if (vector > n->num_queues) { | ||
258 | + if (unlikely(vector > n->num_queues)) { | ||
259 | + trace_nvme_err_invalid_create_cq_vector(vector); | ||
260 | return NVME_INVALID_IRQ_VECTOR | NVME_DNR; | ||
261 | } | ||
262 | - if (!(NVME_CQ_FLAGS_PC(qflags))) { | ||
263 | + if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { | ||
264 | + trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); | ||
265 | return NVME_INVALID_FIELD | NVME_DNR; | ||
266 | } | ||
267 | |||
268 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) | ||
269 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
270 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
271 | |||
272 | + trace_nvme_identify_ctrl(); | ||
273 | + | ||
274 | return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), | ||
275 | prp1, prp2); | ||
276 | } | ||
277 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) | ||
278 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
279 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
280 | |||
281 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
282 | + trace_nvme_identify_ns(nsid); | ||
283 | + | ||
284 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
285 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
286 | return NVME_INVALID_NSID | NVME_DNR; | ||
287 | } | ||
288 | |||
289 | ns = &n->namespaces[nsid - 1]; | ||
290 | + | ||
291 | return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), | ||
292 | prp1, prp2); | ||
293 | } | ||
294 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) | ||
295 | uint16_t ret; | ||
296 | int i, j = 0; | ||
297 | |||
298 | + trace_nvme_identify_nslist(min_nsid); | ||
299 | + | ||
300 | list = g_malloc0(data_len); | ||
301 | for (i = 0; i < n->num_namespaces; i++) { | ||
302 | if (i < min_nsid) { | ||
303 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) | ||
304 | case 0x02: | ||
305 | return nvme_identify_nslist(n, c); | ||
306 | default: | ||
307 | + trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); | ||
308 | return NVME_INVALID_FIELD | NVME_DNR; | ||
309 | } | ||
310 | } | ||
311 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
312 | switch (dw10) { | ||
313 | case NVME_VOLATILE_WRITE_CACHE: | ||
314 | result = blk_enable_write_cache(n->conf.blk); | ||
315 | + trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); | ||
316 | break; | ||
317 | case NVME_NUMBER_OF_QUEUES: | ||
318 | result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
319 | + trace_nvme_getfeat_numq(result); | ||
320 | break; | ||
321 | default: | ||
322 | + trace_nvme_err_invalid_getfeat(dw10); | ||
323 | return NVME_INVALID_FIELD | NVME_DNR; | ||
324 | } | ||
325 | |||
326 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
327 | blk_set_enable_write_cache(n->conf.blk, dw11 & 1); | ||
328 | break; | ||
329 | case NVME_NUMBER_OF_QUEUES: | ||
330 | + trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, | ||
331 | + ((dw11 >> 16) & 0xFFFF) + 1, | ||
332 | + n->num_queues - 1, n->num_queues - 1); | ||
333 | req->cqe.result = | ||
334 | cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
335 | break; | ||
336 | default: | ||
337 | + trace_nvme_err_invalid_setfeat(dw10); | ||
338 | return NVME_INVALID_FIELD | NVME_DNR; | ||
339 | } | ||
340 | return NVME_SUCCESS; | ||
341 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
342 | case NVME_ADM_CMD_GET_FEATURES: | ||
343 | return nvme_get_feature(n, cmd, req); | ||
344 | default: | ||
345 | + trace_nvme_err_invalid_admin_opc(cmd->opcode); | ||
346 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
347 | } | ||
348 | } | ||
349 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
350 | uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; | ||
351 | uint32_t page_size = 1 << page_bits; | ||
352 | |||
353 | - if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq || | ||
354 | - n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) || | ||
355 | - NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) || | ||
356 | - NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) || | ||
357 | - NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) || | ||
358 | - NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) || | ||
359 | - NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) || | ||
360 | - NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) || | ||
361 | - !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) { | ||
362 | + if (unlikely(n->cq[0])) { | ||
363 | + trace_nvme_err_startfail_cq(); | ||
364 | + return -1; | ||
365 | + } | ||
366 | + if (unlikely(n->sq[0])) { | ||
367 | + trace_nvme_err_startfail_sq(); | ||
368 | + return -1; | ||
369 | + } | ||
370 | + if (unlikely(!n->bar.asq)) { | ||
371 | + trace_nvme_err_startfail_nbarasq(); | ||
372 | + return -1; | ||
373 | + } | ||
374 | + if (unlikely(!n->bar.acq)) { | ||
375 | + trace_nvme_err_startfail_nbaracq(); | ||
376 | + return -1; | ||
377 | + } | ||
378 | + if (unlikely(n->bar.asq & (page_size - 1))) { | ||
379 | + trace_nvme_err_startfail_asq_misaligned(n->bar.asq); | ||
380 | + return -1; | ||
381 | + } | ||
382 | + if (unlikely(n->bar.acq & (page_size - 1))) { | ||
383 | + trace_nvme_err_startfail_acq_misaligned(n->bar.acq); | ||
384 | + return -1; | ||
385 | + } | ||
386 | + if (unlikely(NVME_CC_MPS(n->bar.cc) < | ||
387 | + NVME_CAP_MPSMIN(n->bar.cap))) { | ||
388 | + trace_nvme_err_startfail_page_too_small( | ||
389 | + NVME_CC_MPS(n->bar.cc), | ||
390 | + NVME_CAP_MPSMIN(n->bar.cap)); | ||
391 | + return -1; | ||
392 | + } | ||
393 | + if (unlikely(NVME_CC_MPS(n->bar.cc) > | ||
394 | + NVME_CAP_MPSMAX(n->bar.cap))) { | ||
395 | + trace_nvme_err_startfail_page_too_large( | ||
396 | + NVME_CC_MPS(n->bar.cc), | ||
397 | + NVME_CAP_MPSMAX(n->bar.cap)); | ||
398 | + return -1; | ||
399 | + } | ||
400 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) < | ||
401 | + NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { | ||
402 | + trace_nvme_err_startfail_cqent_too_small( | ||
403 | + NVME_CC_IOCQES(n->bar.cc), | ||
404 | + NVME_CTRL_CQES_MIN(n->bar.cap)); | ||
405 | + return -1; | ||
406 | + } | ||
407 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) > | ||
408 | + NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { | ||
409 | + trace_nvme_err_startfail_cqent_too_large( | ||
410 | + NVME_CC_IOCQES(n->bar.cc), | ||
411 | + NVME_CTRL_CQES_MAX(n->bar.cap)); | ||
412 | + return -1; | ||
413 | + } | ||
414 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) < | ||
415 | + NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { | ||
416 | + trace_nvme_err_startfail_sqent_too_small( | ||
417 | + NVME_CC_IOSQES(n->bar.cc), | ||
418 | + NVME_CTRL_SQES_MIN(n->bar.cap)); | ||
419 | + return -1; | ||
420 | + } | ||
421 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) > | ||
422 | + NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { | ||
423 | + trace_nvme_err_startfail_sqent_too_large( | ||
424 | + NVME_CC_IOSQES(n->bar.cc), | ||
425 | + NVME_CTRL_SQES_MAX(n->bar.cap)); | ||
426 | + return -1; | ||
427 | + } | ||
428 | + if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { | ||
429 | + trace_nvme_err_startfail_asqent_sz_zero(); | ||
430 | + return -1; | ||
431 | + } | ||
432 | + if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { | ||
433 | + trace_nvme_err_startfail_acqent_sz_zero(); | ||
434 | return -1; | ||
435 | } | ||
436 | |||
437 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
438 | static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
439 | unsigned size) | ||
440 | { | ||
441 | + if (unlikely(offset & (sizeof(uint32_t) - 1))) { | ||
442 | + NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32, | ||
443 | + "MMIO write not 32-bit aligned," | ||
444 | + " offset=0x%"PRIx64"", offset); | ||
445 | + /* should be ignored, fall through for now */ | ||
446 | + } | ||
447 | + | ||
448 | + if (unlikely(size < sizeof(uint32_t))) { | ||
449 | + NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall, | ||
450 | + "MMIO write smaller than 32-bits," | ||
451 | + " offset=0x%"PRIx64", size=%u", | ||
452 | + offset, size); | ||
453 | + /* should be ignored, fall through for now */ | ||
454 | + } | ||
455 | + | ||
456 | switch (offset) { | ||
457 | - case 0xc: | ||
458 | + case 0xc: /* INTMS */ | ||
459 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
460 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
461 | + "undefined access to interrupt mask set" | ||
462 | + " when MSI-X is enabled"); | ||
463 | + /* should be ignored, fall through for now */ | ||
464 | + } | ||
465 | n->bar.intms |= data & 0xffffffff; | ||
466 | n->bar.intmc = n->bar.intms; | ||
467 | + trace_nvme_mmio_intm_set(data & 0xffffffff, | ||
468 | + n->bar.intmc); | ||
469 | break; | ||
470 | - case 0x10: | ||
471 | + case 0x10: /* INTMC */ | ||
472 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
473 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
474 | + "undefined access to interrupt mask clr" | ||
475 | + " when MSI-X is enabled"); | ||
476 | + /* should be ignored, fall through for now */ | ||
477 | + } | ||
478 | n->bar.intms &= ~(data & 0xffffffff); | ||
479 | n->bar.intmc = n->bar.intms; | ||
480 | + trace_nvme_mmio_intm_clr(data & 0xffffffff, | ||
481 | + n->bar.intmc); | ||
482 | break; | ||
483 | - case 0x14: | ||
484 | + case 0x14: /* CC */ | ||
485 | + trace_nvme_mmio_cfg(data & 0xffffffff); | ||
486 | /* Windows first sends data, then sends enable bit */ | ||
487 | if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && | ||
488 | !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) | ||
489 | @@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
490 | |||
491 | if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { | ||
492 | n->bar.cc = data; | ||
493 | - if (nvme_start_ctrl(n)) { | ||
494 | + if (unlikely(nvme_start_ctrl(n))) { | ||
495 | + trace_nvme_err_startfail(); | ||
496 | n->bar.csts = NVME_CSTS_FAILED; | ||
497 | } else { | ||
498 | + trace_nvme_mmio_start_success(); | ||
499 | n->bar.csts = NVME_CSTS_READY; | ||
500 | } | ||
501 | } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { | ||
502 | + trace_nvme_mmio_stopped(); | ||
503 | nvme_clear_ctrl(n); | ||
504 | n->bar.csts &= ~NVME_CSTS_READY; | ||
505 | } | ||
506 | if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { | ||
507 | - nvme_clear_ctrl(n); | ||
508 | - n->bar.cc = data; | ||
509 | - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
510 | + trace_nvme_mmio_shutdown_set(); | ||
511 | + nvme_clear_ctrl(n); | ||
512 | + n->bar.cc = data; | ||
513 | + n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
514 | } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { | ||
515 | - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
516 | - n->bar.cc = data; | ||
517 | + trace_nvme_mmio_shutdown_cleared(); | ||
518 | + n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
519 | + n->bar.cc = data; | ||
520 | + } | ||
521 | + break; | ||
522 | + case 0x1C: /* CSTS */ | ||
523 | + if (data & (1 << 4)) { | ||
524 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported, | ||
525 | + "attempted to W1C CSTS.NSSRO" | ||
526 | + " but CAP.NSSRS is zero (not supported)"); | ||
527 | + } else if (data != 0) { | ||
528 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts, | ||
529 | + "attempted to set a read only bit" | ||
530 | + " of controller status"); | ||
531 | + } | ||
532 | + break; | ||
533 | + case 0x20: /* NSSR */ | ||
534 | + if (data == 0x4E564D65) { | ||
535 | + trace_nvme_ub_mmiowr_ssreset_unsupported(); | ||
536 | + } else { | ||
537 | + /* The spec says that writes of other values have no effect */ | ||
538 | + return; | ||
539 | } | ||
540 | break; | ||
541 | - case 0x24: | ||
542 | + case 0x24: /* AQA */ | ||
543 | n->bar.aqa = data & 0xffffffff; | ||
544 | + trace_nvme_mmio_aqattr(data & 0xffffffff); | ||
545 | break; | ||
546 | - case 0x28: | ||
547 | + case 0x28: /* ASQ */ | ||
548 | n->bar.asq = data; | ||
549 | + trace_nvme_mmio_asqaddr(data); | ||
550 | break; | ||
551 | - case 0x2c: | ||
552 | + case 0x2c: /* ASQ hi */ | ||
553 | n->bar.asq |= data << 32; | ||
554 | + trace_nvme_mmio_asqaddr_hi(data, n->bar.asq); | ||
555 | break; | ||
556 | - case 0x30: | ||
557 | + case 0x30: /* ACQ */ | ||
558 | + trace_nvme_mmio_acqaddr(data); | ||
559 | n->bar.acq = data; | ||
560 | break; | ||
561 | - case 0x34: | ||
562 | + case 0x34: /* ACQ hi */ | ||
563 | n->bar.acq |= data << 32; | ||
564 | + trace_nvme_mmio_acqaddr_hi(data, n->bar.acq); | ||
565 | break; | ||
566 | + case 0x38: /* CMBLOC */ | ||
567 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved, | ||
568 | + "invalid write to reserved CMBLOC" | ||
569 | + " when CMBSZ is zero, ignored"); | ||
570 | + return; | ||
571 | + case 0x3C: /* CMBSZ */ | ||
572 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, | ||
573 | + "invalid write to read only CMBSZ, ignored"); | ||
574 | + return; | ||
575 | default: | ||
576 | + NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, | ||
577 | + "invalid MMIO write," | ||
578 | + " offset=0x%"PRIx64", data=%"PRIx64"", | ||
579 | + offset, data); | ||
580 | break; | ||
581 | } | ||
582 | } | ||
583 | @@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) | ||
584 | uint8_t *ptr = (uint8_t *)&n->bar; | ||
585 | uint64_t val = 0; | ||
586 | |||
587 | + if (unlikely(addr & (sizeof(uint32_t) - 1))) { | ||
588 | + NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32, | ||
589 | + "MMIO read not 32-bit aligned," | ||
590 | + " offset=0x%"PRIx64"", addr); | ||
591 | + /* should RAZ, fall through for now */ | ||
592 | + } else if (unlikely(size < sizeof(uint32_t))) { | ||
593 | + NVME_GUEST_ERR(nvme_ub_mmiord_toosmall, | ||
594 | + "MMIO read smaller than 32-bits," | ||
595 | + " offset=0x%"PRIx64"", addr); | ||
596 | + /* should RAZ, fall through for now */ | ||
597 | + } | ||
598 | + | ||
599 | if (addr < sizeof(n->bar)) { | ||
600 | memcpy(&val, ptr + addr, size); | ||
601 | + } else { | ||
602 | + NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, | ||
603 | + "MMIO read beyond last register," | ||
604 | + " offset=0x%"PRIx64", returning 0", addr); | ||
605 | } | ||
606 | + | ||
607 | return val; | ||
608 | } | ||
609 | |||
610 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) | ||
611 | { | ||
612 | uint32_t qid; | ||
613 | |||
614 | - if (addr & ((1 << 2) - 1)) { | ||
615 | + if (unlikely(addr & ((1 << 2) - 1))) { | ||
616 | + NVME_GUEST_ERR(nvme_ub_db_wr_misaligned, | ||
617 | + "doorbell write not 32-bit aligned," | ||
618 | + " offset=0x%"PRIx64", ignoring", addr); | ||
619 | return; | ||
620 | } | ||
621 | |||
622 | if (((addr - 0x1000) >> 2) & 1) { | ||
623 | + /* Completion queue doorbell write */ | ||
624 | + | ||
625 | uint16_t new_head = val & 0xffff; | ||
626 | int start_sqs; | ||
627 | NvmeCQueue *cq; | ||
628 | |||
629 | qid = (addr - (0x1000 + (1 << 2))) >> 3; | ||
630 | - if (nvme_check_cqid(n, qid)) { | ||
631 | + if (unlikely(nvme_check_cqid(n, qid))) { | ||
632 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq, | ||
633 | + "completion queue doorbell write" | ||
634 | + " for nonexistent queue," | ||
635 | + " sqid=%"PRIu32", ignoring", qid); | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | cq = n->cq[qid]; | ||
640 | - if (new_head >= cq->size) { | ||
641 | + if (unlikely(new_head >= cq->size)) { | ||
642 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead, | ||
643 | + "completion queue doorbell write value" | ||
644 | + " beyond queue size, sqid=%"PRIu32"," | ||
645 | + " new_head=%"PRIu16", ignoring", | ||
646 | + qid, new_head); | ||
647 | return; | ||
648 | } | ||
649 | |||
650 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) | ||
651 | nvme_isr_notify(n, cq); | ||
652 | } | ||
653 | } else { | ||
654 | + /* Submission queue doorbell write */ | ||
655 | + | ||
656 | uint16_t new_tail = val & 0xffff; | ||
657 | NvmeSQueue *sq; | ||
658 | |||
659 | qid = (addr - 0x1000) >> 3; | ||
660 | - if (nvme_check_sqid(n, qid)) { | ||
661 | + if (unlikely(nvme_check_sqid(n, qid))) { | ||
662 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq, | ||
663 | + "submission queue doorbell write" | ||
664 | + " for nonexistent queue," | ||
665 | + " sqid=%"PRIu32", ignoring", qid); | ||
666 | return; | ||
667 | } | ||
668 | |||
669 | sq = n->sq[qid]; | ||
670 | - if (new_tail >= sq->size) { | ||
671 | + if (unlikely(new_tail >= sq->size)) { | ||
672 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail, | ||
673 | + "submission queue doorbell write value" | ||
674 | + " beyond queue size, sqid=%"PRIu32"," | ||
675 | + " new_tail=%"PRIu16", ignoring", | ||
676 | + qid, new_tail); | ||
677 | return; | ||
678 | } | ||
679 | |||
680 | diff --git a/hw/block/trace-events b/hw/block/trace-events | ||
681 | index XXXXXXX..XXXXXXX 100644 | ||
682 | --- a/hw/block/trace-events | ||
683 | +++ b/hw/block/trace-events | ||
684 | @@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6 | ||
685 | hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" | ||
686 | hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" | ||
687 | |||
688 | +# hw/block/nvme.c | ||
689 | +# nvme traces for successful events | ||
690 | +nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" | ||
691 | +nvme_irq_pin(void) "pulsing IRQ pin" | ||
692 | +nvme_irq_masked(void) "IRQ is masked" | ||
693 | +nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" | ||
694 | +nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" | ||
695 | +nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" | ||
696 | +nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" | ||
697 | +nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" | ||
698 | +nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" | ||
699 | +nvme_identify_ctrl(void) "identify controller" | ||
700 | +nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" | ||
701 | +nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" | ||
702 | +nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s" | ||
703 | +nvme_getfeat_numq(int result) "get feature number of queues, result=%d" | ||
704 | +nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" | ||
705 | +nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" | ||
706 | +nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" | ||
707 | +nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" | ||
708 | +nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" | ||
709 | +nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" | ||
710 | +nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" | ||
711 | +nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" | ||
712 | +nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" | ||
713 | +nvme_mmio_start_success(void) "setting controller enable bit succeeded" | ||
714 | +nvme_mmio_stopped(void) "cleared controller enable bit" | ||
715 | +nvme_mmio_shutdown_set(void) "shutdown bit set" | ||
716 | +nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" | ||
717 | + | ||
718 | +# nvme traces for error conditions | ||
719 | +nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" | ||
720 | +nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" | ||
721 | +nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" | ||
722 | +nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" | ||
723 | +nvme_err_invalid_field(void) "invalid field" | ||
724 | +nvme_err_invalid_prp(void) "invalid PRP" | ||
725 | +nvme_err_invalid_sgl(void) "invalid SGL" | ||
726 | +nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" | ||
727 | +nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" | ||
728 | +nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" | ||
729 | +nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" | ||
730 | +nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" | ||
731 | +nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" | ||
732 | +nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" | ||
733 | +nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" | ||
734 | +nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" | ||
735 | +nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" | ||
736 | +nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" | ||
737 | +nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" | ||
738 | +nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" | ||
739 | +nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" | ||
740 | +nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" | ||
741 | +nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" | ||
742 | +nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" | ||
743 | +nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" | ||
744 | +nvme_err_invalid_getfeat(uint32_t dw10) "invalid get features, dw10=0x%"PRIx32"" | ||
745 | +nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" | ||
746 | +nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" | ||
747 | +nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" | ||
748 | +nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" | ||
749 | +nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" | ||
750 | +nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" | ||
751 | +nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" | ||
752 | +nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t minlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" | ||
753 | +nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" | ||
754 | +nvme_err_startfail_cqent_too_small(uint8_t log2es, uint8_t minlog2es) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" | ||
755 | +nvme_err_startfail_cqent_too_large(uint8_t log2es, uint8_t maxlog2es) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" | ||
756 | +nvme_err_startfail_sqent_too_small(uint8_t log2es, uint8_t minlog2es) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" | ||
757 | +nvme_err_startfail_sqent_too_large(uint8_t log2es, uint8_t maxlog2es) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" | ||
758 | +nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" | ||
759 | +nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" | ||
760 | +nvme_err_startfail(void) "setting controller enable bit failed" | ||
761 | + | ||
762 | +# nvme traces for undefined behavior | ||
763 | +nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" | ||
764 | +nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" | ||
765 | +nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" | ||
766 | +nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" | ||
767 | +nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" | ||
768 | +nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" | ||
769 | +nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" | ||
770 | +nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" | ||
771 | +nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" | ||
772 | +nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" | ||
773 | +nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32 bits, offset=0x%"PRIx64"" | ||
774 | +nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" | ||
775 | +nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" | ||
776 | +nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" | ||
777 | +nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" | ||
778 | +nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" | ||
779 | +nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_tail=%"PRIu16", ignoring" | ||
780 | + | ||
781 | # hw/block/xen_disk.c | ||
782 | xen_disk_alloc(char *name) "%s" | ||
783 | xen_disk_init(char *name) "%s" | ||
41 | -- | 784 | -- |
42 | 2.40.1 | 785 | 2.13.6 |
786 | |||
787 | diff view generated by jsdifflib |
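
Each line in the trace-events hunk above declares one trace point: QEMU's tracetool turns it into a generated trace_<name>() helper that takes the declared arguments and logs the given format string. As a sketch of how the device model consumes these declarations (illustrative code only, not the exact hw/block/nvme.c logic; the function name and checks are placeholders):

    /* Sketch: invoking the generated trace helpers from a doorbell write. */
    #include "trace.h"   /* trace_*() prototypes generated from trace-events */

    static void nvme_doorbell_write(uint64_t offset, uint32_t cqid,
                                    uint16_t new_head, uint16_t qsize)
    {
        if (offset & 0x3) {
            trace_nvme_ub_db_wr_misaligned(offset);   /* write is ignored */
            return;
        }
        if (new_head >= qsize) {
            trace_nvme_ub_db_wr_invalid_cqhead(cqid, new_head);
            return;
        }
        /* ... advance the completion queue head pointer ... */
    }
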
1 | bdrv_refresh_total_sectors() and bdrv_refresh_limits() expect to be | 1 | From: Fam Zheng <famz@redhat.com> |
---|---|---|---|
2 | called under the AioContext lock of the node. Take the lock. | ||
3 | 2 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Management tools create overlays of running guests with qemu-img: |
5 | Message-Id: <20230525124713.401149-10-kwolf@redhat.com> | 4 | |
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 5 | $ qemu-img create -b /image/in/use.qcow2 -f qcow2 /overlay/image.qcow2 |
6 | |||
7 | but this doesn't work anymore due to image locking: | ||
8 | |||
9 | qemu-img: /overlay/image.qcow2: Failed to get shared "write" lock | ||
10 | Is another process using the image? | ||
11 | Could not open backing image to determine size. | ||
12 | Use the force share option to allow this use case again. | ||
13 | |||
14 | Cc: qemu-stable@nongnu.org | ||
15 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
16 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | --- | 18 | --- |
9 | block.c | 7 +++++++ | 19 | block.c | 3 ++- |
10 | 1 file changed, 7 insertions(+) | 20 | 1 file changed, 2 insertions(+), 1 deletion(-) |
11 | 21 | ||
12 | diff --git a/block.c b/block.c | 22 | diff --git a/block.c b/block.c |
13 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/block.c | 24 | --- a/block.c |
15 | +++ b/block.c | 25 | +++ b/block.c |
16 | @@ -XXX,XX +XXX,XX @@ static int no_coroutine_fn GRAPH_UNLOCKED | 26 | @@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt, |
17 | bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, | 27 | back_flags = flags; |
18 | QDict *options, int open_flags, Error **errp) | 28 | back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); |
19 | { | 29 | |
20 | + AioContext *ctx; | 30 | + backing_options = qdict_new(); |
21 | Error *local_err = NULL; | 31 | if (backing_fmt) { |
22 | int i, ret; | 32 | - backing_options = qdict_new(); |
23 | GLOBAL_STATE_CODE(); | 33 | qdict_put_str(backing_options, "driver", backing_fmt); |
24 | @@ -XXX,XX +XXX,XX @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, | 34 | } |
25 | bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF; | 35 | + qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); |
26 | bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF; | 36 | |
27 | 37 | bs = bdrv_open(full_backing, NULL, backing_options, back_flags, | |
28 | + /* Get the context after .bdrv_open, as it can change the context */ | 38 | &local_err);
29 | + ctx = bdrv_get_aio_context(bs); | ||
30 | + aio_context_acquire(ctx); | ||
31 | + | ||
32 | ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); | ||
33 | if (ret < 0) { | ||
34 | error_setg_errno(errp, -ret, "Could not refresh total sector count"); | ||
35 | + aio_context_release(ctx); | ||
36 | return ret; | ||
37 | } | ||
38 | |||
39 | bdrv_graph_rdlock_main_loop(); | ||
40 | bdrv_refresh_limits(bs, NULL, &local_err); | ||
41 | bdrv_graph_rdunlock_main_loop(); | ||
42 | + aio_context_release(ctx); | ||
43 | |||
44 | if (local_err) { | ||
45 | error_propagate(errp, local_err); | ||
46 | -- | 39 | -- |
47 | 2.40.1 | 40 | 2.13.6 |
41 | |||
42 | diff view generated by jsdifflib |
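
Condensing the hunk above: the backing image that is opened only to probe its size now always carries the force-share flag. A sketch of the resulting path inside bdrv_img_create(), with the surrounding declarations elided as in the diff:

    /* Open the backing file read-only and force-shared: the size probe must
     * not compete for the image locks held by the running guest. */
    QDict *backing_options = qdict_new();
    if (backing_fmt) {
        qdict_put_str(backing_options, "driver", backing_fmt);
    }
    qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);

    bs = bdrv_open(full_backing, NULL, backing_options, back_flags, &local_err);
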
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | is_external=true suspends fd handlers between aio_disable_external() and | 3 | It's not working anymore since QEMU v1.3.0 - time to remove it now. |
4 | aio_enable_external(). The block layer's drain operation uses this | ||
5 | mechanism to prevent new I/O from sneaking in between | ||
6 | bdrv_drained_begin() and bdrv_drained_end(). | ||
7 | 4 | ||
8 | The previous commit converted the xen-block device to use BlockDevOps | 5 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
9 | .drained_begin/end() callbacks. It no longer relies on is_external=true | 6 | Reviewed-by: John Snow <jsnow@redhat.com> |
10 | so it is safe to pass is_external=false. | 7 | Reviewed-by: Markus Armbruster <armbru@redhat.com> |
11 | |||
12 | This is part of ongoing work to remove the aio_disable_external() API. | ||
13 | |||
14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
15 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
16 | Message-Id: <20230516190238.8401-13-stefanha@redhat.com> | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
18 | --- | 9 | --- |
19 | hw/xen/xen-bus.c | 8 ++++---- | 10 | blockdev.c | 11 ----------- |
20 | 1 file changed, 4 insertions(+), 4 deletions(-) | 11 | qemu-doc.texi | 6 ------ |
12 | 2 files changed, 17 deletions(-) | ||
21 | 13 | ||
22 | diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c | 14 | diff --git a/blockdev.c b/blockdev.c |
23 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/hw/xen/xen-bus.c | 16 | --- a/blockdev.c |
25 | +++ b/hw/xen/xen-bus.c | 17 | +++ b/blockdev.c |
26 | @@ -XXX,XX +XXX,XX @@ void xen_device_set_event_channel_context(XenDevice *xendev, | 18 | @@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_legacy_drive_opts = { |
19 | .type = QEMU_OPT_STRING, | ||
20 | .help = "chs translation (auto, lba, none)", | ||
21 | },{ | ||
22 | - .name = "boot", | ||
23 | - .type = QEMU_OPT_BOOL, | ||
24 | - .help = "(deprecated, ignored)", | ||
25 | - },{ | ||
26 | .name = "addr", | ||
27 | .type = QEMU_OPT_STRING, | ||
28 | .help = "pci address (virtio only)", | ||
29 | @@ -XXX,XX +XXX,XX @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) | ||
30 | goto fail; | ||
27 | } | 31 | } |
28 | 32 | ||
29 | if (channel->ctx) | 33 | - /* Deprecated option boot=[on|off] */ |
30 | - aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, | 34 | - if (qemu_opt_get(legacy_opts, "boot") != NULL) { |
31 | + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false, | 35 | - fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be " |
32 | NULL, NULL, NULL, NULL, NULL); | 36 | - "ignored. Future versions will reject this parameter. Please " |
33 | 37 | - "update your scripts.\n"); | |
34 | channel->ctx = ctx; | 38 | - } |
35 | if (ctx) { | 39 | - |
36 | aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), | 40 | /* Other deprecated options */ |
37 | - true, xen_device_event, NULL, xen_device_poll, NULL, | 41 | if (!qtest_enabled()) { |
38 | - channel); | 42 | for (i = 0; i < ARRAY_SIZE(deprecated); i++) { |
39 | + false, xen_device_event, NULL, xen_device_poll, | 43 | diff --git a/qemu-doc.texi b/qemu-doc.texi |
40 | + NULL, channel); | 44 | index XXXXXXX..XXXXXXX 100644 |
41 | } | 45 | --- a/qemu-doc.texi |
42 | } | 46 | +++ b/qemu-doc.texi |
43 | 47 | @@ -XXX,XX +XXX,XX @@ deprecated. | |
44 | @@ -XXX,XX +XXX,XX @@ void xen_device_unbind_event_channel(XenDevice *xendev, | 48 | |
45 | 49 | @section System emulator command line arguments | |
46 | QLIST_REMOVE(channel, list); | 50 | |
47 | 51 | -@subsection -drive boot=on|off (since 1.3.0) | |
48 | - aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, | 52 | - |
49 | + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false, | 53 | -The ``boot=on|off'' option to the ``-drive'' argument is |
50 | NULL, NULL, NULL, NULL, NULL); | 54 | -ignored. Applications should use the ``bootindex=N'' parameter |
51 | 55 | -to set an absolute ordering between devices instead. | |
52 | if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) { | 56 | - |
57 | @subsection -tdf (since 1.3.0) | ||
58 | |||
59 | The ``-tdf'' argument is ignored. The behaviour implemented | ||
53 | -- | 60 | -- |
54 | 2.40.1 | 61 | 2.13.6 |
62 | |||
63 | diff view generated by jsdifflib |
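
The drained_begin/end callbacks referred to in the message above are plain function pointers on BlockDevOps. A rough sketch of the wiring this series relies on (the callback bodies here are placeholders, not the real xen-block implementation):

    /* Sketch: the block layer calls these around a drained section, which
     * replaces suspending the event channel fd via is_external=true. */
    static void xen_block_drained_begin(void *opaque)
    {
        /* stop taking new requests off the guest's rings */
    }

    static void xen_block_drained_end(void *opaque)
    {
        /* resume request processing, including anything that queued up */
    }

    static const BlockDevOps xen_block_dev_ops = {
        .drained_begin = xen_block_drained_begin,
        .drained_end   = xen_block_drained_end,
    };
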
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This patch is part of an effort to remove the aio_disable_external() | 3 | It's been marked as deprecated since QEMU v2.10.0, and so far nobody |
4 | API because it does not fit in a multi-queue block layer world where | 4 | complained that we should keep it, so let's remove this legacy option |
5 | many AioContexts may be submitting requests to the same disk. | 5 | now to simplify the code quite a bit. |
6 | 6 | ||
7 | The SCSI emulation code is already in good shape to stop using | 7 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
8 | aio_disable_external(). It was only used by commit 9c5aad84da1c | 8 | Reviewed-by: John Snow <jsnow@redhat.com> |
9 | ("virtio-scsi: fixed virtio_scsi_ctx_check failed when detaching scsi | 9 | Reviewed-by: Markus Armbruster <armbru@redhat.com> |
10 | disk") to ensure that virtio_scsi_hotunplug() works while the guest | ||
11 | driver is submitting I/O. | ||
12 | |||
13 | Ensure virtio_scsi_hotunplug() is safe as follows: | ||
14 | |||
15 | 1. qdev_simple_device_unplug_cb() -> qdev_unrealize() -> | ||
16 | device_set_realized() calls qatomic_set(&dev->realized, false) so | ||
17 | that future scsi_device_get() calls return NULL because they exclude | ||
18 | SCSIDevices with realized=false. | ||
19 | |||
20 | That means virtio-scsi will reject new I/O requests to this | ||
21 | SCSIDevice with VIRTIO_SCSI_S_BAD_TARGET even while | ||
22 | virtio_scsi_hotunplug() is still executing. We are protected against | ||
23 | new requests! | ||
24 | |||
25 | 2. scsi_qdev_unrealize() already contains a call to | ||
26 | scsi_device_purge_requests() so that in-flight requests are cancelled | ||
27 | synchronously. This ensures that no in-flight requests remain once | ||
28 | qdev_simple_device_unplug_cb() returns. | ||
29 | |||
30 | Thanks to these two conditions we don't need aio_disable_external() | ||
31 | anymore. | ||
32 | |||
33 | Cc: Zhengui Li <lizhengui@huawei.com> | ||
34 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
35 | Reviewed-by: Daniil Tatianin <d-tatianin@yandex-team.ru> | ||
36 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
37 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
38 | Message-Id: <20230516190238.8401-5-stefanha@redhat.com> | ||
39 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
40 | --- | 11 | --- |
41 | hw/scsi/virtio-scsi.c | 3 --- | 12 | vl.c | 86 ++------------------------------------------------------- |
42 | 1 file changed, 3 deletions(-) | 13 | qemu-doc.texi | 8 ------ |
43 | 14 | qemu-options.hx | 19 ++----------- | |
44 | diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c | 15 | 3 files changed, 4 insertions(+), 109 deletions(-) |
16 | |||
17 | diff --git a/vl.c b/vl.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
46 | --- a/hw/scsi/virtio-scsi.c | 19 | --- a/vl.c |
47 | +++ b/hw/scsi/virtio-scsi.c | 20 | +++ b/vl.c |
48 | @@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, | 21 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
49 | VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); | 22 | const char *boot_order = NULL; |
50 | VirtIOSCSI *s = VIRTIO_SCSI(vdev); | 23 | const char *boot_once = NULL; |
51 | SCSIDevice *sd = SCSI_DEVICE(dev); | 24 | DisplayState *ds; |
52 | - AioContext *ctx = s->ctx ?: qemu_get_aio_context(); | 25 | - int cyls, heads, secs, translation; |
53 | VirtIOSCSIEventInfo info = { | 26 | QemuOpts *opts, *machine_opts; |
54 | .event = VIRTIO_SCSI_T_TRANSPORT_RESET, | 27 | - QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL; |
55 | .reason = VIRTIO_SCSI_EVT_RESET_REMOVED, | 28 | + QemuOpts *icount_opts = NULL, *accel_opts = NULL; |
56 | @@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, | 29 | QemuOptsList *olist; |
57 | }, | 30 | int optind; |
58 | }; | 31 | const char *optarg; |
59 | 32 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | |
60 | - aio_disable_external(ctx); | 33 | |
61 | qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); | 34 | cpu_model = NULL; |
62 | - aio_enable_external(ctx); | 35 | snapshot = 0; |
63 | 36 | - cyls = heads = secs = 0; | |
64 | if (s->ctx) { | 37 | - translation = BIOS_ATA_TRANSLATION_AUTO; |
65 | virtio_scsi_acquire(s); | 38 | |
39 | nb_nics = 0; | ||
40 | |||
41 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
42 | if (optind >= argc) | ||
43 | break; | ||
44 | if (argv[optind][0] != '-') { | ||
45 | - hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); | ||
46 | + drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); | ||
47 | } else { | ||
48 | const QEMUOption *popt; | ||
49 | |||
50 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
51 | cpu_model = optarg; | ||
52 | break; | ||
53 | case QEMU_OPTION_hda: | ||
54 | - { | ||
55 | - char buf[256]; | ||
56 | - if (cyls == 0) | ||
57 | - snprintf(buf, sizeof(buf), "%s", HD_OPTS); | ||
58 | - else | ||
59 | - snprintf(buf, sizeof(buf), | ||
60 | - "%s,cyls=%d,heads=%d,secs=%d%s", | ||
61 | - HD_OPTS , cyls, heads, secs, | ||
62 | - translation == BIOS_ATA_TRANSLATION_LBA ? | ||
63 | - ",trans=lba" : | ||
64 | - translation == BIOS_ATA_TRANSLATION_NONE ? | ||
65 | - ",trans=none" : ""); | ||
66 | - drive_add(IF_DEFAULT, 0, optarg, buf); | ||
67 | - break; | ||
68 | - } | ||
69 | case QEMU_OPTION_hdb: | ||
70 | case QEMU_OPTION_hdc: | ||
71 | case QEMU_OPTION_hdd: | ||
72 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
73 | case QEMU_OPTION_snapshot: | ||
74 | snapshot = 1; | ||
75 | break; | ||
76 | - case QEMU_OPTION_hdachs: | ||
77 | - { | ||
78 | - const char *p; | ||
79 | - p = optarg; | ||
80 | - cyls = strtol(p, (char **)&p, 0); | ||
81 | - if (cyls < 1 || cyls > 16383) | ||
82 | - goto chs_fail; | ||
83 | - if (*p != ',') | ||
84 | - goto chs_fail; | ||
85 | - p++; | ||
86 | - heads = strtol(p, (char **)&p, 0); | ||
87 | - if (heads < 1 || heads > 16) | ||
88 | - goto chs_fail; | ||
89 | - if (*p != ',') | ||
90 | - goto chs_fail; | ||
91 | - p++; | ||
92 | - secs = strtol(p, (char **)&p, 0); | ||
93 | - if (secs < 1 || secs > 63) | ||
94 | - goto chs_fail; | ||
95 | - if (*p == ',') { | ||
96 | - p++; | ||
97 | - if (!strcmp(p, "large")) { | ||
98 | - translation = BIOS_ATA_TRANSLATION_LARGE; | ||
99 | - } else if (!strcmp(p, "rechs")) { | ||
100 | - translation = BIOS_ATA_TRANSLATION_RECHS; | ||
101 | - } else if (!strcmp(p, "none")) { | ||
102 | - translation = BIOS_ATA_TRANSLATION_NONE; | ||
103 | - } else if (!strcmp(p, "lba")) { | ||
104 | - translation = BIOS_ATA_TRANSLATION_LBA; | ||
105 | - } else if (!strcmp(p, "auto")) { | ||
106 | - translation = BIOS_ATA_TRANSLATION_AUTO; | ||
107 | - } else { | ||
108 | - goto chs_fail; | ||
109 | - } | ||
110 | - } else if (*p != '\0') { | ||
111 | - chs_fail: | ||
112 | - error_report("invalid physical CHS format"); | ||
113 | - exit(1); | ||
114 | - } | ||
115 | - if (hda_opts != NULL) { | ||
116 | - qemu_opt_set_number(hda_opts, "cyls", cyls, | ||
117 | - &error_abort); | ||
118 | - qemu_opt_set_number(hda_opts, "heads", heads, | ||
119 | - &error_abort); | ||
120 | - qemu_opt_set_number(hda_opts, "secs", secs, | ||
121 | - &error_abort); | ||
122 | - if (translation == BIOS_ATA_TRANSLATION_LARGE) { | ||
123 | - qemu_opt_set(hda_opts, "trans", "large", | ||
124 | - &error_abort); | ||
125 | - } else if (translation == BIOS_ATA_TRANSLATION_RECHS) { | ||
126 | - qemu_opt_set(hda_opts, "trans", "rechs", | ||
127 | - &error_abort); | ||
128 | - } else if (translation == BIOS_ATA_TRANSLATION_LBA) { | ||
129 | - qemu_opt_set(hda_opts, "trans", "lba", | ||
130 | - &error_abort); | ||
131 | - } else if (translation == BIOS_ATA_TRANSLATION_NONE) { | ||
132 | - qemu_opt_set(hda_opts, "trans", "none", | ||
133 | - &error_abort); | ||
134 | - } | ||
135 | - } | ||
136 | - } | ||
137 | - error_report("'-hdachs' is deprecated, please use '-device" | ||
138 | - " ide-hd,cyls=c,heads=h,secs=s,...' instead"); | ||
139 | - break; | ||
140 | case QEMU_OPTION_numa: | ||
141 | opts = qemu_opts_parse_noisily(qemu_find_opts("numa"), | ||
142 | optarg, true); | ||
143 | diff --git a/qemu-doc.texi b/qemu-doc.texi | ||
144 | index XXXXXXX..XXXXXXX 100644 | ||
145 | --- a/qemu-doc.texi | ||
146 | +++ b/qemu-doc.texi | ||
147 | @@ -XXX,XX +XXX,XX @@ The ``--net dump'' argument is now replaced with the | ||
148 | ``-object filter-dump'' argument which works in combination | ||
149 | with the modern ``-netdev'' backends instead.
150 | |||
151 | -@subsection -hdachs (since 2.10.0) | ||
152 | - | ||
153 | -The ``-hdachs'' argument is now a synonym for setting | ||
154 | -the ``cyls'', ``heads'', ``secs'', and ``trans'' properties | ||
155 | -on the ``ide-hd'' device using the ``-device'' argument. | ||
156 | -The new syntax allows different settings to be provided | ||
157 | -per disk. | ||
158 | - | ||
159 | @subsection -usbdevice (since 2.10.0) | ||
160 | |||
161 | The ``-usbdevice DEV'' argument is now a synonym for setting | ||
162 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/qemu-options.hx | ||
165 | +++ b/qemu-options.hx | ||
166 | @@ -XXX,XX +XXX,XX @@ of available connectors of a given interface type. | ||
167 | @item media=@var{media} | ||
168 | This option defines the type of the media: disk or cdrom. | ||
169 | @item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}] | ||
170 | -These options have the same definition as they have in @option{-hdachs}. | ||
171 | -These parameters are deprecated, use the corresponding parameters | ||
172 | +Force disk physical geometry and the optional BIOS translation (trans=none or | ||
173 | +lba). These parameters are deprecated, use the corresponding parameters | ||
174 | of @code{-device} instead. | ||
175 | @item snapshot=@var{snapshot} | ||
176 | @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive | ||
177 | @@ -XXX,XX +XXX,XX @@ the raw disk image you use is not written back. You can however force | ||
178 | the write back by pressing @key{C-a s} (@pxref{disk_images}). | ||
179 | ETEXI | ||
180 | |||
181 | -DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \ | ||
182 | - "-hdachs c,h,s[,t]\n" \ | ||
183 | - " force hard disk 0 physical geometry and the optional BIOS\n" \ | ||
184 | - " translation (t=none or lba) (usually QEMU can guess them)\n", | ||
185 | - QEMU_ARCH_ALL) | ||
186 | -STEXI | ||
187 | -@item -hdachs @var{c},@var{h},@var{s},[,@var{t}] | ||
188 | -@findex -hdachs | ||
189 | -Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <= | ||
190 | -@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS | ||
191 | -translation mode (@var{t}=none, lba or auto). Usually QEMU can guess | ||
192 | -all those parameters. This option is deprecated, please use | ||
193 | -@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead. | ||
194 | -ETEXI | ||
195 | - | ||
196 | DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, | ||
197 | "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n" | ||
198 | " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n" | ||
66 | -- | 199 | -- |
67 | 2.40.1 | 200 | 2.13.6 |
201 | |||
202 | diff view generated by jsdifflib |
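
To make point 1 of the argument above concrete, here is a sketch of the lookup guard it describes, assuming a scsi_device_find()-style helper underneath (simplified; not the verbatim hw/scsi code):

    /* Sketch: once unplug has cleared dev->realized, late lookups fail and
     * the caller answers the guest with VIRTIO_SCSI_S_BAD_TARGET. */
    SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
    {
        SCSIDevice *d = scsi_device_find(bus, channel, id, lun);

        if (!d || !qatomic_read(&d->qdev.realized)) {
            return NULL;
        }
        object_ref(OBJECT(d));  /* pin the device for the request's lifetime */
        return d;
    }
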
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | There is no need to suspend activity between aio_disable_external() and | 3 | Looks like we forgot to announce the deprecation of these options in |
4 | aio_enable_external(), which is mainly used for the block layer's drain | 4 | the corresponding chapter of the qemu-doc text, so let's do that now. |
5 | operation. | ||
6 | 5 | ||
7 | This is part of ongoing work to remove the aio_disable_external() API. | 6 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
8 | 7 | Reviewed-by: John Snow <jsnow@redhat.com> | |
9 | Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> | 8 | Reviewed-by: Markus Armbruster <armbru@redhat.com> |
10 | Reviewed-by: Paul Durrant <paul@xen.org> | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | Message-Id: <20230516190238.8401-9-stefanha@redhat.com> | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | --- | 10 | --- |
16 | hw/i386/kvm/xen_xenstore.c | 2 +- | 11 | qemu-doc.texi | 15 +++++++++++++++ |
17 | 1 file changed, 1 insertion(+), 1 deletion(-) | 12 | 1 file changed, 15 insertions(+) |
18 | 13 | ||
19 | diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c | 14 | diff --git a/qemu-doc.texi b/qemu-doc.texi |
20 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/hw/i386/kvm/xen_xenstore.c | 16 | --- a/qemu-doc.texi |
22 | +++ b/hw/i386/kvm/xen_xenstore.c | 17 | +++ b/qemu-doc.texi |
23 | @@ -XXX,XX +XXX,XX @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp) | 18 | @@ -XXX,XX +XXX,XX @@ longer be directly supported in QEMU. |
24 | error_setg(errp, "Xenstore evtchn port init failed"); | 19 | The ``-drive if=scsi'' argument is replaced by the
25 | return; | 20 | ``-device BUS-TYPE'' argument combined with ``-drive if=none''. |
26 | } | 21 | |
27 | - aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true, | 22 | +@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0) |
28 | + aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), false, | 23 | + |
29 | xen_xenstore_event, NULL, NULL, NULL, s); | 24 | +The drive geometry arguments are replaced by the geometry arguments
30 | 25 | +that can be specified with the ``-device'' parameter. | |
31 | s->impl = xs_impl_create(xen_domid); | 26 | + |
27 | +@subsection -drive serial=... (since 2.10.0) | ||
28 | + | ||
29 | +The drive serial argument is replaced by the serial argument
30 | +that can be specified with the ``-device'' parameter. | ||
31 | + | ||
32 | +@subsection -drive addr=... (since 2.10.0) | ||
33 | + | ||
34 | +The drive addr argument is replaced by the addr argument
35 | +that can be specified with the ``-device'' parameter. | ||
36 | + | ||
37 | @subsection -net dump (since 2.10.0) | ||
38 | |||
39 | The ``--net dump'' argument is now replaced with the | ||
32 | -- | 40 | -- |
33 | 2.40.1 | 41 | 2.13.6 |
42 | |||
43 | diff view generated by jsdifflib |
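
For reference, the surviving registration in xen_xenstore_realize() with each aio_set_fd_handler() parameter labelled (the arguments are exactly those of the + line above):

    aio_set_fd_handler(qemu_get_aio_context(),  /* main-loop AioContext     */
                       xen_be_evtchn_fd(s->eh), /* event channel fd         */
                       false,                   /* is_external: no longer
                                                 * suspended by drain       */
                       xen_xenstore_event,      /* io_read                  */
                       NULL,                    /* io_write                 */
                       NULL,                    /* io_poll                  */
                       NULL,                    /* io_poll_ready            */
                       s);                      /* opaque                   */
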
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Fam Zheng <famz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | All callers now pass is_external=false to aio_set_fd_handler() and | 3 | Signed-off-by: Fam Zheng <famz@redhat.com> |
4 | aio_set_event_notifier(). The aio_disable_external() API that | ||
5 | temporarily disables fd handlers that were registered is_external=true | ||
6 | is therefore dead code. | ||
7 | |||
8 | Remove aio_disable_external(), aio_enable_external(), and the | ||
9 | is_external arguments to aio_set_fd_handler() and | ||
10 | aio_set_event_notifier(). | ||
11 | |||
12 | The entire test-fdmon-epoll test is removed because its sole purpose was | ||
13 | testing aio_disable_external(). | ||
14 | |||
15 | Parts of this patch were generated using the following coccinelle | ||
16 | (https://coccinelle.lip6.fr/) semantic patch: | ||
17 | |||
18 | @@ | ||
19 | expression ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque; | ||
20 | @@ | ||
21 | - aio_set_fd_handler(ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque) | ||
22 | + aio_set_fd_handler(ctx, fd, io_read, io_write, io_poll, io_poll_ready, opaque) | ||
23 | |||
24 | @@ | ||
25 | expression ctx, notifier, is_external, io_read, io_poll, io_poll_ready; | ||
26 | @@ | ||
27 | - aio_set_event_notifier(ctx, notifier, is_external, io_read, io_poll, io_poll_ready) | ||
28 | + aio_set_event_notifier(ctx, notifier, io_read, io_poll, io_poll_ready) | ||
29 | |||
30 | Reviewed-by: Juan Quintela <quintela@redhat.com> | ||
31 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
32 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
33 | Message-Id: <20230516190238.8401-21-stefanha@redhat.com> | ||
34 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
35 | --- | 5 | --- |
36 | include/block/aio.h | 57 ------------------------ | 6 | include/block/block_int.h | 1 - |
37 | util/aio-posix.h | 1 - | 7 | block/io.c | 18 ------------------ |
38 | block.c | 7 --- | 8 | 2 files changed, 19 deletions(-) |
39 | block/blkio.c | 15 +++---- | ||
40 | block/curl.c | 10 ++--- | ||
41 | block/export/fuse.c | 8 ++-- | ||
42 | block/export/vduse-blk.c | 10 ++--- | ||
43 | block/io.c | 2 - | ||
44 | block/io_uring.c | 4 +- | ||
45 | block/iscsi.c | 3 +- | ||
46 | block/linux-aio.c | 4 +- | ||
47 | block/nfs.c | 5 +-- | ||
48 | block/nvme.c | 8 ++-- | ||
49 | block/ssh.c | 4 +- | ||
50 | block/win32-aio.c | 6 +-- | ||
51 | hw/i386/kvm/xen_xenstore.c | 2 +- | ||
52 | hw/virtio/virtio.c | 6 +-- | ||
53 | hw/xen/xen-bus.c | 8 ++-- | ||
54 | io/channel-command.c | 6 +-- | ||
55 | io/channel-file.c | 3 +- | ||
56 | io/channel-socket.c | 3 +- | ||
57 | migration/rdma.c | 16 +++---- | ||
58 | tests/unit/test-aio.c | 27 +----------- | ||
59 | tests/unit/test-bdrv-drain.c | 1 - | ||
60 | tests/unit/test-fdmon-epoll.c | 73 ------------------------------- | ||
61 | tests/unit/test-nested-aio-poll.c | 9 ++-- | ||
62 | util/aio-posix.c | 20 +++------ | ||
63 | util/aio-win32.c | 8 +--- | ||
64 | util/async.c | 3 +- | ||
65 | util/fdmon-epoll.c | 10 ----- | ||
66 | util/fdmon-io_uring.c | 8 +--- | ||
67 | util/fdmon-poll.c | 3 +- | ||
68 | util/main-loop.c | 7 ++- | ||
69 | util/qemu-coroutine-io.c | 7 ++- | ||
70 | util/vhost-user-server.c | 11 +++-- | ||
71 | tests/unit/meson.build | 3 -- | ||
72 | 36 files changed, 80 insertions(+), 298 deletions(-) | ||
73 | delete mode 100644 tests/unit/test-fdmon-epoll.c | ||
74 | 9 | ||
75 | diff --git a/include/block/aio.h b/include/block/aio.h | 10 | diff --git a/include/block/block_int.h b/include/block/block_int.h |
76 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
77 | --- a/include/block/aio.h | 12 | --- a/include/block/block_int.h |
78 | +++ b/include/block/aio.h | 13 | +++ b/include/block/block_int.h |
79 | @@ -XXX,XX +XXX,XX @@ struct AioContext { | 14 | @@ -XXX,XX +XXX,XX @@ bool blk_dev_is_tray_open(BlockBackend *blk); |
80 | */ | 15 | bool blk_dev_is_medium_locked(BlockBackend *blk); |
81 | QEMUTimerListGroup tlg; | 16 | |
82 | 17 | void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); | |
83 | - int external_disable_cnt; | 18 | -bool bdrv_requests_pending(BlockDriverState *bs); |
84 | - | 19 | |
85 | /* Number of AioHandlers without .io_poll() */ | 20 | void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); |
86 | int poll_disable_cnt; | 21 | void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); |
87 | |||
88 | @@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking); | ||
89 | */ | ||
90 | void aio_set_fd_handler(AioContext *ctx, | ||
91 | int fd, | ||
92 | - bool is_external, | ||
93 | IOHandler *io_read, | ||
94 | IOHandler *io_write, | ||
95 | AioPollFn *io_poll, | ||
96 | @@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx, | ||
97 | */ | ||
98 | void aio_set_event_notifier(AioContext *ctx, | ||
99 | EventNotifier *notifier, | ||
100 | - bool is_external, | ||
101 | EventNotifierHandler *io_read, | ||
102 | AioPollFn *io_poll, | ||
103 | EventNotifierHandler *io_poll_ready); | ||
104 | @@ -XXX,XX +XXX,XX @@ static inline void aio_timer_init(AioContext *ctx, | ||
105 | */ | ||
106 | int64_t aio_compute_timeout(AioContext *ctx); | ||
107 | |||
108 | -/** | ||
109 | - * aio_disable_external: | ||
110 | - * @ctx: the aio context | ||
111 | - * | ||
112 | - * Disable the further processing of external clients. | ||
113 | - */ | ||
114 | -static inline void aio_disable_external(AioContext *ctx) | ||
115 | -{ | ||
116 | - qatomic_inc(&ctx->external_disable_cnt); | ||
117 | -} | ||
118 | - | ||
119 | -/** | ||
120 | - * aio_enable_external: | ||
121 | - * @ctx: the aio context | ||
122 | - * | ||
123 | - * Enable the processing of external clients. | ||
124 | - */ | ||
125 | -static inline void aio_enable_external(AioContext *ctx) | ||
126 | -{ | ||
127 | - int old; | ||
128 | - | ||
129 | - old = qatomic_fetch_dec(&ctx->external_disable_cnt); | ||
130 | - assert(old > 0); | ||
131 | - if (old == 1) { | ||
132 | - /* Kick event loop so it re-arms file descriptors */ | ||
133 | - aio_notify(ctx); | ||
134 | - } | ||
135 | -} | ||
136 | - | ||
137 | -/** | ||
138 | - * aio_external_disabled: | ||
139 | - * @ctx: the aio context | ||
140 | - * | ||
141 | - * Return true if the external clients are disabled. | ||
142 | - */ | ||
143 | -static inline bool aio_external_disabled(AioContext *ctx) | ||
144 | -{ | ||
145 | - return qatomic_read(&ctx->external_disable_cnt); | ||
146 | -} | ||
147 | - | ||
148 | -/** | ||
149 | - * aio_node_check: | ||
150 | - * @ctx: the aio context | ||
151 | - * @is_external: Whether or not the checked node is an external event source. | ||
152 | - * | ||
153 | - * Check if the node's is_external flag is okay to be polled by the ctx at this | ||
154 | - * moment. True means green light. | ||
155 | - */ | ||
156 | -static inline bool aio_node_check(AioContext *ctx, bool is_external) | ||
157 | -{ | ||
158 | - return !is_external || !qatomic_read(&ctx->external_disable_cnt); | ||
159 | -} | ||
160 | - | ||
161 | /** | ||
162 | * aio_co_schedule: | ||
163 | * @ctx: the aio context | ||
164 | diff --git a/util/aio-posix.h b/util/aio-posix.h | ||
165 | index XXXXXXX..XXXXXXX 100644 | ||
166 | --- a/util/aio-posix.h | ||
167 | +++ b/util/aio-posix.h | ||
168 | @@ -XXX,XX +XXX,XX @@ struct AioHandler { | ||
169 | #endif | ||
170 | int64_t poll_idle_timeout; /* when to stop userspace polling */ | ||
171 | bool poll_ready; /* has polling detected an event? */ | ||
172 | - bool is_external; | ||
173 | }; | ||
174 | |||
175 | /* Add a handler to a ready list */ | ||
176 | diff --git a/block.c b/block.c | ||
177 | index XXXXXXX..XXXXXXX 100644 | ||
178 | --- a/block.c | ||
179 | +++ b/block.c | ||
180 | @@ -XXX,XX +XXX,XX @@ static void bdrv_detach_aio_context(BlockDriverState *bs) | ||
181 | bs->drv->bdrv_detach_aio_context(bs); | ||
182 | } | ||
183 | |||
184 | - if (bs->quiesce_counter) { | ||
185 | - aio_enable_external(bs->aio_context); | ||
186 | - } | ||
187 | bs->aio_context = NULL; | ||
188 | } | ||
189 | |||
190 | @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_aio_context(BlockDriverState *bs, | ||
191 | BdrvAioNotifier *ban, *ban_tmp; | ||
192 | GLOBAL_STATE_CODE(); | ||
193 | |||
194 | - if (bs->quiesce_counter) { | ||
195 | - aio_disable_external(new_context); | ||
196 | - } | ||
197 | - | ||
198 | bs->aio_context = new_context; | ||
199 | |||
200 | if (bs->drv && bs->drv->bdrv_attach_aio_context) { | ||
201 | diff --git a/block/blkio.c b/block/blkio.c | ||
202 | index XXXXXXX..XXXXXXX 100644 | ||
203 | --- a/block/blkio.c | ||
204 | +++ b/block/blkio.c | ||
205 | @@ -XXX,XX +XXX,XX @@ static void blkio_attach_aio_context(BlockDriverState *bs, | ||
206 | { | ||
207 | BDRVBlkioState *s = bs->opaque; | ||
208 | |||
209 | - aio_set_fd_handler(new_context, | ||
210 | - s->completion_fd, | ||
211 | - false, | ||
212 | - blkio_completion_fd_read, | ||
213 | - NULL, | ||
214 | + aio_set_fd_handler(new_context, s->completion_fd, | ||
215 | + blkio_completion_fd_read, NULL, | ||
216 | blkio_completion_fd_poll, | ||
217 | - blkio_completion_fd_poll_ready, | ||
218 | - bs); | ||
219 | + blkio_completion_fd_poll_ready, bs); | ||
220 | } | ||
221 | |||
222 | static void blkio_detach_aio_context(BlockDriverState *bs) | ||
223 | { | ||
224 | BDRVBlkioState *s = bs->opaque; | ||
225 | |||
226 | - aio_set_fd_handler(bdrv_get_aio_context(bs), | ||
227 | - s->completion_fd, | ||
228 | - false, NULL, NULL, NULL, NULL, NULL); | ||
229 | + aio_set_fd_handler(bdrv_get_aio_context(bs), s->completion_fd, NULL, NULL, | ||
230 | + NULL, NULL, NULL); | ||
231 | } | ||
232 | |||
233 | /* Call with s->blkio_lock held to submit I/O after enqueuing a new request */ | ||
234 | diff --git a/block/curl.c b/block/curl.c | ||
235 | index XXXXXXX..XXXXXXX 100644 | ||
236 | --- a/block/curl.c | ||
237 | +++ b/block/curl.c | ||
238 | @@ -XXX,XX +XXX,XX @@ static gboolean curl_drop_socket(void *key, void *value, void *opaque) | ||
239 | CURLSocket *socket = value; | ||
240 | BDRVCURLState *s = socket->s; | ||
241 | |||
242 | - aio_set_fd_handler(s->aio_context, socket->fd, false, | ||
243 | + aio_set_fd_handler(s->aio_context, socket->fd, | ||
244 | NULL, NULL, NULL, NULL, NULL); | ||
245 | return true; | ||
246 | } | ||
247 | @@ -XXX,XX +XXX,XX @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, | ||
248 | trace_curl_sock_cb(action, (int)fd); | ||
249 | switch (action) { | ||
250 | case CURL_POLL_IN: | ||
251 | - aio_set_fd_handler(s->aio_context, fd, false, | ||
252 | + aio_set_fd_handler(s->aio_context, fd, | ||
253 | curl_multi_do, NULL, NULL, NULL, socket); | ||
254 | break; | ||
255 | case CURL_POLL_OUT: | ||
256 | - aio_set_fd_handler(s->aio_context, fd, false, | ||
257 | + aio_set_fd_handler(s->aio_context, fd, | ||
258 | NULL, curl_multi_do, NULL, NULL, socket); | ||
259 | break; | ||
260 | case CURL_POLL_INOUT: | ||
261 | - aio_set_fd_handler(s->aio_context, fd, false, | ||
262 | + aio_set_fd_handler(s->aio_context, fd, | ||
263 | curl_multi_do, curl_multi_do, | ||
264 | NULL, NULL, socket); | ||
265 | break; | ||
266 | case CURL_POLL_REMOVE: | ||
267 | - aio_set_fd_handler(s->aio_context, fd, false, | ||
268 | + aio_set_fd_handler(s->aio_context, fd, | ||
269 | NULL, NULL, NULL, NULL, NULL); | ||
270 | break; | ||
271 | } | ||
272 | diff --git a/block/export/fuse.c b/block/export/fuse.c | ||
273 | index XXXXXXX..XXXXXXX 100644 | ||
274 | --- a/block/export/fuse.c | ||
275 | +++ b/block/export/fuse.c | ||
276 | @@ -XXX,XX +XXX,XX @@ static void fuse_export_drained_begin(void *opaque) | ||
277 | FuseExport *exp = opaque; | ||
278 | |||
279 | aio_set_fd_handler(exp->common.ctx, | ||
280 | - fuse_session_fd(exp->fuse_session), false, | ||
281 | + fuse_session_fd(exp->fuse_session), | ||
282 | NULL, NULL, NULL, NULL, NULL); | ||
283 | exp->fd_handler_set_up = false; | ||
284 | } | ||
285 | @@ -XXX,XX +XXX,XX @@ static void fuse_export_drained_end(void *opaque) | ||
286 | exp->common.ctx = blk_get_aio_context(exp->common.blk); | ||
287 | |||
288 | aio_set_fd_handler(exp->common.ctx, | ||
289 | - fuse_session_fd(exp->fuse_session), false, | ||
290 | + fuse_session_fd(exp->fuse_session), | ||
291 | read_from_fuse_export, NULL, NULL, NULL, exp); | ||
292 | exp->fd_handler_set_up = true; | ||
293 | } | ||
294 | @@ -XXX,XX +XXX,XX @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint, | ||
295 | g_hash_table_insert(exports, g_strdup(mountpoint), NULL); | ||
296 | |||
297 | aio_set_fd_handler(exp->common.ctx, | ||
298 | - fuse_session_fd(exp->fuse_session), false, | ||
299 | + fuse_session_fd(exp->fuse_session), | ||
300 | read_from_fuse_export, NULL, NULL, NULL, exp); | ||
301 | exp->fd_handler_set_up = true; | ||
302 | |||
303 | @@ -XXX,XX +XXX,XX @@ static void fuse_export_shutdown(BlockExport *blk_exp) | ||
304 | |||
305 | if (exp->fd_handler_set_up) { | ||
306 | aio_set_fd_handler(exp->common.ctx, | ||
307 | - fuse_session_fd(exp->fuse_session), false, | ||
308 | + fuse_session_fd(exp->fuse_session), | ||
309 | NULL, NULL, NULL, NULL, NULL); | ||
310 | exp->fd_handler_set_up = false; | ||
311 | } | ||
312 | diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c | ||
313 | index XXXXXXX..XXXXXXX 100644 | ||
314 | --- a/block/export/vduse-blk.c | ||
315 | +++ b/block/export/vduse-blk.c | ||
316 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq) | ||
317 | } | ||
318 | |||
319 | aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq), | ||
320 | - false, on_vduse_vq_kick, NULL, NULL, NULL, vq); | ||
321 | + on_vduse_vq_kick, NULL, NULL, NULL, vq); | ||
322 | /* Make sure we don't miss any kick after reconnecting */ | ||
323 | eventfd_write(vduse_queue_get_fd(vq), 1); | ||
324 | } | ||
325 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq) | ||
326 | return; | ||
327 | } | ||
328 | |||
329 | - aio_set_fd_handler(vblk_exp->export.ctx, fd, false, | ||
330 | + aio_set_fd_handler(vblk_exp->export.ctx, fd, | ||
331 | NULL, NULL, NULL, NULL, NULL); | ||
332 | } | ||
333 | |||
334 | @@ -XXX,XX +XXX,XX @@ static void on_vduse_dev_kick(void *opaque) | ||
335 | static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx) | ||
336 | { | ||
337 | aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev), | ||
338 | - false, on_vduse_dev_kick, NULL, NULL, NULL, | ||
339 | + on_vduse_dev_kick, NULL, NULL, NULL, | ||
340 | vblk_exp->dev); | ||
341 | |||
342 | /* Virtqueues are handled by vduse_blk_drained_end() */ | ||
343 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx) | ||
344 | static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp) | ||
345 | { | ||
346 | aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev), | ||
347 | - false, NULL, NULL, NULL, NULL, NULL); | ||
348 | + NULL, NULL, NULL, NULL, NULL); | ||
349 | |||
350 | /* Virtqueues are handled by vduse_blk_drained_begin() */ | ||
351 | } | ||
352 | @@ -XXX,XX +XXX,XX @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, | ||
353 | vduse_dev_setup_queue(vblk_exp->dev, i, queue_size); | ||
354 | } | ||
355 | |||
356 | - aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), false, | ||
357 | + aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), | ||
358 | on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev); | ||
359 | |||
360 | blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, | ||
361 | diff --git a/block/io.c b/block/io.c | 22 | diff --git a/block/io.c b/block/io.c |
362 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
363 | --- a/block/io.c | 24 | --- a/block/io.c |
364 | +++ b/block/io.c | 25 | +++ b/block/io.c |
365 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, | 26 | @@ -XXX,XX +XXX,XX @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) |
366 | 27 | assert(old >= 1); | |
367 | /* Stop things in parent-to-child order */ | ||
368 | if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { | ||
369 | - aio_disable_external(bdrv_get_aio_context(bs)); | ||
370 | bdrv_parent_drained_begin(bs, parent); | ||
371 | if (bs->drv && bs->drv->bdrv_drain_begin) { | ||
372 | bs->drv->bdrv_drain_begin(bs); | ||
373 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) | ||
374 | bs->drv->bdrv_drain_end(bs); | ||
375 | } | ||
376 | bdrv_parent_drained_end(bs, parent); | ||
377 | - aio_enable_external(bdrv_get_aio_context(bs)); | ||
378 | } | ||
379 | } | 28 | } |
380 | 29 | ||
381 | diff --git a/block/io_uring.c b/block/io_uring.c | 30 | -/* Check if any requests are in-flight (including throttled requests) */ |
382 | index XXXXXXX..XXXXXXX 100644 | 31 | -bool bdrv_requests_pending(BlockDriverState *bs) |
383 | --- a/block/io_uring.c | ||
384 | +++ b/block/io_uring.c | ||
385 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset, | ||
386 | |||
387 | void luring_detach_aio_context(LuringState *s, AioContext *old_context) | ||
388 | { | ||
389 | - aio_set_fd_handler(old_context, s->ring.ring_fd, false, | ||
390 | + aio_set_fd_handler(old_context, s->ring.ring_fd, | ||
391 | NULL, NULL, NULL, NULL, s); | ||
392 | qemu_bh_delete(s->completion_bh); | ||
393 | s->aio_context = NULL; | ||
394 | @@ -XXX,XX +XXX,XX @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context) | ||
395 | { | ||
396 | s->aio_context = new_context; | ||
397 | s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s); | ||
398 | - aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false, | ||
399 | + aio_set_fd_handler(s->aio_context, s->ring.ring_fd, | ||
400 | qemu_luring_completion_cb, NULL, | ||
401 | qemu_luring_poll_cb, qemu_luring_poll_ready, s); | ||
402 | } | ||
403 | diff --git a/block/iscsi.c b/block/iscsi.c | ||
404 | index XXXXXXX..XXXXXXX 100644 | ||
405 | --- a/block/iscsi.c | ||
406 | +++ b/block/iscsi.c | ||
407 | @@ -XXX,XX +XXX,XX @@ iscsi_set_events(IscsiLun *iscsilun) | ||
408 | |||
409 | if (ev != iscsilun->events) { | ||
410 | aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi), | ||
411 | - false, | ||
412 | (ev & POLLIN) ? iscsi_process_read : NULL, | ||
413 | (ev & POLLOUT) ? iscsi_process_write : NULL, | ||
414 | NULL, NULL, | ||
415 | @@ -XXX,XX +XXX,XX @@ static void iscsi_detach_aio_context(BlockDriverState *bs) | ||
416 | IscsiLun *iscsilun = bs->opaque; | ||
417 | |||
418 | aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi), | ||
419 | - false, NULL, NULL, NULL, NULL, NULL); | ||
420 | + NULL, NULL, NULL, NULL, NULL); | ||
421 | iscsilun->events = 0; | ||
422 | |||
423 | if (iscsilun->nop_timer) { | ||
424 | diff --git a/block/linux-aio.c b/block/linux-aio.c | ||
425 | index XXXXXXX..XXXXXXX 100644 | ||
426 | --- a/block/linux-aio.c | ||
427 | +++ b/block/linux-aio.c | ||
428 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, | ||
429 | |||
430 | void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context) | ||
431 | { | ||
432 | - aio_set_event_notifier(old_context, &s->e, false, NULL, NULL, NULL); | ||
433 | + aio_set_event_notifier(old_context, &s->e, NULL, NULL, NULL); | ||
434 | qemu_bh_delete(s->completion_bh); | ||
435 | s->aio_context = NULL; | ||
436 | } | ||
437 | @@ -XXX,XX +XXX,XX @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context) | ||
438 | { | ||
439 | s->aio_context = new_context; | ||
440 | s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s); | ||
441 | - aio_set_event_notifier(new_context, &s->e, false, | ||
442 | + aio_set_event_notifier(new_context, &s->e, | ||
443 | qemu_laio_completion_cb, | ||
444 | qemu_laio_poll_cb, | ||
445 | qemu_laio_poll_ready); | ||
446 | diff --git a/block/nfs.c b/block/nfs.c | ||
447 | index XXXXXXX..XXXXXXX 100644 | ||
448 | --- a/block/nfs.c | ||
449 | +++ b/block/nfs.c | ||
450 | @@ -XXX,XX +XXX,XX @@ static void nfs_set_events(NFSClient *client) | ||
451 | int ev = nfs_which_events(client->context); | ||
452 | if (ev != client->events) { | ||
453 | aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), | ||
454 | - false, | ||
455 | (ev & POLLIN) ? nfs_process_read : NULL, | ||
456 | (ev & POLLOUT) ? nfs_process_write : NULL, | ||
457 | NULL, NULL, client); | ||
458 | @@ -XXX,XX +XXX,XX @@ static void nfs_detach_aio_context(BlockDriverState *bs) | ||
459 | NFSClient *client = bs->opaque; | ||
460 | |||
461 | aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), | ||
462 | - false, NULL, NULL, NULL, NULL, NULL); | ||
463 | + NULL, NULL, NULL, NULL, NULL); | ||
464 | client->events = 0; | ||
465 | } | ||
466 | |||
467 | @@ -XXX,XX +XXX,XX @@ static void nfs_client_close(NFSClient *client) | ||
468 | if (client->context) { | ||
469 | qemu_mutex_lock(&client->mutex); | ||
470 | aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), | ||
471 | - false, NULL, NULL, NULL, NULL, NULL); | ||
472 | + NULL, NULL, NULL, NULL, NULL); | ||
473 | qemu_mutex_unlock(&client->mutex); | ||
474 | if (client->fh) { | ||
475 | nfs_close(client->context, client->fh); | ||
476 | diff --git a/block/nvme.c b/block/nvme.c | ||
477 | index XXXXXXX..XXXXXXX 100644 | ||
478 | --- a/block/nvme.c | ||
479 | +++ b/block/nvme.c | ||
480 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, | ||
481 | } | ||
482 | aio_set_event_notifier(bdrv_get_aio_context(bs), | ||
483 | &s->irq_notifier[MSIX_SHARED_IRQ_IDX], | ||
484 | - false, nvme_handle_event, nvme_poll_cb, | ||
485 | + nvme_handle_event, nvme_poll_cb, | ||
486 | nvme_poll_ready); | ||
487 | |||
488 | if (!nvme_identify(bs, namespace, errp)) { | ||
489 | @@ -XXX,XX +XXX,XX @@ static void nvme_close(BlockDriverState *bs) | ||
490 | g_free(s->queues); | ||
491 | aio_set_event_notifier(bdrv_get_aio_context(bs), | ||
492 | &s->irq_notifier[MSIX_SHARED_IRQ_IDX], | ||
493 | - false, NULL, NULL, NULL); | ||
494 | + NULL, NULL, NULL); | ||
495 | event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]); | ||
496 | qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map, | ||
497 | 0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE); | ||
498 | @@ -XXX,XX +XXX,XX @@ static void nvme_detach_aio_context(BlockDriverState *bs) | ||
499 | |||
500 | aio_set_event_notifier(bdrv_get_aio_context(bs), | ||
501 | &s->irq_notifier[MSIX_SHARED_IRQ_IDX], | ||
502 | - false, NULL, NULL, NULL); | ||
503 | + NULL, NULL, NULL); | ||
504 | } | ||
505 | |||
506 | static void nvme_attach_aio_context(BlockDriverState *bs, | ||
507 | @@ -XXX,XX +XXX,XX @@ static void nvme_attach_aio_context(BlockDriverState *bs, | ||
508 | |||
509 | s->aio_context = new_context; | ||
510 | aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX], | ||
511 | - false, nvme_handle_event, nvme_poll_cb, | ||
512 | + nvme_handle_event, nvme_poll_cb, | ||
513 | nvme_poll_ready); | ||
514 | |||
515 | for (unsigned i = 0; i < s->queue_count; i++) { | ||
516 | diff --git a/block/ssh.c b/block/ssh.c | ||
517 | index XXXXXXX..XXXXXXX 100644 | ||
518 | --- a/block/ssh.c | ||
519 | +++ b/block/ssh.c | ||
520 | @@ -XXX,XX +XXX,XX @@ static void restart_coroutine(void *opaque) | ||
521 | AioContext *ctx = bdrv_get_aio_context(bs); | ||
522 | |||
523 | trace_ssh_restart_coroutine(restart->co); | ||
524 | - aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL, NULL); | ||
525 | + aio_set_fd_handler(ctx, s->sock, NULL, NULL, NULL, NULL, NULL); | ||
526 | |||
527 | aio_co_wake(restart->co); | ||
528 | } | ||
529 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs) | ||
530 | trace_ssh_co_yield(s->sock, rd_handler, wr_handler); | ||
531 | |||
532 | aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, | ||
533 | - false, rd_handler, wr_handler, NULL, NULL, &restart); | ||
534 | + rd_handler, wr_handler, NULL, NULL, &restart); | ||
535 | qemu_coroutine_yield(); | ||
536 | trace_ssh_co_yield_back(s->sock); | ||
537 | } | ||
538 | diff --git a/block/win32-aio.c b/block/win32-aio.c | ||
539 | index XXXXXXX..XXXXXXX 100644 | ||
540 | --- a/block/win32-aio.c | ||
541 | +++ b/block/win32-aio.c | ||
542 | @@ -XXX,XX +XXX,XX @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile) | ||
543 | void win32_aio_detach_aio_context(QEMUWin32AIOState *aio, | ||
544 | AioContext *old_context) | ||
545 | { | ||
546 | - aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL, NULL); | ||
547 | + aio_set_event_notifier(old_context, &aio->e, NULL, NULL, NULL); | ||
548 | aio->aio_ctx = NULL; | ||
549 | } | ||
550 | |||
551 | @@ -XXX,XX +XXX,XX @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio, | ||
552 | AioContext *new_context) | ||
553 | { | ||
554 | aio->aio_ctx = new_context; | ||
555 | - aio_set_event_notifier(new_context, &aio->e, false, | ||
556 | - win32_aio_completion_cb, NULL, NULL); | ||
557 | + aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb, | ||
558 | + NULL, NULL); | ||
559 | } | ||
560 | |||
561 | QEMUWin32AIOState *win32_aio_init(void) | ||
562 | diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c | ||
563 | index XXXXXXX..XXXXXXX 100644 | ||
564 | --- a/hw/i386/kvm/xen_xenstore.c | ||
565 | +++ b/hw/i386/kvm/xen_xenstore.c | ||
566 | @@ -XXX,XX +XXX,XX @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp) | ||
567 | error_setg(errp, "Xenstore evtchn port init failed"); | ||
568 | return; | ||
569 | } | ||
570 | - aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), false, | ||
571 | + aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), | ||
572 | xen_xenstore_event, NULL, NULL, NULL, s); | ||
573 | |||
574 | s->impl = xs_impl_create(xen_domid); | ||
575 | diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c | ||
576 | index XXXXXXX..XXXXXXX 100644 | ||
577 | --- a/hw/virtio/virtio.c | ||
578 | +++ b/hw/virtio/virtio.c | ||
579 | @@ -XXX,XX +XXX,XX @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) | ||
580 | |||
581 | void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) | ||
582 | { | ||
583 | - aio_set_event_notifier(ctx, &vq->host_notifier, false, | ||
584 | + aio_set_event_notifier(ctx, &vq->host_notifier, | ||
585 | virtio_queue_host_notifier_read, | ||
586 | virtio_queue_host_notifier_aio_poll, | ||
587 | virtio_queue_host_notifier_aio_poll_ready); | ||
588 | @@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) | ||
589 | */ | ||
590 | void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) | ||
591 | { | ||
592 | - aio_set_event_notifier(ctx, &vq->host_notifier, false, | ||
593 | + aio_set_event_notifier(ctx, &vq->host_notifier, | ||
594 | virtio_queue_host_notifier_read, | ||
595 | NULL, NULL); | ||
596 | } | ||
597 | |||
598 | void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) | ||
599 | { | ||
600 | - aio_set_event_notifier(ctx, &vq->host_notifier, false, NULL, NULL, NULL); | ||
601 | + aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL); | ||
602 | } | ||
603 | |||
604 | void virtio_queue_host_notifier_read(EventNotifier *n) | ||
605 | diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c | ||
606 | index XXXXXXX..XXXXXXX 100644 | ||
607 | --- a/hw/xen/xen-bus.c | ||
608 | +++ b/hw/xen/xen-bus.c | ||
609 | @@ -XXX,XX +XXX,XX @@ void xen_device_set_event_channel_context(XenDevice *xendev, | ||
610 | } | ||
611 | |||
612 | if (channel->ctx) | ||
613 | - aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false, | ||
614 | + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), | ||
615 | NULL, NULL, NULL, NULL, NULL); | ||
616 | |||
617 | channel->ctx = ctx; | ||
618 | if (ctx) { | ||
619 | aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), | ||
620 | - false, xen_device_event, NULL, xen_device_poll, | ||
621 | - NULL, channel); | ||
622 | + xen_device_event, NULL, xen_device_poll, NULL, | ||
623 | + channel); | ||
624 | } | ||
625 | } | ||
626 | |||
627 | @@ -XXX,XX +XXX,XX @@ void xen_device_unbind_event_channel(XenDevice *xendev, | ||
628 | |||
629 | QLIST_REMOVE(channel, list); | ||
630 | |||
631 | - aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false, | ||
632 | + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), | ||
633 | NULL, NULL, NULL, NULL, NULL); | ||
634 | |||
635 | if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) { | ||
636 | diff --git a/io/channel-command.c b/io/channel-command.c | ||
637 | index XXXXXXX..XXXXXXX 100644 | ||
638 | --- a/io/channel-command.c | ||
639 | +++ b/io/channel-command.c | ||
640 | @@ -XXX,XX +XXX,XX @@ static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc, | ||
641 | void *opaque) | ||
642 | { | ||
643 | QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); | ||
644 | - aio_set_fd_handler(ctx, cioc->readfd, false, | ||
645 | - io_read, NULL, NULL, NULL, opaque); | ||
646 | - aio_set_fd_handler(ctx, cioc->writefd, false, | ||
647 | - NULL, io_write, NULL, NULL, opaque); | ||
648 | + aio_set_fd_handler(ctx, cioc->readfd, io_read, NULL, NULL, NULL, opaque); | ||
649 | + aio_set_fd_handler(ctx, cioc->writefd, NULL, io_write, NULL, NULL, opaque); | ||
650 | } | ||
651 | |||
652 | |||
653 | diff --git a/io/channel-file.c b/io/channel-file.c | ||
654 | index XXXXXXX..XXXXXXX 100644 | ||
655 | --- a/io/channel-file.c | ||
656 | +++ b/io/channel-file.c | ||
657 | @@ -XXX,XX +XXX,XX @@ static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc, | ||
658 | void *opaque) | ||
659 | { | ||
660 | QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); | ||
661 | - aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write, | ||
662 | - NULL, NULL, opaque); | ||
663 | + aio_set_fd_handler(ctx, fioc->fd, io_read, io_write, NULL, NULL, opaque); | ||
664 | } | ||
665 | |||
666 | static GSource *qio_channel_file_create_watch(QIOChannel *ioc, | ||
667 | diff --git a/io/channel-socket.c b/io/channel-socket.c | ||
668 | index XXXXXXX..XXXXXXX 100644 | ||
669 | --- a/io/channel-socket.c | ||
670 | +++ b/io/channel-socket.c | ||
671 | @@ -XXX,XX +XXX,XX @@ static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc, | ||
672 | void *opaque) | ||
673 | { | ||
674 | QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); | ||
675 | - aio_set_fd_handler(ctx, sioc->fd, false, | ||
676 | - io_read, io_write, NULL, NULL, opaque); | ||
677 | + aio_set_fd_handler(ctx, sioc->fd, io_read, io_write, NULL, NULL, opaque); | ||
678 | } | ||
679 | |||
680 | static GSource *qio_channel_socket_create_watch(QIOChannel *ioc, | ||
681 | diff --git a/migration/rdma.c b/migration/rdma.c | ||
682 | index XXXXXXX..XXXXXXX 100644 | ||
683 | --- a/migration/rdma.c | ||
684 | +++ b/migration/rdma.c | ||
685 | @@ -XXX,XX +XXX,XX @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc, | ||
686 | { | ||
687 | QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); | ||
688 | if (io_read) { | ||
689 | - aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd, | ||
690 | - false, io_read, io_write, NULL, NULL, opaque); | ||
691 | - aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd, | ||
692 | - false, io_read, io_write, NULL, NULL, opaque); | ||
693 | + aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd, io_read, | ||
694 | + io_write, NULL, NULL, opaque); | ||
695 | + aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd, io_read, | ||
696 | + io_write, NULL, NULL, opaque); | ||
697 | } else { | ||
698 | - aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd, | ||
699 | - false, io_read, io_write, NULL, NULL, opaque); | ||
700 | - aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd, | ||
701 | - false, io_read, io_write, NULL, NULL, opaque); | ||
702 | + aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd, io_read, | ||
703 | + io_write, NULL, NULL, opaque); | ||
704 | + aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd, io_read, | ||
705 | + io_write, NULL, NULL, opaque); | ||
706 | } | ||
707 | } | ||
708 | |||
709 | diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c | ||
710 | index XXXXXXX..XXXXXXX 100644 | ||
711 | --- a/tests/unit/test-aio.c | ||
712 | +++ b/tests/unit/test-aio.c | ||
713 | @@ -XXX,XX +XXX,XX @@ static void *test_acquire_thread(void *opaque) | ||
714 | static void set_event_notifier(AioContext *ctx, EventNotifier *notifier, | ||
715 | EventNotifierHandler *handler) | ||
716 | { | ||
717 | - aio_set_event_notifier(ctx, notifier, false, handler, NULL, NULL); | ||
718 | + aio_set_event_notifier(ctx, notifier, handler, NULL, NULL); | ||
719 | } | ||
720 | |||
721 | static void dummy_notifier_read(EventNotifier *n) | ||
722 | @@ -XXX,XX +XXX,XX @@ static void test_flush_event_notifier(void) | ||
723 | event_notifier_cleanup(&data.e); | ||
724 | } | ||
725 | |||
726 | -static void test_aio_external_client(void) | ||
727 | -{ | 32 | -{ |
728 | - int i, j; | 33 | - BdrvChild *child; |
729 | - | 34 | - |
730 | - for (i = 1; i < 3; i++) { | 35 | - if (atomic_read(&bs->in_flight)) { |
731 | - EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true }; | 36 | - return true; |
732 | - event_notifier_init(&data.e, false); | 37 | - } |
733 | - aio_set_event_notifier(ctx, &data.e, true, event_ready_cb, NULL, NULL); | 38 | - |
734 | - event_notifier_set(&data.e); | 39 | - QLIST_FOREACH(child, &bs->children, next) { |
735 | - for (j = 0; j < i; j++) { | 40 | - if (bdrv_requests_pending(child->bs)) { |
736 | - aio_disable_external(ctx); | 41 | - return true; |
737 | - } | 42 | - } |
738 | - for (j = 0; j < i; j++) { | ||
739 | - assert(!aio_poll(ctx, false)); | ||
740 | - assert(event_notifier_test_and_clear(&data.e)); | ||
741 | - event_notifier_set(&data.e); | ||
742 | - aio_enable_external(ctx); | ||
743 | - } | ||
744 | - assert(aio_poll(ctx, false)); | ||
745 | - set_event_notifier(ctx, &data.e, NULL); | ||
746 | - event_notifier_cleanup(&data.e); | ||
747 | - } | 43 | - } |
44 | - | ||
45 | - return false; | ||
748 | -} | 46 | -} |
749 | - | 47 | - |
750 | static void test_wait_event_notifier_noflush(void) | 48 | typedef struct { |
751 | { | 49 | Coroutine *co; |
752 | EventNotifierTestData data = { .n = 0 }; | 50 | BlockDriverState *bs; |
753 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
754 | g_test_add_func("/aio/event/wait", test_wait_event_notifier); | ||
755 | g_test_add_func("/aio/event/wait/no-flush-cb", test_wait_event_notifier_noflush); | ||
756 | g_test_add_func("/aio/event/flush", test_flush_event_notifier); | ||
757 | - g_test_add_func("/aio/external-client", test_aio_external_client); | ||
758 | g_test_add_func("/aio/timer/schedule", test_timer_schedule); | ||
759 | |||
760 | g_test_add_func("/aio/coroutine/queue-chaining", test_queue_chaining); | ||
761 | diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c | ||
762 | index XXXXXXX..XXXXXXX 100644 | ||
763 | --- a/tests/unit/test-bdrv-drain.c | ||
764 | +++ b/tests/unit/test-bdrv-drain.c | ||
765 | @@ -XXX,XX +XXX,XX @@ static void test_graph_change_drain_all(void) | ||
766 | |||
767 | g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
768 | g_assert_cmpint(b_s->drain_count, ==, 0); | ||
769 | - g_assert_cmpint(qemu_get_aio_context()->external_disable_cnt, ==, 0); | ||
770 | |||
771 | bdrv_unref(bs_b); | ||
772 | blk_unref(blk_b); | ||
773 | diff --git a/tests/unit/test-fdmon-epoll.c b/tests/unit/test-fdmon-epoll.c | ||
774 | deleted file mode 100644 | ||
775 | index XXXXXXX..XXXXXXX | ||
776 | --- a/tests/unit/test-fdmon-epoll.c | ||
777 | +++ /dev/null | ||
778 | @@ -XXX,XX +XXX,XX @@ | ||
779 | -/* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
780 | -/* | ||
781 | - * fdmon-epoll tests | ||
782 | - * | ||
783 | - * Copyright (c) 2020 Red Hat, Inc. | ||
784 | - */ | ||
785 | - | ||
786 | -#include "qemu/osdep.h" | ||
787 | -#include "block/aio.h" | ||
788 | -#include "qapi/error.h" | ||
789 | -#include "qemu/main-loop.h" | ||
790 | - | ||
791 | -static AioContext *ctx; | ||
792 | - | ||
793 | -static void dummy_fd_handler(EventNotifier *notifier) | ||
794 | -{ | ||
795 | - event_notifier_test_and_clear(notifier); | ||
796 | -} | ||
797 | - | ||
798 | -static void add_event_notifiers(EventNotifier *notifiers, size_t n) | ||
799 | -{ | ||
800 | - for (size_t i = 0; i < n; i++) { | ||
801 | - event_notifier_init(¬ifiers[i], false); | ||
802 | - aio_set_event_notifier(ctx, ¬ifiers[i], false, | ||
803 | - dummy_fd_handler, NULL, NULL); | ||
804 | - } | ||
805 | -} | ||
806 | - | ||
807 | -static void remove_event_notifiers(EventNotifier *notifiers, size_t n) | ||
808 | -{ | ||
809 | - for (size_t i = 0; i < n; i++) { | ||
810 | - aio_set_event_notifier(ctx, ¬ifiers[i], false, NULL, NULL, NULL); | ||
811 | - event_notifier_cleanup(¬ifiers[i]); | ||
812 | - } | ||
813 | -} | ||
814 | - | ||
815 | -/* Check that fd handlers work when external clients are disabled */ | ||
816 | -static void test_external_disabled(void) | ||
817 | -{ | ||
818 | - EventNotifier notifiers[100]; | ||
819 | - | ||
820 | - /* fdmon-epoll is only enabled when many fd handlers are registered */ | ||
821 | - add_event_notifiers(notifiers, G_N_ELEMENTS(notifiers)); | ||
822 | - | ||
823 | - event_notifier_set(¬ifiers[0]); | ||
824 | - assert(aio_poll(ctx, true)); | ||
825 | - | ||
826 | - aio_disable_external(ctx); | ||
827 | - event_notifier_set(¬ifiers[0]); | ||
828 | - assert(aio_poll(ctx, true)); | ||
829 | - aio_enable_external(ctx); | ||
830 | - | ||
831 | - remove_event_notifiers(notifiers, G_N_ELEMENTS(notifiers)); | ||
832 | -} | ||
833 | - | ||
834 | -int main(int argc, char **argv) | ||
835 | -{ | ||
836 | - /* | ||
837 | - * This code relies on the fact that fdmon-io_uring disables itself when | ||
838 | - * the glib main loop is in use. The main loop uses fdmon-poll and upgrades | ||
839 | - * to fdmon-epoll when the number of fds exceeds a threshold. | ||
840 | - */ | ||
841 | - qemu_init_main_loop(&error_fatal); | ||
842 | - ctx = qemu_get_aio_context(); | ||
843 | - | ||
844 | - while (g_main_context_iteration(NULL, false)) { | ||
845 | - /* Do nothing */ | ||
846 | - } | ||
847 | - | ||
848 | - g_test_init(&argc, &argv, NULL); | ||
849 | - g_test_add_func("/fdmon-epoll/external-disabled", test_external_disabled); | ||
850 | - return g_test_run(); | ||
851 | -} | ||
852 | diff --git a/tests/unit/test-nested-aio-poll.c b/tests/unit/test-nested-aio-poll.c | ||
853 | index XXXXXXX..XXXXXXX 100644 | ||
854 | --- a/tests/unit/test-nested-aio-poll.c | ||
855 | +++ b/tests/unit/test-nested-aio-poll.c | ||
856 | @@ -XXX,XX +XXX,XX @@ static void test(void) | ||
857 | |||
858 | /* Make the event notifier active (set) right away */ | ||
859 | event_notifier_init(&td.poll_notifier, 1); | ||
860 | - aio_set_event_notifier(td.ctx, &td.poll_notifier, false, | ||
861 | + aio_set_event_notifier(td.ctx, &td.poll_notifier, | ||
862 | io_read, io_poll_true, io_poll_ready); | ||
863 | |||
864 | /* This event notifier will be used later */ | ||
865 | event_notifier_init(&td.dummy_notifier, 0); | ||
866 | - aio_set_event_notifier(td.ctx, &td.dummy_notifier, false, | ||
867 | + aio_set_event_notifier(td.ctx, &td.dummy_notifier, | ||
868 | io_read, io_poll_false, io_poll_never_ready); | ||
869 | |||
870 | /* Consume aio_notify() */ | ||
871 | @@ -XXX,XX +XXX,XX @@ static void test(void) | ||
872 | /* Run io_poll()/io_poll_ready() one more time to show it keeps working */ | ||
873 | g_assert(aio_poll(td.ctx, true)); | ||
874 | |||
875 | - aio_set_event_notifier(td.ctx, &td.dummy_notifier, false, | ||
876 | - NULL, NULL, NULL); | ||
877 | - aio_set_event_notifier(td.ctx, &td.poll_notifier, false, NULL, NULL, NULL); | ||
878 | + aio_set_event_notifier(td.ctx, &td.dummy_notifier, NULL, NULL, NULL); | ||
879 | + aio_set_event_notifier(td.ctx, &td.poll_notifier, NULL, NULL, NULL); | ||
880 | event_notifier_cleanup(&td.dummy_notifier); | ||
881 | event_notifier_cleanup(&td.poll_notifier); | ||
882 | aio_context_unref(td.ctx); | ||
883 | diff --git a/util/aio-posix.c b/util/aio-posix.c | ||
884 | index XXXXXXX..XXXXXXX 100644 | ||
885 | --- a/util/aio-posix.c | ||
886 | +++ b/util/aio-posix.c | ||
887 | @@ -XXX,XX +XXX,XX @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node) | ||
888 | |||
889 | void aio_set_fd_handler(AioContext *ctx, | ||
890 | int fd, | ||
891 | - bool is_external, | ||
892 | IOHandler *io_read, | ||
893 | IOHandler *io_write, | ||
894 | AioPollFn *io_poll, | ||
895 | @@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx, | ||
896 | new_node->io_poll = io_poll; | ||
897 | new_node->io_poll_ready = io_poll_ready; | ||
898 | new_node->opaque = opaque; | ||
899 | - new_node->is_external = is_external; | ||
900 | |||
901 | if (is_new) { | ||
902 | new_node->pfd.fd = fd; | ||
903 | @@ -XXX,XX +XXX,XX @@ static void aio_set_fd_poll(AioContext *ctx, int fd, | ||
904 | |||
905 | void aio_set_event_notifier(AioContext *ctx, | ||
906 | EventNotifier *notifier, | ||
907 | - bool is_external, | ||
908 | EventNotifierHandler *io_read, | ||
909 | AioPollFn *io_poll, | ||
910 | EventNotifierHandler *io_poll_ready) | ||
911 | { | ||
912 | - aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external, | ||
913 | + aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), | ||
914 | (IOHandler *)io_read, NULL, io_poll, | ||
915 | (IOHandler *)io_poll_ready, notifier); | ||
916 | } | ||
917 | @@ -XXX,XX +XXX,XX @@ bool aio_pending(AioContext *ctx) | ||
918 | |||
919 | /* TODO should this check poll ready? */ | ||
920 | revents = node->pfd.revents & node->pfd.events; | ||
921 | - if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read && | ||
922 | - aio_node_check(ctx, node->is_external)) { | ||
923 | + if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) { | ||
924 | result = true; | ||
925 | break; | ||
926 | } | ||
927 | - if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write && | ||
928 | - aio_node_check(ctx, node->is_external)) { | ||
929 | + if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) { | ||
930 | result = true; | ||
931 | break; | ||
932 | } | ||
933 | @@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node) | ||
934 | QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll); | ||
935 | } | ||
936 | if (!QLIST_IS_INSERTED(node, node_deleted) && | ||
937 | - poll_ready && revents == 0 && | ||
938 | - aio_node_check(ctx, node->is_external) && | ||
939 | - node->io_poll_ready) { | ||
940 | + poll_ready && revents == 0 && node->io_poll_ready) { | ||
941 | /* | ||
942 | * Remove temporarily to avoid infinite loops when ->io_poll_ready() | ||
943 | * calls aio_poll() before clearing the condition that made the poll | ||
944 | @@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node) | ||
945 | |||
946 | if (!QLIST_IS_INSERTED(node, node_deleted) && | ||
947 | (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) && | ||
948 | - aio_node_check(ctx, node->is_external) && | ||
949 | node->io_read) { | ||
950 | node->io_read(node->opaque); | ||
951 | |||
952 | @@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node) | ||
953 | } | ||
954 | if (!QLIST_IS_INSERTED(node, node_deleted) && | ||
955 | (revents & (G_IO_OUT | G_IO_ERR)) && | ||
956 | - aio_node_check(ctx, node->is_external) && | ||
957 | node->io_write) { | ||
958 | node->io_write(node->opaque); | ||
959 | progress = true; | ||
960 | @@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers_once(AioContext *ctx, | ||
961 | AioHandler *tmp; | ||
962 | |||
963 | QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) { | ||
964 | - if (aio_node_check(ctx, node->is_external) && | ||
965 | - node->io_poll(node->opaque)) { | ||
966 | + if (node->io_poll(node->opaque)) { | ||
967 | aio_add_poll_ready_handler(ready_list, node); | ||
968 | |||
969 | node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS; | ||
970 | diff --git a/util/aio-win32.c b/util/aio-win32.c | ||
971 | index XXXXXXX..XXXXXXX 100644 | ||
972 | --- a/util/aio-win32.c | ||
973 | +++ b/util/aio-win32.c | ||
974 | @@ -XXX,XX +XXX,XX @@ struct AioHandler { | ||
975 | GPollFD pfd; | ||
976 | int deleted; | ||
977 | void *opaque; | ||
978 | - bool is_external; | ||
979 | QLIST_ENTRY(AioHandler) node; | ||
980 | }; | ||
981 | |||
982 | @@ -XXX,XX +XXX,XX @@ static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node) | ||
983 | |||
984 | void aio_set_fd_handler(AioContext *ctx, | ||
985 | int fd, | ||
986 | - bool is_external, | ||
987 | IOHandler *io_read, | ||
988 | IOHandler *io_write, | ||
989 | AioPollFn *io_poll, | ||
990 | @@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx, | ||
991 | node->opaque = opaque; | ||
992 | node->io_read = io_read; | ||
993 | node->io_write = io_write; | ||
994 | - node->is_external = is_external; | ||
995 | |||
996 | if (io_read) { | ||
997 | bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE; | ||
998 | @@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx, | ||
999 | |||
1000 | void aio_set_event_notifier(AioContext *ctx, | ||
1001 | EventNotifier *e, | ||
1002 | - bool is_external, | ||
1003 | EventNotifierHandler *io_notify, | ||
1004 | AioPollFn *io_poll, | ||
1005 | EventNotifierHandler *io_poll_ready) | ||
1006 | @@ -XXX,XX +XXX,XX @@ void aio_set_event_notifier(AioContext *ctx, | ||
1007 | node->e = e; | ||
1008 | node->pfd.fd = (uintptr_t)event_notifier_get_handle(e); | ||
1009 | node->pfd.events = G_IO_IN; | ||
1010 | - node->is_external = is_external; | ||
1011 | QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node); | ||
1012 | |||
1013 | g_source_add_poll(&ctx->source, &node->pfd); | ||
1014 | @@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking) | ||
1015 | /* fill fd sets */ | ||
1016 | count = 0; | ||
1017 | QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { | ||
1018 | - if (!node->deleted && node->io_notify | ||
1019 | - && aio_node_check(ctx, node->is_external)) { | ||
1020 | + if (!node->deleted && node->io_notify) { | ||
1021 | assert(count < MAXIMUM_WAIT_OBJECTS); | ||
1022 | events[count++] = event_notifier_get_handle(node->e); | ||
1023 | } | ||
1024 | diff --git a/util/async.c b/util/async.c | ||
1025 | index XXXXXXX..XXXXXXX 100644 | ||
1026 | --- a/util/async.c | ||
1027 | +++ b/util/async.c | ||
1028 | @@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source) | ||
1029 | g_free(bh); | ||
1030 | } | ||
1031 | |||
1032 | - aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL, NULL); | ||
1033 | + aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL, NULL); | ||
1034 | event_notifier_cleanup(&ctx->notifier); | ||
1035 | qemu_rec_mutex_destroy(&ctx->lock); | ||
1036 | qemu_lockcnt_destroy(&ctx->list_lock); | ||
1037 | @@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp) | ||
1038 | QSLIST_INIT(&ctx->scheduled_coroutines); | ||
1039 | |||
1040 | aio_set_event_notifier(ctx, &ctx->notifier, | ||
1041 | - false, | ||
1042 | aio_context_notifier_cb, | ||
1043 | aio_context_notifier_poll, | ||
1044 | aio_context_notifier_poll_ready); | ||
1045 | diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c | ||
1046 | index XXXXXXX..XXXXXXX 100644 | ||
1047 | --- a/util/fdmon-epoll.c | ||
1048 | +++ b/util/fdmon-epoll.c | ||
1049 | @@ -XXX,XX +XXX,XX @@ static int fdmon_epoll_wait(AioContext *ctx, AioHandlerList *ready_list, | ||
1050 | int i, ret = 0; | ||
1051 | struct epoll_event events[128]; | ||
1052 | |||
1053 | - /* Fall back while external clients are disabled */ | ||
1054 | - if (qatomic_read(&ctx->external_disable_cnt)) { | ||
1055 | - return fdmon_poll_ops.wait(ctx, ready_list, timeout); | ||
1056 | - } | ||
1057 | - | ||
1058 | if (timeout > 0) { | ||
1059 | ret = qemu_poll_ns(&pfd, 1, timeout); | ||
1060 | if (ret > 0) { | ||
1061 | @@ -XXX,XX +XXX,XX @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd) | ||
1062 | return false; | ||
1063 | } | ||
1064 | |||
1065 | - /* Do not upgrade while external clients are disabled */ | ||
1066 | - if (qatomic_read(&ctx->external_disable_cnt)) { | ||
1067 | - return false; | ||
1068 | - } | ||
1069 | - | ||
1070 | if (npfd < EPOLL_ENABLE_THRESHOLD) { | ||
1071 | return false; | ||
1072 | } | ||
1073 | diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c | ||
1074 | index XXXXXXX..XXXXXXX 100644 | ||
1075 | --- a/util/fdmon-io_uring.c | ||
1076 | +++ b/util/fdmon-io_uring.c | ||
1077 | @@ -XXX,XX +XXX,XX @@ static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list, | ||
1078 | unsigned wait_nr = 1; /* block until at least one cqe is ready */ | ||
1079 | int ret; | ||
1080 | |||
1081 | - /* Fall back while external clients are disabled */ | ||
1082 | - if (qatomic_read(&ctx->external_disable_cnt)) { | ||
1083 | - return fdmon_poll_ops.wait(ctx, ready_list, timeout); | ||
1084 | - } | ||
1085 | - | ||
1086 | if (timeout == 0) { | ||
1087 | wait_nr = 0; /* non-blocking */ | ||
1088 | } else if (timeout > 0) { | ||
1089 | @@ -XXX,XX +XXX,XX @@ static bool fdmon_io_uring_need_wait(AioContext *ctx) | ||
1090 | return true; | ||
1091 | } | ||
1092 | |||
1093 | - /* Are we falling back to fdmon-poll? */ | ||
1094 | - return qatomic_read(&ctx->external_disable_cnt); | ||
1095 | + return false; | ||
1096 | } | ||
1097 | |||
1098 | static const FDMonOps fdmon_io_uring_ops = { | ||
1099 | diff --git a/util/fdmon-poll.c b/util/fdmon-poll.c | ||
1100 | index XXXXXXX..XXXXXXX 100644 | ||
1101 | --- a/util/fdmon-poll.c | ||
1102 | +++ b/util/fdmon-poll.c | ||
1103 | @@ -XXX,XX +XXX,XX @@ static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list, | ||
1104 | assert(npfd == 0); | ||
1105 | |||
1106 | QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { | ||
1107 | - if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events | ||
1108 | - && aio_node_check(ctx, node->is_external)) { | ||
1109 | + if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events) { | ||
1110 | add_pollfd(node); | ||
1111 | } | ||
1112 | } | ||
1113 | diff --git a/util/main-loop.c b/util/main-loop.c | ||
1114 | index XXXXXXX..XXXXXXX 100644 | ||
1115 | --- a/util/main-loop.c | ||
1116 | +++ b/util/main-loop.c | ||
1117 | @@ -XXX,XX +XXX,XX @@ void qemu_set_fd_handler(int fd, | ||
1118 | void *opaque) | ||
1119 | { | ||
1120 | iohandler_init(); | ||
1121 | - aio_set_fd_handler(iohandler_ctx, fd, false, | ||
1122 | - fd_read, fd_write, NULL, NULL, opaque); | ||
1123 | + aio_set_fd_handler(iohandler_ctx, fd, fd_read, fd_write, NULL, NULL, | ||
1124 | + opaque); | ||
1125 | } | ||
1126 | |||
1127 | void event_notifier_set_handler(EventNotifier *e, | ||
1128 | EventNotifierHandler *handler) | ||
1129 | { | ||
1130 | iohandler_init(); | ||
1131 | - aio_set_event_notifier(iohandler_ctx, e, false, | ||
1132 | - handler, NULL, NULL); | ||
1133 | + aio_set_event_notifier(iohandler_ctx, e, handler, NULL, NULL); | ||
1134 | } | ||
1135 | diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c | ||
1136 | index XXXXXXX..XXXXXXX 100644 | ||
1137 | --- a/util/qemu-coroutine-io.c | ||
1138 | +++ b/util/qemu-coroutine-io.c | ||
1139 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
1140 | static void fd_coroutine_enter(void *opaque) | ||
1141 | { | ||
1142 | FDYieldUntilData *data = opaque; | ||
1143 | - aio_set_fd_handler(data->ctx, data->fd, false, | ||
1144 | - NULL, NULL, NULL, NULL, NULL); | ||
1145 | + aio_set_fd_handler(data->ctx, data->fd, NULL, NULL, NULL, NULL, NULL); | ||
1146 | qemu_coroutine_enter(data->co); | ||
1147 | } | ||
1148 | |||
1149 | @@ -XXX,XX +XXX,XX @@ void coroutine_fn yield_until_fd_readable(int fd) | ||
1150 | data.ctx = qemu_get_current_aio_context(); | ||
1151 | data.co = qemu_coroutine_self(); | ||
1152 | data.fd = fd; | ||
1153 | - aio_set_fd_handler( | ||
1154 | - data.ctx, fd, false, fd_coroutine_enter, NULL, NULL, NULL, &data); | ||
1155 | + aio_set_fd_handler(data.ctx, fd, fd_coroutine_enter, NULL, NULL, NULL, | ||
1156 | + &data); | ||
1157 | qemu_coroutine_yield(); | ||
1158 | } | ||
1159 | diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c | ||
1160 | index XXXXXXX..XXXXXXX 100644 | ||
1161 | --- a/util/vhost-user-server.c | ||
1162 | +++ b/util/vhost-user-server.c | ||
1163 | @@ -XXX,XX +XXX,XX @@ set_watch(VuDev *vu_dev, int fd, int vu_evt, | ||
1164 | vu_fd_watch->fd = fd; | ||
1165 | vu_fd_watch->cb = cb; | ||
1166 | qemu_socket_set_nonblock(fd); | ||
1167 | - aio_set_fd_handler(server->ioc->ctx, fd, false, kick_handler, | ||
1168 | + aio_set_fd_handler(server->ioc->ctx, fd, kick_handler, | ||
1169 | NULL, NULL, NULL, vu_fd_watch); | ||
1170 | vu_fd_watch->vu_dev = vu_dev; | ||
1171 | vu_fd_watch->pvt = pvt; | ||
1172 | @@ -XXX,XX +XXX,XX @@ static void remove_watch(VuDev *vu_dev, int fd) | ||
1173 | if (!vu_fd_watch) { | ||
1174 | return; | ||
1175 | } | ||
1176 | - aio_set_fd_handler(server->ioc->ctx, fd, false, | ||
1177 | - NULL, NULL, NULL, NULL, NULL); | ||
1178 | + aio_set_fd_handler(server->ioc->ctx, fd, NULL, NULL, NULL, NULL, NULL); | ||
1179 | |||
1180 | QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next); | ||
1181 | g_free(vu_fd_watch); | ||
1182 | @@ -XXX,XX +XXX,XX @@ void vhost_user_server_stop(VuServer *server) | ||
1183 | VuFdWatch *vu_fd_watch; | ||
1184 | |||
1185 | QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { | ||
1186 | - aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false, | ||
1187 | + aio_set_fd_handler(server->ctx, vu_fd_watch->fd, | ||
1188 | NULL, NULL, NULL, NULL, vu_fd_watch); | ||
1189 | } | ||
1190 | |||
1191 | @@ -XXX,XX +XXX,XX @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx) | ||
1192 | qio_channel_attach_aio_context(server->ioc, ctx); | ||
1193 | |||
1194 | QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { | ||
1195 | - aio_set_fd_handler(ctx, vu_fd_watch->fd, false, kick_handler, NULL, | ||
1196 | + aio_set_fd_handler(ctx, vu_fd_watch->fd, kick_handler, NULL, | ||
1197 | NULL, NULL, vu_fd_watch); | ||
1198 | } | ||
1199 | |||
1200 | @@ -XXX,XX +XXX,XX @@ void vhost_user_server_detach_aio_context(VuServer *server) | ||
1201 | VuFdWatch *vu_fd_watch; | ||
1202 | |||
1203 | QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { | ||
1204 | - aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false, | ||
1205 | + aio_set_fd_handler(server->ctx, vu_fd_watch->fd, | ||
1206 | NULL, NULL, NULL, NULL, vu_fd_watch); | ||
1207 | } | ||
1208 | |||
1209 | diff --git a/tests/unit/meson.build b/tests/unit/meson.build | ||
1210 | index XXXXXXX..XXXXXXX 100644 | ||
1211 | --- a/tests/unit/meson.build | ||
1212 | +++ b/tests/unit/meson.build | ||
1213 | @@ -XXX,XX +XXX,XX @@ if have_block | ||
1214 | if nettle.found() or gcrypt.found() | ||
1215 | tests += {'test-crypto-pbkdf': [io]} | ||
1216 | endif | ||
1217 | - if config_host_data.get('CONFIG_EPOLL_CREATE1') | ||
1218 | - tests += {'test-fdmon-epoll': [testblock]} | ||
1219 | - endif | ||
1220 | endif | ||
1221 | |||
1222 | if have_system | ||
1223 | -- | 51 | -- |
1224 | 2.40.1 | 52 | 2.13.6 |
1225 | 53 | ||
1226 | 54 | diff view generated by jsdifflib |
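Taken together, the hunks above make one mechanical change: the is_external argument disappears from aio_set_fd_handler()/aio_set_event_notifier(), and the aio_node_check() tests disappear from the dispatch and polling paths. In isolation, the simplified contract looks roughly like the following self-contained C sketch; all names here are invented for illustration and this is not QEMU code.

#include <stdio.h>
#include <stdlib.h>

typedef void IOHandler(void *opaque);

typedef struct Handler {
    int fd;
    IOHandler *io_read;   /* called when fd is readable */
    IOHandler *io_write;  /* called when fd is writable */
    void *opaque;
    struct Handler *next;
} Handler;

static Handler *handlers;

/* Mirrors the new aio_set_fd_handler() shape: no is_external argument. */
static void set_fd_handler(int fd, IOHandler *io_read, IOHandler *io_write,
                           void *opaque)
{
    Handler *h = calloc(1, sizeof(*h));
    h->fd = fd;
    h->io_read = io_read;
    h->io_write = io_write;
    h->opaque = opaque;
    h->next = handlers;
    handlers = h;
}

/*
 * Dispatch: before the conversion, each handler also carried an is_external
 * flag that was tested against the context's external_disable_cnt; now fd
 * readiness alone decides whether a callback runs.
 */
static void dispatch(int fd, int readable, int writable)
{
    for (Handler *h = handlers; h; h = h->next) {
        if (h->fd == fd && readable && h->io_read) {
            h->io_read(h->opaque);
        }
        if (h->fd == fd && writable && h->io_write) {
            h->io_write(h->opaque);
        }
    }
}

static void on_read(void *opaque)
{
    printf("fd %d is readable\n", *(int *)opaque);
}

int main(void)
{
    int fd = 42; /* placeholder; a real caller would pass an open fd */
    set_fd_handler(fd, on_read, NULL, &fd);
    dispatch(fd, 1, 0);
    return 0;
}

Callers that used to pass false simply drop the argument; callers that passed true no longer exist once the earlier patches in the series have converted them to drained_begin/end callbacks.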
New patch | |||
---|---|---|---|
1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
2 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
3 | --- | ||
4 | block/io.c | 6 ++++++ | ||
5 | 1 file changed, 6 insertions(+) | ||
6 | |||
6 | |||
7 | diff --git a/block/io.c b/block/io.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/block/io.c | ||
10 | +++ b/block/io.c | ||
11 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
12 | BdrvNextIterator it; | ||
13 | GSList *aio_ctxs = NULL, *ctx; | ||
14 | |||
15 | + /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread | ||
16 | + * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on | ||
17 | + * nodes in several different AioContexts, so make sure we're in the main | ||
18 | + * context. */ | ||
19 | + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | ||
20 | + | ||
21 | block_job_pause_all(); | ||
22 | |||
23 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
24 | -- | ||
25 | 2.13.6 | ||
26 | |||
27 | diff view generated by jsdifflib |
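The assertion added above encodes a threading rule rather than fixing a bug: bdrv_drain_all_begin() may poll nodes living in several AioContexts, so it must itself run in the main loop. A toy model of that invariant, with invented names and no relation to the actual QEMU implementation, is sketched below.

#include <assert.h>
#include <stdio.h>

typedef struct AioContext {
    const char *name;
} AioContext;

static AioContext main_ctx = { "main" };

/* Stand-in for qemu_get_current_aio_context(): each thread sees its own. */
static __thread AioContext *current_ctx = &main_ctx;

static AioContext *get_current_context(void)
{
    return current_ctx;
}

static AioContext *get_main_context(void)
{
    return &main_ctx;
}

/*
 * Polling several contexts is only safe from the main context, so fail
 * loudly if a caller gets this wrong -- the same guard the hunk adds.
 */
static void drain_all_begin(void)
{
    assert(get_current_context() == get_main_context());
    printf("draining all nodes from the %s context\n",
           get_current_context()->name);
}

int main(void)
{
    drain_all_begin(); /* OK here: main() runs in the main context */
    return 0;
}

A build of this model aborts if drain_all_begin() is ever reached from a thread whose context is not the main one, which is exactly the failure mode the comment in the hunk describes.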
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | bdrv_drained_begin() doesn't increase bs->quiesce_counter recursively |
---|---|---|---|
2 | | 2 | and also doesn't notify other parent nodes of children, which together |
3 | Each vhost-user-blk request runs in a coroutine. When the BlockBackend | 3 | mean that the child nodes are not actually drained and that |
4 | enters a drained section, we need to enter a quiescent state. Currently | 4 | bdrv_drained_begin() provides useful functionality only on a single node. |
2 | 5 | ||
3 | Each vhost-user-blk request runs in a coroutine. When the BlockBackend | 6 | To keep things consistent, we also shouldn't call the block driver |
4 | enters a drained section we need to enter a quiescent state. Currently | 7 | callbacks recursively. |
5 | any in-flight requests race with bdrv_drained_begin() because it is | ||
6 | unaware of vhost-user-blk requests. | ||
7 | 8 | ||
8 | When blk_co_preadv/pwritev()/etc returns it wakes the | 9 | A proper recursive drain version that provides an actually working |
9 | bdrv_drained_begin() thread but vhost-user-blk request processing has | 10 | drained section for child nodes will be introduced later. |
10 | not yet finished. The request coroutine continues executing while the | ||
11 | main loop thread thinks it is in a drained section. | ||
12 | 11 | ||
13 | One example where this is unsafe is for blk_set_aio_context() where | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | bdrv_drained_begin() is called before .aio_context_detached() and | 13 | Reviewed-by: Fam Zheng <famz@redhat.com> |
15 | .aio_context_attach(). If request coroutines are still running after | 14 | --- |
16 | bdrv_drained_begin(), then the AioContext could change underneath them | 15 | block/io.c | 16 +++++++++------- |
17 | and they race with new requests processed in the new AioContext. This | 16 | 1 file changed, 9 insertions(+), 7 deletions(-) |
18 | could lead to virtqueue corruption, for example. | ||
19 | 17 | ||
20 | (This example is theoretical, I came across this while reading the | 18 | diff --git a/block/io.c b/block/io.c |
21 | code and have not tried to reproduce it.) | ||
22 | |||
23 | It's easy to make bdrv_drained_begin() wait for in-flight requests: add | ||
24 | a .drained_poll() callback that checks the VuServer's in-flight counter. | ||
25 | VuServer just needs an API that returns true when there are requests in | ||
26 | flight. The in-flight counter needs to be atomic. | ||
27 | |||
28 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
29 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
30 | Message-Id: <20230516190238.8401-7-stefanha@redhat.com> | ||
31 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
32 | --- | ||
33 | include/qemu/vhost-user-server.h | 4 +++- | ||
34 | block/export/vhost-user-blk-server.c | 13 +++++++++++++ | ||
35 | util/vhost-user-server.c | 18 ++++++++++++------ | ||
36 | 3 files changed, 28 insertions(+), 7 deletions(-) | ||
37 | |||
38 | diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h | ||
39 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
40 | --- a/include/qemu/vhost-user-server.h | 20 | --- a/block/io.c |
41 | +++ b/include/qemu/vhost-user-server.h | 21 | +++ b/block/io.c |
42 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 22 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) |
43 | int max_queues; | ||
44 | const VuDevIface *vu_iface; | ||
45 | |||
46 | + unsigned int in_flight; /* atomic */ | ||
47 | + | ||
48 | /* Protected by ctx lock */ | ||
49 | - unsigned int in_flight; | ||
50 | bool wait_idle; | ||
51 | VuDev vu_dev; | ||
52 | QIOChannel *ioc; /* The I/O channel with the client */ | ||
53 | @@ -XXX,XX +XXX,XX @@ void vhost_user_server_stop(VuServer *server); | ||
54 | |||
55 | void vhost_user_server_inc_in_flight(VuServer *server); | ||
56 | void vhost_user_server_dec_in_flight(VuServer *server); | ||
57 | +bool vhost_user_server_has_in_flight(VuServer *server); | ||
58 | |||
59 | void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx); | ||
60 | void vhost_user_server_detach_aio_context(VuServer *server); | ||
61 | diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/block/export/vhost-user-blk-server.c | ||
64 | +++ b/block/export/vhost-user-blk-server.c | ||
65 | @@ -XXX,XX +XXX,XX @@ static void vu_blk_exp_resize(void *opaque) | ||
66 | vu_config_change_msg(&vexp->vu_server.vu_dev); | ||
67 | } | 23 | } |
68 | 24 | ||
69 | +/* | 25 | /* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ |
70 | + * Ensures that bdrv_drained_begin() waits until in-flight requests complete. | 26 | -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
71 | + * | 27 | +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive) |
72 | + * Called with vexp->export.ctx acquired. | ||
73 | + */ | ||
74 | +static bool vu_blk_drained_poll(void *opaque) | ||
75 | +{ | ||
76 | + VuBlkExport *vexp = opaque; | ||
77 | + | ||
78 | + return vhost_user_server_has_in_flight(&vexp->vu_server); | ||
79 | +} | ||
80 | + | ||
81 | static const BlockDevOps vu_blk_dev_ops = { | ||
82 | + .drained_poll = vu_blk_drained_poll, | ||
83 | .resize_cb = vu_blk_exp_resize, | ||
84 | }; | ||
85 | |||
86 | diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/util/vhost-user-server.c | ||
89 | +++ b/util/vhost-user-server.c | ||
90 | @@ -XXX,XX +XXX,XX @@ static void panic_cb(VuDev *vu_dev, const char *buf) | ||
91 | void vhost_user_server_inc_in_flight(VuServer *server) | ||
92 | { | 28 | { |
93 | assert(!server->wait_idle); | 29 | BdrvChild *child, *tmp; |
94 | - server->in_flight++; | 30 | BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; |
95 | + qatomic_inc(&server->in_flight); | 31 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
96 | } | 32 | bdrv_coroutine_enter(bs, data.co); |
97 | 33 | BDRV_POLL_WHILE(bs, !data.done); | |
98 | void vhost_user_server_dec_in_flight(VuServer *server) | 34 | |
99 | { | 35 | - QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { |
100 | - server->in_flight--; | 36 | - bdrv_drain_invoke(child->bs, begin); |
101 | - if (server->wait_idle && !server->in_flight) { | 37 | + if (recursive) { |
102 | - aio_co_wake(server->co_trip); | 38 | + QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { |
103 | + if (qatomic_fetch_dec(&server->in_flight) == 1) { | 39 | + bdrv_drain_invoke(child->bs, begin, true); |
104 | + if (server->wait_idle) { | ||
105 | + aio_co_wake(server->co_trip); | ||
106 | + } | 40 | + } |
107 | } | 41 | } |
108 | } | 42 | } |
109 | 43 | ||
110 | +bool vhost_user_server_has_in_flight(VuServer *server) | 44 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
111 | +{ | 45 | bdrv_parent_drained_begin(bs); |
112 | + return qatomic_load_acquire(&server->in_flight) > 0; | ||
113 | +} | ||
114 | + | ||
115 | static bool coroutine_fn | ||
116 | vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) | ||
117 | { | ||
118 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn void vu_client_trip(void *opaque) | ||
119 | /* Keep running */ | ||
120 | } | 46 | } |
121 | 47 | ||
122 | - if (server->in_flight) { | 48 | - bdrv_drain_invoke(bs, true); |
123 | + if (vhost_user_server_has_in_flight(server)) { | 49 | + bdrv_drain_invoke(bs, true, false); |
124 | /* Wait for requests to complete before we can unmap the memory */ | 50 | bdrv_drain_recurse(bs); |
125 | server->wait_idle = true; | 51 | } |
126 | qemu_coroutine_yield(); | 52 | |
127 | server->wait_idle = false; | 53 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
128 | } | 54 | } |
129 | - assert(server->in_flight == 0); | 55 | |
130 | + assert(!vhost_user_server_has_in_flight(server)); | 56 | /* Re-enable things in child-to-parent order */ |
131 | 57 | - bdrv_drain_invoke(bs, false); | |
132 | vu_deinit(vu_dev); | 58 | + bdrv_drain_invoke(bs, false, false); |
133 | 59 | bdrv_parent_drained_end(bs); | |
60 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
61 | } | ||
62 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
63 | aio_context_acquire(aio_context); | ||
64 | aio_disable_external(aio_context); | ||
65 | bdrv_parent_drained_begin(bs); | ||
66 | - bdrv_drain_invoke(bs, true); | ||
67 | + bdrv_drain_invoke(bs, true, true); | ||
68 | aio_context_release(aio_context); | ||
69 | |||
70 | if (!g_slist_find(aio_ctxs, aio_context)) { | ||
71 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
72 | |||
73 | /* Re-enable things in child-to-parent order */ | ||
74 | aio_context_acquire(aio_context); | ||
75 | - bdrv_drain_invoke(bs, false); | ||
76 | + bdrv_drain_invoke(bs, false, true); | ||
77 | bdrv_parent_drained_end(bs); | ||
78 | aio_enable_external(aio_context); | ||
79 | aio_context_release(aio_context); | ||
134 | -- | 80 | -- |
135 | 2.40.1 | 81 | 2.13.6 |
82 | |||
83 | diff view generated by jsdifflib |
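The core of the patch on the left is the pairing of an atomic in-flight counter with a .drained_poll() callback: requests bump the counter, completion drops it and wakes any waiter, and the poll predicate tells the drain loop whether to keep waiting. A minimal self-contained sketch of that pattern follows, using C11 atomics and invented names; the real code lives in util/vhost-user-server.c as shown above.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct Server {
    atomic_uint in_flight;
} Server;

static void request_start(Server *s)
{
    atomic_fetch_add(&s->in_flight, 1);
}

static void request_end(Server *s)
{
    /*
     * fetch_sub() returns the previous value, so 1 means this was the
     * last request and a waiter parked in the drain loop can be woken.
     */
    if (atomic_fetch_sub(&s->in_flight, 1) == 1) {
        printf("last request finished, wake the drain waiter\n");
    }
}

/* The .drained_poll()-style predicate: drain keeps polling while true. */
static bool has_in_flight(Server *s)
{
    return atomic_load(&s->in_flight) > 0;
}

int main(void)
{
    Server s;
    atomic_init(&s.in_flight, 0);

    request_start(&s);
    printf("drain would wait: %d\n", has_in_flight(&s));

    request_end(&s);
    printf("drain would wait: %d\n", has_in_flight(&s));
    return 0;
}

The real code reads the counter with qatomic_load_acquire(); the sketch's atomic_load() defaults to sequentially consistent ordering, which is at least as strong.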
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | The existing test is for bdrv_drain_all_begin/end() only. Generalise the |
---|---|---|---|
2 | test case so that it can be run for the other variants as well. At the | ||
3 | moment this is only bdrv_drain_begin/end(), but in a while, we'll add | ||
4 | another one. | ||
2 | 5 | ||
3 | The virtio-scsi Host Bus Adapter provides access to devices on a SCSI | 6 | Also, add a backing file to the test node to test whether the operations |
4 | bus. Those SCSI devices typically have a BlockBackend. When the | 7 | work recursively. |
5 | BlockBackend enters a drained section, the SCSI device must temporarily | ||
6 | stop submitting new I/O requests. | ||
7 | 8 | ||
8 | Implement this behavior by temporarily stopping virtio-scsi virtqueue | ||
9 | processing when one of the SCSI devices enters a drained section. The | ||
10 | new scsi_device_drained_begin() API allows scsi-disk to message the | ||
11 | virtio-scsi HBA. | ||
12 | |||
13 | scsi_device_drained_begin() uses a drain counter so that multiple SCSI | ||
14 | devices can have overlapping drained sections. The HBA only sees one | ||
15 | pair of .drained_begin/end() calls. | ||
16 | |||
17 | After this commit, virtio-scsi no longer depends on hw/virtio's | ||
18 | ioeventfd aio_set_event_notifier(is_external=true). This commit is a | ||
19 | step towards removing the aio_disable_external() API. | ||
20 | |||
21 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
22 | Message-Id: <20230516190238.8401-19-stefanha@redhat.com> | ||
23 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
24 | --- | 10 | --- |
25 | include/hw/scsi/scsi.h | 14 ++++++++++++ | 11 | tests/test-bdrv-drain.c | 69 ++++++++++++++++++++++++++++++++++++++++++++----- |
26 | hw/scsi/scsi-bus.c | 40 +++++++++++++++++++++++++++++++++ | 12 | 1 file changed, 62 insertions(+), 7 deletions(-) |
27 | hw/scsi/scsi-disk.c | 27 +++++++++++++++++----- | ||
28 | hw/scsi/virtio-scsi-dataplane.c | 18 +++++++++------ | ||
29 | hw/scsi/virtio-scsi.c | 38 +++++++++++++++++++++++++++++++ | ||
30 | hw/scsi/trace-events | 2 ++ | ||
31 | 6 files changed, 127 insertions(+), 12 deletions(-) | ||
32 | 13 | ||
33 | diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h | 14 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
34 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
35 | --- a/include/hw/scsi/scsi.h | 16 | --- a/tests/test-bdrv-drain.c |
36 | +++ b/include/hw/scsi/scsi.h | 17 | +++ b/tests/test-bdrv-drain.c |
37 | @@ -XXX,XX +XXX,XX @@ struct SCSIBusInfo { | 18 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = { |
38 | void (*save_request)(QEMUFile *f, SCSIRequest *req); | 19 | |
39 | void *(*load_request)(QEMUFile *f, SCSIRequest *req); | 20 | .bdrv_co_drain_begin = bdrv_test_co_drain_begin, |
40 | void (*free_request)(SCSIBus *bus, void *priv); | 21 | .bdrv_co_drain_end = bdrv_test_co_drain_end, |
41 | + | 22 | + |
42 | + /* | 23 | + .bdrv_child_perm = bdrv_format_default_perms, |
43 | + * Temporarily stop submitting new requests between drained_begin() and | ||
44 | + * drained_end(). Called from the main loop thread with the BQL held. | ||
45 | + * | ||
46 | + * Implement these callbacks if request processing is triggered by a file | ||
47 | + * descriptor like an EventNotifier. Otherwise set them to NULL. | ||
48 | + */ | ||
49 | + void (*drained_begin)(SCSIBus *bus); | ||
50 | + void (*drained_end)(SCSIBus *bus); | ||
51 | }; | 24 | }; |
52 | 25 | ||
53 | #define TYPE_SCSI_BUS "SCSI" | 26 | static void aio_ret_cb(void *opaque, int ret) |
54 | @@ -XXX,XX +XXX,XX @@ struct SCSIBus { | 27 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
55 | 28 | *aio_ret = ret; | |
56 | SCSISense unit_attention; | 29 | } |
57 | const SCSIBusInfo *info; | 30 | |
31 | -static void test_drv_cb_drain_all(void) | ||
32 | +enum drain_type { | ||
33 | + BDRV_DRAIN_ALL, | ||
34 | + BDRV_DRAIN, | ||
35 | +}; | ||
58 | + | 36 | + |
59 | + int drain_count; /* protected by BQL */ | 37 | +static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) |
60 | }; | ||
61 | |||
62 | /** | ||
63 | @@ -XXX,XX +XXX,XX @@ void scsi_req_cancel_complete(SCSIRequest *req); | ||
64 | void scsi_req_cancel(SCSIRequest *req); | ||
65 | void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier); | ||
66 | void scsi_req_retry(SCSIRequest *req); | ||
67 | +void scsi_device_drained_begin(SCSIDevice *sdev); | ||
68 | +void scsi_device_drained_end(SCSIDevice *sdev); | ||
69 | void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense); | ||
70 | void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense); | ||
71 | void scsi_device_report_change(SCSIDevice *dev, SCSISense sense); | ||
72 | diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/hw/scsi/scsi-bus.c | ||
75 | +++ b/hw/scsi/scsi-bus.c | ||
76 | @@ -XXX,XX +XXX,XX @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) | ||
77 | scsi_device_set_ua(sdev, sense); | ||
78 | } | ||
79 | |||
80 | +void scsi_device_drained_begin(SCSIDevice *sdev) | ||
81 | +{ | 38 | +{ |
82 | + SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, sdev->qdev.parent_bus); | 39 | + switch (drain_type) { |
83 | + if (!bus) { | 40 | + case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; |
84 | + return; | 41 | + case BDRV_DRAIN: bdrv_drained_begin(bs); break; |
85 | + } | 42 | + default: g_assert_not_reached(); |
86 | + | ||
87 | + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | ||
88 | + assert(bus->drain_count < INT_MAX); | ||
89 | + | ||
90 | + /* | ||
91 | + * Multiple BlockBackends can be on a SCSIBus and each may begin/end | ||
92 | + * draining at any time. Keep a counter so HBAs only see begin/end once. | ||
93 | + */ | ||
94 | + if (bus->drain_count++ == 0) { | ||
95 | + trace_scsi_bus_drained_begin(bus, sdev); | ||
96 | + if (bus->info->drained_begin) { | ||
97 | + bus->info->drained_begin(bus); | ||
98 | + } | ||
99 | + } | 43 | + } |
100 | +} | 44 | +} |
101 | + | 45 | + |
102 | +void scsi_device_drained_end(SCSIDevice *sdev) | 46 | +static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) |
103 | +{ | 47 | +{ |
104 | + SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, sdev->qdev.parent_bus); | 48 | + switch (drain_type) { |
105 | + if (!bus) { | 49 | + case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; |
106 | + return; | 50 | + case BDRV_DRAIN: bdrv_drained_end(bs); break; |
107 | + } | 51 | + default: g_assert_not_reached(); |
108 | + | ||
109 | + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | ||
110 | + assert(bus->drain_count > 0); | ||
111 | + | ||
112 | + if (bus->drain_count-- == 1) { | ||
113 | + trace_scsi_bus_drained_end(bus, sdev); | ||
114 | + if (bus->info->drained_end) { | ||
115 | + bus->info->drained_end(bus); | ||
116 | + } | ||
117 | + } | 52 | + } |
118 | +} | 53 | +} |
119 | + | 54 | + |
120 | static char *scsibus_get_dev_path(DeviceState *dev) | 55 | +static void test_drv_cb_common(enum drain_type drain_type, bool recursive) |
121 | { | 56 | { |
122 | SCSIDevice *d = SCSI_DEVICE(dev); | 57 | BlockBackend *blk; |
123 | diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c | 58 | - BlockDriverState *bs; |
124 | index XXXXXXX..XXXXXXX 100644 | 59 | - BDRVTestState *s; |
125 | --- a/hw/scsi/scsi-disk.c | 60 | + BlockDriverState *bs, *backing; |
126 | +++ b/hw/scsi/scsi-disk.c | 61 | + BDRVTestState *s, *backing_s; |
127 | @@ -XXX,XX +XXX,XX @@ static void scsi_disk_reset(DeviceState *dev) | 62 | BlockAIOCB *acb; |
128 | s->qdev.scsi_version = s->qdev.default_scsi_version; | 63 | int aio_ret; |
64 | |||
65 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void) | ||
66 | s = bs->opaque; | ||
67 | blk_insert_bs(blk, bs, &error_abort); | ||
68 | |||
69 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
70 | + backing_s = backing->opaque; | ||
71 | + bdrv_set_backing_hd(bs, backing, &error_abort); | ||
72 | + | ||
73 | /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ | ||
74 | g_assert_cmpint(s->drain_count, ==, 0); | ||
75 | - bdrv_drain_all_begin(); | ||
76 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
77 | + | ||
78 | + do_drain_begin(drain_type, bs); | ||
79 | + | ||
80 | g_assert_cmpint(s->drain_count, ==, 1); | ||
81 | - bdrv_drain_all_end(); | ||
82 | + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); | ||
83 | + | ||
84 | + do_drain_end(drain_type, bs); | ||
85 | + | ||
86 | g_assert_cmpint(s->drain_count, ==, 0); | ||
87 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
88 | |||
89 | /* Now do the same while a request is pending */ | ||
90 | aio_ret = -EINPROGRESS; | ||
91 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void) | ||
92 | g_assert_cmpint(aio_ret, ==, -EINPROGRESS); | ||
93 | |||
94 | g_assert_cmpint(s->drain_count, ==, 0); | ||
95 | - bdrv_drain_all_begin(); | ||
96 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
97 | + | ||
98 | + do_drain_begin(drain_type, bs); | ||
99 | + | ||
100 | g_assert_cmpint(aio_ret, ==, 0); | ||
101 | g_assert_cmpint(s->drain_count, ==, 1); | ||
102 | - bdrv_drain_all_end(); | ||
103 | + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); | ||
104 | + | ||
105 | + do_drain_end(drain_type, bs); | ||
106 | + | ||
107 | g_assert_cmpint(s->drain_count, ==, 0); | ||
108 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
109 | |||
110 | + bdrv_unref(backing); | ||
111 | bdrv_unref(bs); | ||
112 | blk_unref(blk); | ||
129 | } | 113 | } |
130 | 114 | ||
131 | +static void scsi_disk_drained_begin(void *opaque) | 115 | +static void test_drv_cb_drain_all(void) |
132 | +{ | 116 | +{ |
133 | + SCSIDiskState *s = opaque; | 117 | + test_drv_cb_common(BDRV_DRAIN_ALL, true); |
134 | + | ||
135 | + scsi_device_drained_begin(&s->qdev); | ||
136 | +} | 118 | +} |
137 | + | 119 | + |
138 | +static void scsi_disk_drained_end(void *opaque) | 120 | +static void test_drv_cb_drain(void) |
139 | +{ | 121 | +{ |
140 | + SCSIDiskState *s = opaque; | 122 | + test_drv_cb_common(BDRV_DRAIN, false); |
141 | + | ||
142 | + scsi_device_drained_end(&s->qdev); | ||
143 | +} | 123 | +} |
144 | + | 124 | + |
145 | static void scsi_disk_resize_cb(void *opaque) | 125 | int main(int argc, char **argv) |
146 | { | 126 | { |
147 | SCSIDiskState *s = opaque; | 127 | bdrv_init(); |
148 | @@ -XXX,XX +XXX,XX @@ static bool scsi_cd_is_medium_locked(void *opaque) | 128 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
129 | g_test_init(&argc, &argv, NULL); | ||
130 | |||
131 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); | ||
132 | + g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); | ||
133 | |||
134 | return g_test_run(); | ||
149 | } | 135 | } |
150 | |||
151 | static const BlockDevOps scsi_disk_removable_block_ops = { | ||
152 | - .change_media_cb = scsi_cd_change_media_cb, | ||
153 | + .change_media_cb = scsi_cd_change_media_cb, | ||
154 | + .drained_begin = scsi_disk_drained_begin, | ||
155 | + .drained_end = scsi_disk_drained_end, | ||
156 | .eject_request_cb = scsi_cd_eject_request_cb, | ||
157 | - .is_tray_open = scsi_cd_is_tray_open, | ||
158 | .is_medium_locked = scsi_cd_is_medium_locked, | ||
159 | - | ||
160 | - .resize_cb = scsi_disk_resize_cb, | ||
161 | + .is_tray_open = scsi_cd_is_tray_open, | ||
162 | + .resize_cb = scsi_disk_resize_cb, | ||
163 | }; | ||
164 | |||
165 | static const BlockDevOps scsi_disk_block_ops = { | ||
166 | - .resize_cb = scsi_disk_resize_cb, | ||
167 | + .drained_begin = scsi_disk_drained_begin, | ||
168 | + .drained_end = scsi_disk_drained_end, | ||
169 | + .resize_cb = scsi_disk_resize_cb, | ||
170 | }; | ||
171 | |||
172 | static void scsi_disk_unit_attention_reported(SCSIDevice *dev) | ||
173 | diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c | ||
174 | index XXXXXXX..XXXXXXX 100644 | ||
175 | --- a/hw/scsi/virtio-scsi-dataplane.c | ||
176 | +++ b/hw/scsi/virtio-scsi-dataplane.c | ||
177 | @@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) | ||
178 | s->dataplane_starting = false; | ||
179 | s->dataplane_started = true; | ||
180 | |||
181 | - aio_context_acquire(s->ctx); | ||
182 | - virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); | ||
183 | - virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); | ||
184 | + if (s->bus.drain_count == 0) { | ||
185 | + aio_context_acquire(s->ctx); | ||
186 | + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); | ||
187 | + virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); | ||
188 | |||
189 | - for (i = 0; i < vs->conf.num_queues; i++) { | ||
190 | - virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); | ||
191 | + for (i = 0; i < vs->conf.num_queues; i++) { | ||
192 | + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); | ||
193 | + } | ||
194 | + aio_context_release(s->ctx); | ||
195 | } | ||
196 | - aio_context_release(s->ctx); | ||
197 | return 0; | ||
198 | |||
199 | fail_host_notifiers: | ||
200 | @@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) | ||
201 | } | ||
202 | s->dataplane_stopping = true; | ||
203 | |||
204 | - aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); | ||
205 | + if (s->bus.drain_count == 0) { | ||
206 | + aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); | ||
207 | + } | ||
208 | |||
209 | blk_drain_all(); /* ensure there are no in-flight requests */ | ||
210 | |||
211 | diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c | ||
212 | index XXXXXXX..XXXXXXX 100644 | ||
213 | --- a/hw/scsi/virtio-scsi.c | ||
214 | +++ b/hw/scsi/virtio-scsi.c | ||
215 | @@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, | ||
216 | } | ||
217 | } | ||
218 | |||
219 | +/* Suspend virtqueue ioeventfd processing during drain */ | ||
220 | +static void virtio_scsi_drained_begin(SCSIBus *bus) | ||
221 | +{ | ||
222 | + VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); | ||
223 | + VirtIODevice *vdev = VIRTIO_DEVICE(s); | ||
224 | + uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + | ||
225 | + s->parent_obj.conf.num_queues; | ||
226 | + | ||
227 | + if (!s->dataplane_started) { | ||
228 | + return; | ||
229 | + } | ||
230 | + | ||
231 | + for (uint32_t i = 0; i < total_queues; i++) { | ||
232 | + VirtQueue *vq = virtio_get_queue(vdev, i); | ||
233 | + virtio_queue_aio_detach_host_notifier(vq, s->ctx); | ||
234 | + } | ||
235 | +} | ||
236 | + | ||
237 | +/* Resume virtqueue ioeventfd processing after drain */ | ||
238 | +static void virtio_scsi_drained_end(SCSIBus *bus) | ||
239 | +{ | ||
240 | + VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); | ||
241 | + VirtIODevice *vdev = VIRTIO_DEVICE(s); | ||
242 | + uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + | ||
243 | + s->parent_obj.conf.num_queues; | ||
244 | + | ||
245 | + if (!s->dataplane_started) { | ||
246 | + return; | ||
247 | + } | ||
248 | + | ||
249 | + for (uint32_t i = 0; i < total_queues; i++) { | ||
250 | + VirtQueue *vq = virtio_get_queue(vdev, i); | ||
251 | + virtio_queue_aio_attach_host_notifier(vq, s->ctx); | ||
252 | + } | ||
253 | +} | ||
254 | + | ||
255 | static struct SCSIBusInfo virtio_scsi_scsi_info = { | ||
256 | .tcq = true, | ||
257 | .max_channel = VIRTIO_SCSI_MAX_CHANNEL, | ||
258 | @@ -XXX,XX +XXX,XX @@ static struct SCSIBusInfo virtio_scsi_scsi_info = { | ||
259 | .get_sg_list = virtio_scsi_get_sg_list, | ||
260 | .save_request = virtio_scsi_save_request, | ||
261 | .load_request = virtio_scsi_load_request, | ||
262 | + .drained_begin = virtio_scsi_drained_begin, | ||
263 | + .drained_end = virtio_scsi_drained_end, | ||
264 | }; | ||
265 | |||
266 | void virtio_scsi_common_realize(DeviceState *dev, | ||
267 | diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events | ||
268 | index XXXXXXX..XXXXXXX 100644 | ||
269 | --- a/hw/scsi/trace-events | ||
270 | +++ b/hw/scsi/trace-events | ||
271 | @@ -XXX,XX +XXX,XX @@ scsi_req_cancel(int target, int lun, int tag) "target %d lun %d tag %d" | ||
272 | scsi_req_data(int target, int lun, int tag, int len) "target %d lun %d tag %d len %d" | ||
273 | scsi_req_data_canceled(int target, int lun, int tag, int len) "target %d lun %d tag %d len %d" | ||
274 | scsi_req_dequeue(int target, int lun, int tag) "target %d lun %d tag %d" | ||
275 | +scsi_bus_drained_begin(void *bus, void *sdev) "bus %p sdev %p" | ||
276 | +scsi_bus_drained_end(void *bus, void *sdev) "bus %p sdev %p" | ||
277 | scsi_req_continue(int target, int lun, int tag) "target %d lun %d tag %d" | ||
278 | scsi_req_continue_canceled(int target, int lun, int tag) "target %d lun %d tag %d" | ||
279 | scsi_req_parsed(int target, int lun, int tag, int cmd, int mode, int xfer) "target %d lun %d tag %d command %d dir %d length %d" | ||
280 | -- | 136 | -- |
281 | 2.40.1 | 137 | 2.13.6 |
138 | |||
139 | diff view generated by jsdifflib |
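The drain counter on SCSIBus is what lets several devices hold overlapping drained sections while the HBA sees a single begin/end pair. A compact model of the counter, in standalone C with invented names, following the scsi-bus.c hunk above:

#include <assert.h>
#include <limits.h>
#include <stdio.h>

typedef struct Bus {
    int drain_count; /* like SCSIBus.drain_count: protected by the BQL */
} Bus;

static void bus_drained_begin(Bus *bus)
{
    assert(bus->drain_count < INT_MAX);
    /* Only the 0 -> 1 transition reaches the HBA. */
    if (bus->drain_count++ == 0) {
        printf("HBA: detach virtqueue ioeventfd handlers\n");
    }
}

static void bus_drained_end(Bus *bus)
{
    assert(bus->drain_count > 0);
    /* Only the 1 -> 0 transition reaches the HBA. */
    if (--bus->drain_count == 0) {
        printf("HBA: re-attach virtqueue ioeventfd handlers\n");
    }
}

int main(void)
{
    Bus bus = { 0 };

    bus_drained_begin(&bus); /* device A starts draining */
    bus_drained_begin(&bus); /* device B overlaps: no HBA callback */
    bus_drained_end(&bus);   /* A finishes: bus is still drained */
    bus_drained_end(&bus);   /* B finishes: HBA resumes processing */
    return 0;
}

The same 0 -> 1 / 1 -> 0 shape drives the virtio-scsi .drained_begin/end callbacks above, which detach and re-attach the host notifiers for all virtqueues exactly once per drained section.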
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | This is currently only working correctly for bdrv_drain(), not for |
---|---|---|---|
2 | | 2 | bdrv_drain_all(). Leave a comment for the drain_all case; we'll address |
3 | it later. | ||
2 | 4 | ||
3 | This is part of ongoing work to remove the aio_disable_external() API. | ||
4 | |||
5 | Use BlockDevOps .drained_begin/end/poll() instead of | ||
6 | aio_set_fd_handler(is_external=true). | ||
7 | |||
8 | As a side-effect the FUSE export now follows AioContext changes like the | ||
9 | other export types. | ||
10 | |||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | Message-Id: <20230516190238.8401-16-stefanha@redhat.com> | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | --- | 6 | --- |
16 | block/export/fuse.c | 56 +++++++++++++++++++++++++++++++++++++++++++-- | 7 | tests/test-bdrv-drain.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ |
17 | 1 file changed, 54 insertions(+), 2 deletions(-) | 8 | 1 file changed, 45 insertions(+) |
18 | 9 | ||
19 | diff --git a/block/export/fuse.c b/block/export/fuse.c | 10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
20 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/export/fuse.c | 12 | --- a/tests/test-bdrv-drain.c |
22 | +++ b/block/export/fuse.c | 13 | +++ b/tests/test-bdrv-drain.c |
23 | @@ -XXX,XX +XXX,XX @@ typedef struct FuseExport { | 14 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void) |
24 | 15 | test_drv_cb_common(BDRV_DRAIN, false); | |
25 | struct fuse_session *fuse_session; | 16 | } |
26 | struct fuse_buf fuse_buf; | 17 | |
27 | + unsigned int in_flight; /* atomic */ | 18 | +static void test_quiesce_common(enum drain_type drain_type, bool recursive) |
28 | bool mounted, fd_handler_set_up; | ||
29 | |||
30 | char *mountpoint; | ||
31 | @@ -XXX,XX +XXX,XX @@ static void read_from_fuse_export(void *opaque); | ||
32 | static bool is_regular_file(const char *path, Error **errp); | ||
33 | |||
34 | |||
35 | +static void fuse_export_drained_begin(void *opaque) | ||
36 | +{ | 19 | +{ |
37 | + FuseExport *exp = opaque; | 20 | + BlockBackend *blk; |
21 | + BlockDriverState *bs, *backing; | ||
38 | + | 22 | + |
39 | + aio_set_fd_handler(exp->common.ctx, | 23 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
40 | + fuse_session_fd(exp->fuse_session), false, | 24 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, |
41 | + NULL, NULL, NULL, NULL, NULL); | 25 | + &error_abort); |
42 | + exp->fd_handler_set_up = false; | 26 | + blk_insert_bs(blk, bs, &error_abort); |
27 | + | ||
28 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
29 | + bdrv_set_backing_hd(bs, backing, &error_abort); | ||
30 | + | ||
31 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
32 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
33 | + | ||
34 | + do_drain_begin(drain_type, bs); | ||
35 | + | ||
36 | + g_assert_cmpint(bs->quiesce_counter, ==, 1); | ||
37 | + g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); | ||
38 | + | ||
39 | + do_drain_end(drain_type, bs); | ||
40 | + | ||
41 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
42 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
43 | + | ||
44 | + bdrv_unref(backing); | ||
45 | + bdrv_unref(bs); | ||
46 | + blk_unref(blk); | ||
43 | +} | 47 | +} |
44 | + | 48 | + |
45 | +static void fuse_export_drained_end(void *opaque) | 49 | +static void test_quiesce_drain_all(void) |
46 | +{ | 50 | +{ |
47 | + FuseExport *exp = opaque; | 51 | + // XXX drain_all doesn't quiesce |
48 | + | 52 | + //test_quiesce_common(BDRV_DRAIN_ALL, true); |
49 | + /* Refresh AioContext in case it changed */ | ||
50 | + exp->common.ctx = blk_get_aio_context(exp->common.blk); | ||
51 | + | ||
52 | + aio_set_fd_handler(exp->common.ctx, | ||
53 | + fuse_session_fd(exp->fuse_session), false, | ||
54 | + read_from_fuse_export, NULL, NULL, NULL, exp); | ||
55 | + exp->fd_handler_set_up = true; | ||
56 | +} | 53 | +} |
57 | + | 54 | + |
58 | +static bool fuse_export_drained_poll(void *opaque) | 55 | +static void test_quiesce_drain(void) |
59 | +{ | 56 | +{ |
60 | + FuseExport *exp = opaque; | 57 | + test_quiesce_common(BDRV_DRAIN, false); |
61 | + | ||
62 | + return qatomic_read(&exp->in_flight) > 0; | ||
63 | +} | 58 | +} |
64 | + | 59 | + |
65 | +static const BlockDevOps fuse_export_blk_dev_ops = { | 60 | int main(int argc, char **argv) |
66 | + .drained_begin = fuse_export_drained_begin, | 61 | { |
67 | + .drained_end = fuse_export_drained_end, | 62 | bdrv_init(); |
68 | + .drained_poll = fuse_export_drained_poll, | 63 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
69 | +}; | 64 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); |
65 | g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); | ||
66 | |||
67 | + g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
68 | + g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
70 | + | 69 | + |
71 | static int fuse_export_create(BlockExport *blk_exp, | 70 | return g_test_run(); |
72 | BlockExportOptions *blk_exp_args, | ||
73 | Error **errp) | ||
74 | @@ -XXX,XX +XXX,XX @@ static int fuse_export_create(BlockExport *blk_exp, | ||
75 | } | ||
76 | } | ||
77 | |||
78 | + blk_set_dev_ops(exp->common.blk, &fuse_export_blk_dev_ops, exp); | ||
79 | + | ||
80 | + /* | ||
81 | + * We handle draining ourselves using an in-flight counter and by disabling | ||
82 | + * the FUSE fd handler. Do not queue BlockBackend requests, they need to | ||
83 | + * complete so the in-flight counter reaches zero. | ||
84 | + */ | ||
85 | + blk_set_disable_request_queuing(exp->common.blk, true); | ||
86 | + | ||
87 | init_exports_table(); | ||
88 | |||
89 | /* | ||
90 | @@ -XXX,XX +XXX,XX @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint, | ||
91 | g_hash_table_insert(exports, g_strdup(mountpoint), NULL); | ||
92 | |||
93 | aio_set_fd_handler(exp->common.ctx, | ||
94 | - fuse_session_fd(exp->fuse_session), true, | ||
95 | + fuse_session_fd(exp->fuse_session), false, | ||
96 | read_from_fuse_export, NULL, NULL, NULL, exp); | ||
97 | exp->fd_handler_set_up = true; | ||
98 | |||
99 | @@ -XXX,XX +XXX,XX @@ static void read_from_fuse_export(void *opaque) | ||
100 | |||
101 | blk_exp_ref(&exp->common); | ||
102 | |||
103 | + qatomic_inc(&exp->in_flight); | ||
104 | + | ||
105 | do { | ||
106 | ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf); | ||
107 | } while (ret == -EINTR); | ||
108 | @@ -XXX,XX +XXX,XX @@ static void read_from_fuse_export(void *opaque) | ||
109 | fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf); | ||
110 | |||
111 | out: | ||
112 | + if (qatomic_fetch_dec(&exp->in_flight) == 1) { | ||
113 | + aio_wait_kick(); /* wake AIO_WAIT_WHILE() */ | ||
114 | + } | ||
115 | + | ||
116 | blk_exp_unref(&exp->common); | ||
117 | } | 71 | } |
118 | |||
119 | @@ -XXX,XX +XXX,XX @@ static void fuse_export_shutdown(BlockExport *blk_exp) | ||
120 | |||
121 | if (exp->fd_handler_set_up) { | ||
122 | aio_set_fd_handler(exp->common.ctx, | ||
123 | - fuse_session_fd(exp->fuse_session), true, | ||
124 | + fuse_session_fd(exp->fuse_session), false, | ||
125 | NULL, NULL, NULL, NULL, NULL); | ||
126 | exp->fd_handler_set_up = false; | ||
127 | } | ||
128 | -- | 72 | -- |
129 | 2.40.1 | 73 | 2.13.6 |
74 | |||
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | Block jobs already paused themselves when their main BlockBackend |
---|---|---|---|
2 | entered a drained section. This is not good enough: we also want to | ||
3 | pause a block job, so that it submits no new requests, if, for example, | ||
4 | the mirror target node should be drained. | ||
2 | 5 | ||
3 | blk_set_aio_context() is not fully transactional because | 6 | This implements .drained_begin/end callbacks in child_job in order to |
4 | blk_do_set_aio_context() updates blk->ctx outside the transaction. Most | 7 | consider all block nodes related to the job, and removes the |
5 | of the time this goes unnoticed but a BlockDevOps.drained_end() callback | 8 | BlockBackend callbacks which are unnecessary now because the root of the |
6 | that invokes blk_get_aio_context() fails assert(ctx == blk->ctx). This | 9 | job main BlockBackend is always referenced with a child_job, too. |
7 | happens because blk->ctx is only assigned after | ||
8 | BlockDevOps.drained_end() is called and we're in an intermediate state | ||
9 | where BlockDrvierState nodes already have the new context and the | ||
10 | BlockBackend still has the old context. | ||
11 | 10 | ||
12 | Making blk_set_aio_context() fully transactional solves this assertion | ||
13 | failure because the BlockBackend's context is updated as part of the | ||
14 | transaction (before BlockDevOps.drained_end() is called). | ||
15 | |||
16 | Split blk_do_set_aio_context() in order to solve this assertion failure. | ||
17 | This helper function actually serves two different purposes: | ||
18 | 1. It drives blk_set_aio_context(). | ||
19 | 2. It responds to BdrvChildClass->change_aio_ctx(). | ||
20 | |||
21 | Get rid of the helper function. Do #1 inside blk_set_aio_context() and | ||
22 | do #2 inside blk_root_set_aio_ctx_commit(). This simplifies the code. | ||
23 | |||
24 | The only drawback of the fully transactional approach is that | ||
25 | blk_set_aio_context() must contend with blk_root_set_aio_ctx_commit() | ||
26 | being invoked as part of the AioContext change propagation. This can be | ||
27 | solved by temporarily setting blk->allow_aio_context_change to true. | ||
28 | |||
29 | Future patches call blk_get_aio_context() from | ||
30 | BlockDevOps->drained_end(), so this patch will become necessary. | ||
31 | |||
32 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
33 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
34 | Message-Id: <20230516190238.8401-2-stefanha@redhat.com> | ||
35 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
36 | --- | 12 | --- |
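The allow_aio_context_change handling in this patch is a plain
save/set/restore pattern. A reduced sketch of its shape (toy types, not
QEMU API):

    #include <stdbool.h>

    typedef struct Ctx Ctx;

    typedef struct Blk {
        bool allow_ctx_change;
        Ctx *ctx;
    } Blk;

    /* Stand-in for the propagated change: a backend that does not
     * allow context changes vetoes it. */
    static int try_change_ctx(Blk *blk, Ctx *new_ctx)
    {
        if (!blk->allow_ctx_change) {
            return -1;
        }
        blk->ctx = new_ctx;   /* the commit phase of the transaction */
        return 0;
    }

    /* Shape of the new blk_set_aio_context(): briefly accept our own
     * propagated change, then restore the caller's policy. */
    static int set_ctx(Blk *blk, Ctx *new_ctx)
    {
        bool old_allow = blk->allow_ctx_change;
        int ret;

        blk->allow_ctx_change = true;
        ret = try_change_ctx(blk, new_ctx);
        blk->allow_ctx_change = old_allow;

        return ret;
    }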
37 | block/block-backend.c | 61 ++++++++++++++++--------------------------- | 13 | blockjob.c | 22 +++++++++------------- |
38 | 1 file changed, 23 insertions(+), 38 deletions(-) | 14 | 1 file changed, 9 insertions(+), 13 deletions(-) |
39 | 15 | ||
40 | diff --git a/block/block-backend.c b/block/block-backend.c | 16 | diff --git a/blockjob.c b/blockjob.c |
41 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
42 | --- a/block/block-backend.c | 18 | --- a/blockjob.c |
43 | +++ b/block/block-backend.c | 19 | +++ b/blockjob.c |
44 | @@ -XXX,XX +XXX,XX @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) | 20 | @@ -XXX,XX +XXX,XX @@ static char *child_job_get_parent_desc(BdrvChild *c) |
45 | return blk_get_aio_context(blk_acb->blk); | 21 | job->id); |
46 | } | 22 | } |
47 | 23 | ||
48 | -static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, | 24 | -static const BdrvChildRole child_job = { |
49 | - bool update_root_node, Error **errp) | 25 | - .get_parent_desc = child_job_get_parent_desc, |
50 | +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, | 26 | - .stay_at_node = true, |
51 | + Error **errp) | 27 | -}; |
28 | - | ||
29 | -static void block_job_drained_begin(void *opaque) | ||
30 | +static void child_job_drained_begin(BdrvChild *c) | ||
52 | { | 31 | { |
53 | + bool old_allow_change; | 32 | - BlockJob *job = opaque; |
54 | BlockDriverState *bs = blk_bs(blk); | 33 | + BlockJob *job = c->opaque; |
55 | - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; | 34 | block_job_pause(job); |
56 | int ret; | ||
57 | |||
58 | - if (bs) { | ||
59 | - bdrv_ref(bs); | ||
60 | - | ||
61 | - if (update_root_node) { | ||
62 | - /* | ||
63 | - * update_root_node MUST be false for blk_root_set_aio_ctx_commit(), | ||
64 | - * as we are already in the commit function of a transaction. | ||
65 | - */ | ||
66 | - ret = bdrv_try_change_aio_context(bs, new_context, blk->root, errp); | ||
67 | - if (ret < 0) { | ||
68 | - bdrv_unref(bs); | ||
69 | - return ret; | ||
70 | - } | ||
71 | - } | ||
72 | - /* | ||
73 | - * Make blk->ctx consistent with the root node before we invoke any | ||
74 | - * other operations like drain that might inquire blk->ctx | ||
75 | - */ | ||
76 | - blk->ctx = new_context; | ||
77 | - if (tgm->throttle_state) { | ||
78 | - bdrv_drained_begin(bs); | ||
79 | - throttle_group_detach_aio_context(tgm); | ||
80 | - throttle_group_attach_aio_context(tgm, new_context); | ||
81 | - bdrv_drained_end(bs); | ||
82 | - } | ||
83 | + GLOBAL_STATE_CODE(); | ||
84 | |||
85 | - bdrv_unref(bs); | ||
86 | - } else { | ||
87 | + if (!bs) { | ||
88 | blk->ctx = new_context; | ||
89 | + return 0; | ||
90 | } | ||
91 | |||
92 | - return 0; | ||
93 | -} | ||
94 | + bdrv_ref(bs); | ||
95 | |||
96 | -int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, | ||
97 | - Error **errp) | ||
98 | -{ | ||
99 | - GLOBAL_STATE_CODE(); | ||
100 | - return blk_do_set_aio_context(blk, new_context, true, errp); | ||
101 | + old_allow_change = blk->allow_aio_context_change; | ||
102 | + blk->allow_aio_context_change = true; | ||
103 | + | ||
104 | + ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp); | ||
105 | + | ||
106 | + blk->allow_aio_context_change = old_allow_change; | ||
107 | + | ||
108 | + bdrv_unref(bs); | ||
109 | + return ret; | ||
110 | } | 35 | } |
111 | 36 | ||
112 | typedef struct BdrvStateBlkRootContext { | 37 | -static void block_job_drained_end(void *opaque) |
113 | @@ -XXX,XX +XXX,XX @@ static void blk_root_set_aio_ctx_commit(void *opaque) | 38 | +static void child_job_drained_end(BdrvChild *c) |
114 | { | 39 | { |
115 | BdrvStateBlkRootContext *s = opaque; | 40 | - BlockJob *job = opaque; |
116 | BlockBackend *blk = s->blk; | 41 | + BlockJob *job = c->opaque; |
117 | + AioContext *new_context = s->new_ctx; | 42 | block_job_resume(job); |
118 | + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; | ||
119 | |||
120 | - blk_do_set_aio_context(blk, s->new_ctx, false, &error_abort); | ||
121 | + blk->ctx = new_context; | ||
122 | + if (tgm->throttle_state) { | ||
123 | + throttle_group_detach_aio_context(tgm); | ||
124 | + throttle_group_attach_aio_context(tgm, new_context); | ||
125 | + } | ||
126 | } | 43 | } |
127 | 44 | ||
128 | static TransactionActionDrv set_blk_root_context = { | 45 | -static const BlockDevOps block_job_dev_ops = { |
46 | - .drained_begin = block_job_drained_begin, | ||
47 | - .drained_end = block_job_drained_end, | ||
48 | +static const BdrvChildRole child_job = { | ||
49 | + .get_parent_desc = child_job_get_parent_desc, | ||
50 | + .drained_begin = child_job_drained_begin, | ||
51 | + .drained_end = child_job_drained_end, | ||
52 | + .stay_at_node = true, | ||
53 | }; | ||
54 | |||
55 | void block_job_remove_all_bdrv(BlockJob *job) | ||
56 | @@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, | ||
57 | block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); | ||
58 | bs->job = job; | ||
59 | |||
60 | - blk_set_dev_ops(blk, &block_job_dev_ops, job); | ||
61 | bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); | ||
62 | |||
63 | QLIST_INSERT_HEAD(&block_jobs, job, job_list); | ||
129 | -- | 64 | -- |
130 | 2.40.1 | 65 | 2.13.6 |
66 | |||
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | Block jobs must be paused if any of the involved nodes are drained. |
---|---|---|---|
2 | 2 | ||
3 | Detach event channels during drained sections to stop I/O submission | ||
4 | from the ring. xen-block is no longer reliant on aio_disable_external() | ||
5 | after this patch. This will allow us to remove the | ||
6 | aio_disable_external() API once all other code that relies on it is | ||
7 | converted. | ||
8 | |||
9 | Extend xen_device_set_event_channel_context() to allow ctx=NULL. The | ||
10 | event channel still exists but the event loop does not monitor the file | ||
11 | descriptor. Event channel processing can resume by calling | ||
12 | xen_device_set_event_channel_context() with a non-NULL ctx. | ||
13 | |||
14 | Factor out xen_device_set_event_channel_context() calls in | ||
15 | hw/block/dataplane/xen-block.c into attach/detach helper functions. | ||
16 | Incidentally, these don't require the AioContext lock because | ||
17 | aio_set_fd_handler() is thread-safe. | ||
18 | |||
19 | It's safer to register BlockDevOps after the dataplane instance has been | ||
20 | created. The BlockDevOps .drained_begin/end() callbacks depend on the | ||
21 | dataplane instance, so move the blk_set_dev_ops() call after | ||
22 | xen_block_dataplane_create(). | ||
23 | |||
24 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
25 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
26 | Message-Id: <20230516190238.8401-12-stefanha@redhat.com> | ||
27 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
28 | --- | 4 | --- |
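One detail worth spelling out: the extended
xen_device_set_event_channel_context() treats ctx == NULL as "keep the
channel, but stop monitoring its fd". Reduced to a sketch, with Channel
and the two handlers as illustrative stand-ins (the aio_set_fd_handler()
signature matches the hunk below):

    #include "block/aio.h"

    typedef struct Channel {
        AioContext *ctx;
        int fd;
    } Channel;

    static void on_event(void *opaque) { /* process the ring */ }
    static bool on_poll(void *opaque) { return false; }

    static void set_channel_context(Channel *ch, AioContext *ctx)
    {
        /* Always unregister from the old context first. */
        if (ch->ctx) {
            aio_set_fd_handler(ch->ctx, ch->fd, true,
                               NULL, NULL, NULL, NULL, NULL);
        }

        ch->ctx = ctx;
        if (ctx) {
            aio_set_fd_handler(ctx, ch->fd, true,
                               on_event, NULL, on_poll, NULL, ch);
        }
        /* With ctx == NULL the fd stays unmonitored until reattached. */
    }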
29 | hw/block/dataplane/xen-block.h | 2 ++ | 5 | tests/test-bdrv-drain.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ |
30 | hw/block/dataplane/xen-block.c | 42 +++++++++++++++++++++++++--------- | 6 | 1 file changed, 121 insertions(+) |
31 | hw/block/xen-block.c | 24 ++++++++++++++++--- | ||
32 | hw/xen/xen-bus.c | 7 ++++-- | ||
33 | 4 files changed, 59 insertions(+), 16 deletions(-) | ||
34 | 7 | ||
35 | diff --git a/hw/block/dataplane/xen-block.h b/hw/block/dataplane/xen-block.h | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
36 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
37 | --- a/hw/block/dataplane/xen-block.h | 10 | --- a/tests/test-bdrv-drain.c |
38 | +++ b/hw/block/dataplane/xen-block.h | 11 | +++ b/tests/test-bdrv-drain.c |
39 | @@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, | 12 | @@ -XXX,XX +XXX,XX @@ |
40 | unsigned int protocol, | 13 | |
41 | Error **errp); | 14 | #include "qemu/osdep.h" |
42 | void xen_block_dataplane_stop(XenBlockDataPlane *dataplane); | 15 | #include "block/block.h" |
43 | +void xen_block_dataplane_attach(XenBlockDataPlane *dataplane); | 16 | +#include "block/blockjob_int.h" |
44 | +void xen_block_dataplane_detach(XenBlockDataPlane *dataplane); | 17 | #include "sysemu/block-backend.h" |
45 | 18 | #include "qapi/error.h" | |
46 | #endif /* HW_BLOCK_DATAPLANE_XEN_BLOCK_H */ | 19 | |
47 | diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c | 20 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) |
48 | index XXXXXXX..XXXXXXX 100644 | 21 | test_quiesce_common(BDRV_DRAIN, false); |
49 | --- a/hw/block/dataplane/xen-block.c | ||
50 | +++ b/hw/block/dataplane/xen-block.c | ||
51 | @@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane) | ||
52 | g_free(dataplane); | ||
53 | } | 22 | } |
54 | 23 | ||
55 | +void xen_block_dataplane_detach(XenBlockDataPlane *dataplane) | 24 | + |
25 | +typedef struct TestBlockJob { | ||
26 | + BlockJob common; | ||
27 | + bool should_complete; | ||
28 | +} TestBlockJob; | ||
29 | + | ||
30 | +static void test_job_completed(BlockJob *job, void *opaque) | ||
56 | +{ | 31 | +{ |
57 | + if (!dataplane || !dataplane->event_channel) { | 32 | + block_job_completed(job, 0); |
58 | + return; | 33 | +} |
34 | + | ||
35 | +static void coroutine_fn test_job_start(void *opaque) | ||
36 | +{ | ||
37 | + TestBlockJob *s = opaque; | ||
38 | + | ||
39 | + while (!s->should_complete) { | ||
40 | + block_job_sleep_ns(&s->common, 100000); | ||
59 | + } | 41 | + } |
60 | + | 42 | + |
61 | + /* Only reason for failure is a NULL channel */ | 43 | + block_job_defer_to_main_loop(&s->common, test_job_completed, NULL); |
62 | + xen_device_set_event_channel_context(dataplane->xendev, | ||
63 | + dataplane->event_channel, | ||
64 | + NULL, &error_abort); | ||
65 | +} | 44 | +} |
66 | + | 45 | + |
67 | +void xen_block_dataplane_attach(XenBlockDataPlane *dataplane) | 46 | +static void test_job_complete(BlockJob *job, Error **errp) |
68 | +{ | 47 | +{ |
69 | + if (!dataplane || !dataplane->event_channel) { | 48 | + TestBlockJob *s = container_of(job, TestBlockJob, common); |
70 | + return; | 49 | + s->should_complete = true; |
71 | + } | ||
72 | + | ||
73 | + /* Only reason for failure is a NULL channel */ | ||
74 | + xen_device_set_event_channel_context(dataplane->xendev, | ||
75 | + dataplane->event_channel, | ||
76 | + dataplane->ctx, &error_abort); | ||
77 | +} | 50 | +} |
78 | + | 51 | + |
79 | void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) | 52 | +BlockJobDriver test_job_driver = { |
80 | { | 53 | + .instance_size = sizeof(TestBlockJob), |
81 | XenDevice *xendev; | 54 | + .start = test_job_start, |
82 | @@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) | 55 | + .complete = test_job_complete, |
83 | 56 | +}; | |
84 | xendev = dataplane->xendev; | ||
85 | |||
86 | - aio_context_acquire(dataplane->ctx); | ||
87 | - if (dataplane->event_channel) { | ||
88 | - /* Only reason for failure is a NULL channel */ | ||
89 | - xen_device_set_event_channel_context(xendev, dataplane->event_channel, | ||
90 | - qemu_get_aio_context(), | ||
91 | - &error_abort); | ||
92 | + if (!blk_in_drain(dataplane->blk)) { | ||
93 | + xen_block_dataplane_detach(dataplane); | ||
94 | } | ||
95 | + | 57 | + |
96 | + aio_context_acquire(dataplane->ctx); | 58 | +static void test_blockjob_common(enum drain_type drain_type) |
97 | /* Xen doesn't have multiple users for nodes, so this can't fail */ | 59 | +{ |
98 | blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort); | 60 | + BlockBackend *blk_src, *blk_target; |
99 | aio_context_release(dataplane->ctx); | 61 | + BlockDriverState *src, *target; |
100 | @@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, | 62 | + BlockJob *job; |
101 | blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); | 63 | + int ret; |
102 | aio_context_release(old_context); | 64 | + |
103 | 65 | + src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, | |
104 | - /* Only reason for failure is a NULL channel */ | 66 | + &error_abort); |
105 | - aio_context_acquire(dataplane->ctx); | 67 | + blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
106 | - xen_device_set_event_channel_context(xendev, dataplane->event_channel, | 68 | + blk_insert_bs(blk_src, src, &error_abort); |
107 | - dataplane->ctx, &error_abort); | 69 | + |
108 | - aio_context_release(dataplane->ctx); | 70 | + target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, |
109 | + if (!blk_in_drain(dataplane->blk)) { | 71 | + &error_abort); |
110 | + xen_block_dataplane_attach(dataplane); | 72 | + blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
73 | + blk_insert_bs(blk_target, target, &error_abort); | ||
74 | + | ||
75 | + job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0, | ||
76 | + 0, NULL, NULL, &error_abort); | ||
77 | + block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); | ||
78 | + block_job_start(job); | ||
79 | + | ||
80 | + g_assert_cmpint(job->pause_count, ==, 0); | ||
81 | + g_assert_false(job->paused); | ||
82 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ | ||
83 | + | ||
84 | + do_drain_begin(drain_type, src); | ||
85 | + | ||
86 | + if (drain_type == BDRV_DRAIN_ALL) { | ||
87 | + /* bdrv_drain_all() drains both src and target, and involves an | ||
88 | + * additional block_job_pause_all() */ | ||
89 | + g_assert_cmpint(job->pause_count, ==, 3); | ||
90 | + } else { | ||
91 | + g_assert_cmpint(job->pause_count, ==, 1); | ||
111 | + } | 92 | + } |
112 | 93 | + /* XXX We don't wait until the job is actually paused. Is this okay? */ | |
113 | return; | 94 | + /* g_assert_true(job->paused); */ |
114 | 95 | + g_assert_false(job->busy); /* The job is paused */ | |
115 | diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/hw/block/xen-block.c | ||
118 | +++ b/hw/block/xen-block.c | ||
119 | @@ -XXX,XX +XXX,XX @@ static void xen_block_resize_cb(void *opaque) | ||
120 | xen_device_backend_printf(xendev, "state", "%u", state); | ||
121 | } | ||
122 | |||
123 | +/* Suspend request handling */ | ||
124 | +static void xen_block_drained_begin(void *opaque) | ||
125 | +{ | ||
126 | + XenBlockDevice *blockdev = opaque; | ||
127 | + | 96 | + |
128 | + xen_block_dataplane_detach(blockdev->dataplane); | 97 | + do_drain_end(drain_type, src); |
98 | + | ||
99 | + g_assert_cmpint(job->pause_count, ==, 0); | ||
100 | + g_assert_false(job->paused); | ||
101 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ | ||
102 | + | ||
103 | + do_drain_begin(drain_type, target); | ||
104 | + | ||
105 | + if (drain_type == BDRV_DRAIN_ALL) { | ||
106 | + /* bdrv_drain_all() drains both src and target, and involves an | ||
107 | + * additional block_job_pause_all() */ | ||
108 | + g_assert_cmpint(job->pause_count, ==, 3); | ||
109 | + } else { | ||
110 | + g_assert_cmpint(job->pause_count, ==, 1); | ||
111 | + } | ||
112 | + /* XXX We don't wait until the job is actually paused. Is this okay? */ | ||
113 | + /* g_assert_true(job->paused); */ | ||
114 | + g_assert_false(job->busy); /* The job is paused */ | ||
115 | + | ||
116 | + do_drain_end(drain_type, target); | ||
117 | + | ||
118 | + g_assert_cmpint(job->pause_count, ==, 0); | ||
119 | + g_assert_false(job->paused); | ||
120 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ | ||
121 | + | ||
122 | + ret = block_job_complete_sync(job, &error_abort); | ||
123 | + g_assert_cmpint(ret, ==, 0); | ||
124 | + | ||
125 | + blk_unref(blk_src); | ||
126 | + blk_unref(blk_target); | ||
127 | + bdrv_unref(src); | ||
128 | + bdrv_unref(target); | ||
129 | +} | 129 | +} |
130 | + | 130 | + |
131 | +/* Resume request handling */ | 131 | +static void test_blockjob_drain_all(void) |
132 | +static void xen_block_drained_end(void *opaque) | ||
133 | +{ | 132 | +{ |
134 | + XenBlockDevice *blockdev = opaque; | 133 | + test_blockjob_common(BDRV_DRAIN_ALL); |
135 | + | ||
136 | + xen_block_dataplane_attach(blockdev->dataplane); | ||
137 | +} | 134 | +} |
138 | + | 135 | + |
139 | static const BlockDevOps xen_block_dev_ops = { | 136 | +static void test_blockjob_drain(void) |
140 | - .resize_cb = xen_block_resize_cb, | 137 | +{ |
141 | + .resize_cb = xen_block_resize_cb, | 138 | + test_blockjob_common(BDRV_DRAIN); |
142 | + .drained_begin = xen_block_drained_begin, | 139 | +} |
143 | + .drained_end = xen_block_drained_end, | ||
144 | }; | ||
145 | |||
146 | static void xen_block_realize(XenDevice *xendev, Error **errp) | ||
147 | @@ -XXX,XX +XXX,XX @@ static void xen_block_realize(XenDevice *xendev, Error **errp) | ||
148 | return; | ||
149 | } | ||
150 | |||
151 | - blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev); | ||
152 | - | ||
153 | if (conf->discard_granularity == -1) { | ||
154 | conf->discard_granularity = conf->physical_block_size; | ||
155 | } | ||
156 | @@ -XXX,XX +XXX,XX @@ static void xen_block_realize(XenDevice *xendev, Error **errp) | ||
157 | blockdev->dataplane = | ||
158 | xen_block_dataplane_create(xendev, blk, conf->logical_block_size, | ||
159 | blockdev->props.iothread); | ||
160 | + | 140 | + |
161 | + blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev); | 141 | int main(int argc, char **argv) |
142 | { | ||
143 | bdrv_init(); | ||
144 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
145 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
146 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
147 | |||
148 | + g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
149 | + g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
150 | + | ||
151 | return g_test_run(); | ||
162 | } | 152 | } |
163 | |||
164 | static void xen_block_frontend_changed(XenDevice *xendev, | ||
165 | diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/hw/xen/xen-bus.c | ||
168 | +++ b/hw/xen/xen-bus.c | ||
169 | @@ -XXX,XX +XXX,XX @@ void xen_device_set_event_channel_context(XenDevice *xendev, | ||
170 | NULL, NULL, NULL, NULL, NULL); | ||
171 | |||
172 | channel->ctx = ctx; | ||
173 | - aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, | ||
174 | - xen_device_event, NULL, xen_device_poll, NULL, channel); | ||
175 | + if (ctx) { | ||
176 | + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), | ||
177 | + true, xen_device_event, NULL, xen_device_poll, NULL, | ||
178 | + channel); | ||
179 | + } | ||
180 | } | ||
181 | |||
182 | XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, | ||
183 | -- | 153 | -- |
184 | 2.40.1 | 154 | 2.13.6 |
155 | |||
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | Block jobs are already paused using the BdrvChildRole drain callbacks, |
---|---|---|---|
2 | so we don't need an additional block_job_pause_all() call. | ||
2 | 3 | ||
3 | Host notifiers can now use is_external=false since virtio-blk and | ||
4 | virtio-scsi no longer rely on is_external=true for drained sections. | ||
5 | |||
6 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-Id: <20230516190238.8401-20-stefanha@redhat.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | --- | 5 | --- |
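With is_external out of the picture, pausing a virtqueue during drain
reduces to detaching its host notifier. A sketch of drained hooks built on
the helpers touched below; MyDataplane and its fields are assumptions
about device state, not QEMU API:

    #include "hw/virtio/virtio.h"

    typedef struct MyDataplane {
        VirtQueue *vq;
        AioContext *ctx;
    } MyDataplane;

    static void my_drained_begin(void *opaque)
    {
        MyDataplane *s = opaque;
        /* Stop reading the guest's kick eventfd in the IOThread. */
        virtio_queue_aio_detach_host_notifier(s->vq, s->ctx);
    }

    static void my_drained_end(void *opaque)
    {
        MyDataplane *s = opaque;
        /* Resume processing; re-registers the read and poll handlers. */
        virtio_queue_aio_attach_host_notifier(s->vq, s->ctx);
    }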
10 | hw/virtio/virtio.c | 6 +++--- | 6 | block/io.c | 4 ---- |
11 | 1 file changed, 3 insertions(+), 3 deletions(-) | 7 | tests/test-bdrv-drain.c | 10 ++++------ |
8 | 2 files changed, 4 insertions(+), 10 deletions(-) | ||
12 | 9 | ||
13 | diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c | 10 | diff --git a/block/io.c b/block/io.c |
14 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/hw/virtio/virtio.c | 12 | --- a/block/io.c |
16 | +++ b/hw/virtio/virtio.c | 13 | +++ b/block/io.c |
17 | @@ -XXX,XX +XXX,XX @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) | 14 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
18 | 15 | * context. */ | |
19 | void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) | 16 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); |
20 | { | 17 | |
21 | - aio_set_event_notifier(ctx, &vq->host_notifier, true, | 18 | - block_job_pause_all(); |
22 | + aio_set_event_notifier(ctx, &vq->host_notifier, false, | 19 | - |
23 | virtio_queue_host_notifier_read, | 20 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
24 | virtio_queue_host_notifier_aio_poll, | 21 | AioContext *aio_context = bdrv_get_aio_context(bs); |
25 | virtio_queue_host_notifier_aio_poll_ready); | 22 | |
26 | @@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) | 23 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
27 | */ | 24 | aio_enable_external(aio_context); |
28 | void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) | 25 | aio_context_release(aio_context); |
29 | { | 26 | } |
30 | - aio_set_event_notifier(ctx, &vq->host_notifier, true, | 27 | - |
31 | + aio_set_event_notifier(ctx, &vq->host_notifier, false, | 28 | - block_job_resume_all(); |
32 | virtio_queue_host_notifier_read, | ||
33 | NULL, NULL); | ||
34 | } | 29 | } |
35 | 30 | ||
36 | void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) | 31 | void bdrv_drain_all(void) |
37 | { | 32 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
38 | - aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL); | 33 | index XXXXXXX..XXXXXXX 100644 |
39 | + aio_set_event_notifier(ctx, &vq->host_notifier, false, NULL, NULL, NULL); | 34 | --- a/tests/test-bdrv-drain.c |
40 | } | 35 | +++ b/tests/test-bdrv-drain.c |
41 | 36 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) | |
42 | void virtio_queue_host_notifier_read(EventNotifier *n) | 37 | do_drain_begin(drain_type, src); |
38 | |||
39 | if (drain_type == BDRV_DRAIN_ALL) { | ||
40 | - /* bdrv_drain_all() drains both src and target, and involves an | ||
41 | - * additional block_job_pause_all() */ | ||
42 | - g_assert_cmpint(job->pause_count, ==, 3); | ||
43 | + /* bdrv_drain_all() drains both src and target */ | ||
44 | + g_assert_cmpint(job->pause_count, ==, 2); | ||
45 | } else { | ||
46 | g_assert_cmpint(job->pause_count, ==, 1); | ||
47 | } | ||
48 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) | ||
49 | do_drain_begin(drain_type, target); | ||
50 | |||
51 | if (drain_type == BDRV_DRAIN_ALL) { | ||
52 | - /* bdrv_drain_all() drains both src and target, and involves an | ||
53 | - * additional block_job_pause_all() */ | ||
54 | - g_assert_cmpint(job->pause_count, ==, 3); | ||
55 | + /* bdrv_drain_all() drains both src and target */ | ||
56 | + g_assert_cmpint(job->pause_count, ==, 2); | ||
57 | } else { | ||
58 | g_assert_cmpint(job->pause_count, ==, 1); | ||
59 | } | ||
43 | -- | 60 | -- |
44 | 2.40.1 | 61 | 2.13.6 |
62 | |||
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | bdrv_do_drained_begin() restricts the call of parent callbacks and |
---|---|---|---|
2 | aio_disable_external() to the outermost drain section, but the block | ||
3 | driver callbacks are always called. bdrv_do_drained_end() must match | ||
4 | this behaviour; otherwise nodes stay drained even if begin/end calls | ||
5 | were balanced. | ||
2 | 6 | ||
3 | For simplicity, always run BlockDevOps .drained_begin/end/poll() | ||
4 | callbacks in the main loop thread. This makes it easier to implement the | ||
5 | callbacks and avoids extra locks. | ||
6 | |||
7 | Move the function pointer declarations from the I/O Code section to the | ||
8 | Global State section for BlockDevOps, BdrvChildClass, and BlockDriver. | ||
9 | |||
10 | Narrow IO_OR_GS_CODE() to GLOBAL_STATE_CODE() where appropriate. | ||
11 | |||
12 | The test-bdrv-drain test case calls bdrv_drain() from an IOThread. This | ||
13 | is now only allowed from coroutine context, so update the test case to | ||
14 | run in a coroutine. | ||
15 | |||
16 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
17 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
18 | Message-Id: <20230516190238.8401-11-stefanha@redhat.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
20 | --- | 8 | --- |
21 | include/block/block_int-common.h | 72 +++++++++++++-------------- | 9 | block/io.c | 12 +++++++----- |
22 | include/sysemu/block-backend-common.h | 25 +++++----- | 10 | 1 file changed, 7 insertions(+), 5 deletions(-) |
23 | block/io.c | 14 ++++-- | ||
24 | tests/unit/test-bdrv-drain.c | 14 +++--- | ||
25 | 4 files changed, 67 insertions(+), 58 deletions(-) | ||
26 | 11 | ||
27 | diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/include/block/block_int-common.h | ||
30 | +++ b/include/block/block_int-common.h | ||
31 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
32 | void (*bdrv_attach_aio_context)(BlockDriverState *bs, | ||
33 | AioContext *new_context); | ||
34 | |||
35 | + /** | ||
36 | + * bdrv_drain_begin is called if implemented in the beginning of a | ||
37 | + * drain operation to drain and stop any internal sources of requests in | ||
38 | + * the driver. | ||
39 | + * bdrv_drain_end is called if implemented at the end of the drain. | ||
40 | + * | ||
41 | + * They should be used by the driver to e.g. manage scheduled I/O | ||
42 | + * requests, or toggle an internal state. After the end of the drain new | ||
43 | + * requests will continue normally. | ||
44 | + * | ||
45 | + * Implementations of both functions must not call aio_poll(). | ||
46 | + */ | ||
47 | + void (*bdrv_drain_begin)(BlockDriverState *bs); | ||
48 | + void (*bdrv_drain_end)(BlockDriverState *bs); | ||
49 | + | ||
50 | /** | ||
51 | * Try to get @bs's logical and physical block size. | ||
52 | * On success, store them in @bsz and return zero. | ||
53 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
54 | void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_unplug)( | ||
55 | BlockDriverState *bs); | ||
56 | |||
57 | - /** | ||
58 | - * bdrv_drain_begin is called if implemented in the beginning of a | ||
59 | - * drain operation to drain and stop any internal sources of requests in | ||
60 | - * the driver. | ||
61 | - * bdrv_drain_end is called if implemented at the end of the drain. | ||
62 | - * | ||
63 | - * They should be used by the driver to e.g. manage scheduled I/O | ||
64 | - * requests, or toggle an internal state. After the end of the drain new | ||
65 | - * requests will continue normally. | ||
66 | - * | ||
67 | - * Implementations of both functions must not call aio_poll(). | ||
68 | - */ | ||
69 | - void (*bdrv_drain_begin)(BlockDriverState *bs); | ||
70 | - void (*bdrv_drain_end)(BlockDriverState *bs); | ||
71 | - | ||
72 | bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); | ||
73 | |||
74 | bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)( | ||
75 | @@ -XXX,XX +XXX,XX @@ struct BdrvChildClass { | ||
76 | void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child); | ||
77 | void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child); | ||
78 | |||
79 | + /* | ||
80 | + * If this pair of functions is implemented, the parent doesn't issue new | ||
81 | + * requests after returning from .drained_begin() until .drained_end() is | ||
82 | + * called. | ||
83 | + * | ||
84 | + * These functions must not change the graph (and therefore also must not | ||
85 | + * call aio_poll(), which could change the graph indirectly). | ||
86 | + * | ||
87 | + * Note that this can be nested. If drained_begin() was called twice, new | ||
88 | + * I/O is allowed only after drained_end() was called twice, too. | ||
89 | + */ | ||
90 | + void (*drained_begin)(BdrvChild *child); | ||
91 | + void (*drained_end)(BdrvChild *child); | ||
92 | + | ||
93 | + /* | ||
94 | + * Returns whether the parent has pending requests for the child. This | ||
95 | + * callback is polled after .drained_begin() has been called until all | ||
96 | + * activity on the child has stopped. | ||
97 | + */ | ||
98 | + bool (*drained_poll)(BdrvChild *child); | ||
99 | + | ||
100 | /* | ||
101 | * Notifies the parent that the filename of its child has changed (e.g. | ||
102 | * because the direct child was removed from the backing chain), so that it | ||
103 | @@ -XXX,XX +XXX,XX @@ struct BdrvChildClass { | ||
104 | const char *(*get_name)(BdrvChild *child); | ||
105 | |||
106 | AioContext *(*get_parent_aio_context)(BdrvChild *child); | ||
107 | - | ||
108 | - /* | ||
109 | - * If this pair of functions is implemented, the parent doesn't issue new | ||
110 | - * requests after returning from .drained_begin() until .drained_end() is | ||
111 | - * called. | ||
112 | - * | ||
113 | - * These functions must not change the graph (and therefore also must not | ||
114 | - * call aio_poll(), which could change the graph indirectly). | ||
115 | - * | ||
116 | - * Note that this can be nested. If drained_begin() was called twice, new | ||
117 | - * I/O is allowed only after drained_end() was called twice, too. | ||
118 | - */ | ||
119 | - void (*drained_begin)(BdrvChild *child); | ||
120 | - void (*drained_end)(BdrvChild *child); | ||
121 | - | ||
122 | - /* | ||
123 | - * Returns whether the parent has pending requests for the child. This | ||
124 | - * callback is polled after .drained_begin() has been called until all | ||
125 | - * activity on the child has stopped. | ||
126 | - */ | ||
127 | - bool (*drained_poll)(BdrvChild *child); | ||
128 | }; | ||
129 | |||
130 | extern const BdrvChildClass child_of_bds; | ||
131 | diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h | ||
132 | index XXXXXXX..XXXXXXX 100644 | ||
133 | --- a/include/sysemu/block-backend-common.h | ||
134 | +++ b/include/sysemu/block-backend-common.h | ||
135 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps { | ||
136 | */ | ||
137 | bool (*is_medium_locked)(void *opaque); | ||
138 | |||
139 | + /* | ||
140 | + * Runs when the backend receives a drain request. | ||
141 | + */ | ||
142 | + void (*drained_begin)(void *opaque); | ||
143 | + /* | ||
144 | + * Runs when the backend's last drain request ends. | ||
145 | + */ | ||
146 | + void (*drained_end)(void *opaque); | ||
147 | + /* | ||
148 | + * Is the device still busy? | ||
149 | + */ | ||
150 | + bool (*drained_poll)(void *opaque); | ||
151 | + | ||
152 | /* | ||
153 | * I/O API functions. These functions are thread-safe. | ||
154 | * | ||
155 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps { | ||
156 | * Runs when the size changed (e.g. monitor command block_resize) | ||
157 | */ | ||
158 | void (*resize_cb)(void *opaque); | ||
159 | - /* | ||
160 | - * Runs when the backend receives a drain request. | ||
161 | - */ | ||
162 | - void (*drained_begin)(void *opaque); | ||
163 | - /* | ||
164 | - * Runs when the backend's last drain request ends. | ||
165 | - */ | ||
166 | - void (*drained_end)(void *opaque); | ||
167 | - /* | ||
168 | - * Is the device still busy? | ||
169 | - */ | ||
170 | - bool (*drained_poll)(void *opaque); | ||
171 | } BlockDevOps; | ||
172 | |||
173 | /* | ||
174 | diff --git a/block/io.c b/block/io.c | 12 | diff --git a/block/io.c b/block/io.c |
175 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
176 | --- a/block/io.c | 14 | --- a/block/io.c |
177 | +++ b/block/io.c | 15 | +++ b/block/io.c |
178 | @@ -XXX,XX +XXX,XX @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) | 16 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
179 | 17 | ||
180 | void bdrv_parent_drained_end_single(BdrvChild *c) | 18 | void bdrv_drained_end(BlockDriverState *bs) |
181 | { | 19 | { |
182 | - IO_OR_GS_CODE(); | 20 | + int old_quiesce_counter; |
183 | + GLOBAL_STATE_CODE(); | ||
184 | |||
185 | assert(c->quiesced_parent); | ||
186 | c->quiesced_parent = false; | ||
187 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, | ||
188 | |||
189 | void bdrv_parent_drained_begin_single(BdrvChild *c) | ||
190 | { | ||
191 | - IO_OR_GS_CODE(); | ||
192 | + GLOBAL_STATE_CODE(); | ||
193 | |||
194 | assert(!c->quiesced_parent); | ||
195 | c->quiesced_parent = true; | ||
196 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
197 | bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, | ||
198 | bool ignore_bds_parents) | ||
199 | { | ||
200 | - IO_OR_GS_CODE(); | ||
201 | + GLOBAL_STATE_CODE(); | ||
202 | |||
203 | if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { | ||
204 | return true; | ||
205 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
206 | if (ctx != co_ctx) { | ||
207 | aio_context_release(ctx); | ||
208 | } | ||
209 | - replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data); | ||
210 | + replay_bh_schedule_oneshot_event(qemu_get_aio_context(), | ||
211 | + bdrv_co_drain_bh_cb, &data); | ||
212 | |||
213 | qemu_coroutine_yield(); | ||
214 | /* If we are resumed from some other event (such as an aio completion or a | ||
215 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, | ||
216 | return; | ||
217 | } | ||
218 | |||
219 | + GLOBAL_STATE_CODE(); | ||
220 | + | ||
221 | /* Stop things in parent-to-child order */ | ||
222 | if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { | ||
223 | aio_disable_external(bdrv_get_aio_context(bs)); | ||
224 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) | ||
225 | { | ||
226 | int old_quiesce_counter; | ||
227 | |||
228 | + IO_OR_GS_CODE(); | ||
229 | + | 21 | + |
230 | if (qemu_in_coroutine()) { | 22 | if (qemu_in_coroutine()) { |
231 | bdrv_co_yield_to_drain(bs, false, parent, false); | 23 | bdrv_co_yield_to_drain(bs, false); |
232 | return; | 24 | return; |
233 | } | 25 | } |
234 | assert(bs->quiesce_counter > 0); | 26 | assert(bs->quiesce_counter > 0); |
235 | + GLOBAL_STATE_CODE(); | 27 | - if (atomic_fetch_dec(&bs->quiesce_counter) > 1) { |
28 | - return; | ||
29 | - } | ||
30 | + old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter); | ||
236 | 31 | ||
237 | /* Re-enable things in child-to-parent order */ | 32 | /* Re-enable things in child-to-parent order */ |
238 | old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); | 33 | bdrv_drain_invoke(bs, false, false); |
239 | diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c | 34 | - bdrv_parent_drained_end(bs); |
240 | index XXXXXXX..XXXXXXX 100644 | 35 | - aio_enable_external(bdrv_get_aio_context(bs)); |
241 | --- a/tests/unit/test-bdrv-drain.c | 36 | + if (old_quiesce_counter == 1) { |
242 | +++ b/tests/unit/test-bdrv-drain.c | 37 | + bdrv_parent_drained_end(bs); |
243 | @@ -XXX,XX +XXX,XX @@ struct test_iothread_data { | 38 | + aio_enable_external(bdrv_get_aio_context(bs)); |
244 | BlockDriverState *bs; | 39 | + } |
245 | enum drain_type drain_type; | ||
246 | int *aio_ret; | ||
247 | + bool co_done; | ||
248 | }; | ||
249 | |||
250 | -static void test_iothread_drain_entry(void *opaque) | ||
251 | +static void coroutine_fn test_iothread_drain_co_entry(void *opaque) | ||
252 | { | ||
253 | struct test_iothread_data *data = opaque; | ||
254 | |||
255 | - aio_context_acquire(bdrv_get_aio_context(data->bs)); | ||
256 | do_drain_begin(data->drain_type, data->bs); | ||
257 | g_assert_cmpint(*data->aio_ret, ==, 0); | ||
258 | do_drain_end(data->drain_type, data->bs); | ||
259 | - aio_context_release(bdrv_get_aio_context(data->bs)); | ||
260 | |||
261 | - qemu_event_set(&done_event); | ||
262 | + data->co_done = true; | ||
263 | + aio_wait_kick(); | ||
264 | } | 40 | } |
265 | 41 | ||
266 | static void test_iothread_aio_cb(void *opaque, int ret) | 42 | /* |
267 | @@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) | ||
268 | BlockDriverState *bs; | ||
269 | BDRVTestState *s; | ||
270 | BlockAIOCB *acb; | ||
271 | + Coroutine *co; | ||
272 | int aio_ret; | ||
273 | struct test_iothread_data data; | ||
274 | |||
275 | @@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) | ||
276 | } | ||
277 | break; | ||
278 | case 1: | ||
279 | - aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data); | ||
280 | - qemu_event_wait(&done_event); | ||
281 | + co = qemu_coroutine_create(test_iothread_drain_co_entry, &data); | ||
282 | + aio_co_enter(ctx_a, co); | ||
283 | + AIO_WAIT_WHILE_UNLOCKED(NULL, !data.co_done); | ||
284 | break; | ||
285 | default: | ||
286 | g_assert_not_reached(); | ||
287 | -- | 43 | -- |
288 | 2.40.1 | 44 | 2.13.6 |
45 | |||
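The test conversion in the patch above relies on a hand-off pattern that
is useful beyond tests: create a coroutine, kick it into another
AioContext, and wait in the main loop until it signals completion. A
skeleton, with MyData as an illustrative container:

    #include "qemu/osdep.h"
    #include "qemu/coroutine.h"
    #include "block/aio-wait.h"

    typedef struct MyData {
        bool co_done;
    } MyData;

    static void coroutine_fn my_co_entry(void *opaque)
    {
        MyData *d = opaque;

        /* ... do the work that must run in coroutine context ... */

        d->co_done = true;
        aio_wait_kick();   /* wake the AIO_WAIT_WHILE_UNLOCKED() below */
    }

    static void run_in_ctx(AioContext *ctx)
    {
        MyData data = { .co_done = false };
        Coroutine *co = qemu_coroutine_create(my_co_entry, &data);

        aio_co_enter(ctx, co);   /* schedule in the target context */
        AIO_WAIT_WHILE_UNLOCKED(NULL, !data.co_done);
    }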
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | ||
---|---|---|---|
2 | |||
3 | Add a helper function to check whether the device is realized without | ||
4 | requiring the Big QEMU Lock. The next patch adds a second caller. The | ||
5 | goal is to avoid spreading DeviceState field accesses throughout the | ||
6 | code. | ||
7 | |||
8 | Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | Message-Id: <20230516190238.8401-3-stefanha@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 2 | --- |
15 | include/hw/qdev-core.h | 17 ++++++++++++++--- | 3 | tests/test-bdrv-drain.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ |
16 | hw/scsi/scsi-bus.c | 3 +-- | 4 | 1 file changed, 57 insertions(+) |
17 | 2 files changed, 15 insertions(+), 5 deletions(-) | ||
18 | 5 | ||
19 | diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h | 6 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
20 | index XXXXXXX..XXXXXXX 100644 | 7 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/include/hw/qdev-core.h | 8 | --- a/tests/test-bdrv-drain.c |
22 | +++ b/include/hw/qdev-core.h | 9 | +++ b/tests/test-bdrv-drain.c |
23 | @@ -XXX,XX +XXX,XX @@ | 10 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
24 | #ifndef QDEV_CORE_H | 11 | enum drain_type { |
25 | #define QDEV_CORE_H | 12 | BDRV_DRAIN_ALL, |
26 | 13 | BDRV_DRAIN, | |
27 | +#include "qemu/atomic.h" | 14 | + DRAIN_TYPE_MAX, |
28 | #include "qemu/queue.h" | 15 | }; |
29 | #include "qemu/bitmap.h" | 16 | |
30 | #include "qemu/rcu.h" | 17 | static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) |
31 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 18 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) |
32 | 19 | test_quiesce_common(BDRV_DRAIN, false); | |
33 | /** | 20 | } |
34 | * DeviceState: | 21 | |
35 | - * @realized: Indicates whether the device has been fully constructed. | 22 | +static void test_nested(void) |
36 | - * When accessed outside big qemu lock, must be accessed with | ||
37 | - * qatomic_load_acquire() | ||
38 | * @reset: ResettableState for the device; handled by Resettable interface. | ||
39 | * | ||
40 | * This structure should not be accessed directly. We declare it here | ||
41 | @@ -XXX,XX +XXX,XX @@ DeviceState *qdev_new(const char *name); | ||
42 | */ | ||
43 | DeviceState *qdev_try_new(const char *name); | ||
44 | |||
45 | +/** | ||
46 | + * qdev_is_realized: | ||
47 | + * @dev: The device to check. | ||
48 | + * | ||
49 | + * May be called outside big qemu lock. | ||
50 | + * | ||
51 | + * Returns: %true% if the device has been fully constructed, %false% otherwise. | ||
52 | + */ | ||
53 | +static inline bool qdev_is_realized(DeviceState *dev) | ||
54 | +{ | 23 | +{ |
55 | + return qatomic_load_acquire(&dev->realized); | 24 | + BlockBackend *blk; |
25 | + BlockDriverState *bs, *backing; | ||
26 | + BDRVTestState *s, *backing_s; | ||
27 | + enum drain_type outer, inner; | ||
28 | + | ||
29 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
30 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, | ||
31 | + &error_abort); | ||
32 | + s = bs->opaque; | ||
33 | + blk_insert_bs(blk, bs, &error_abort); | ||
34 | + | ||
35 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
36 | + backing_s = backing->opaque; | ||
37 | + bdrv_set_backing_hd(bs, backing, &error_abort); | ||
38 | + | ||
39 | + for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { | ||
40 | + for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { | ||
41 | + /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ | ||
42 | + int bs_quiesce = (outer != BDRV_DRAIN_ALL) + | ||
43 | + (inner != BDRV_DRAIN_ALL); | ||
44 | + int backing_quiesce = 0; | ||
45 | + int backing_cb_cnt = (outer != BDRV_DRAIN) + | ||
46 | + (inner != BDRV_DRAIN); | ||
47 | + | ||
48 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
49 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
50 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
51 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
52 | + | ||
53 | + do_drain_begin(outer, bs); | ||
54 | + do_drain_begin(inner, bs); | ||
55 | + | ||
56 | + g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce); | ||
57 | + g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); | ||
58 | + g_assert_cmpint(s->drain_count, ==, 2); | ||
59 | + g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt); | ||
60 | + | ||
61 | + do_drain_end(inner, bs); | ||
62 | + do_drain_end(outer, bs); | ||
63 | + | ||
64 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
65 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
66 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
67 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
68 | + } | ||
69 | + } | ||
70 | + | ||
71 | + bdrv_unref(backing); | ||
72 | + bdrv_unref(bs); | ||
73 | + blk_unref(blk); | ||
56 | +} | 74 | +} |
57 | + | 75 | + |
58 | /** | 76 | |
59 | * qdev_realize: Realize @dev. | 77 | typedef struct TestBlockJob { |
60 | * @dev: device to realize | 78 | BlockJob common; |
61 | diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c | 79 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
62 | index XXXXXXX..XXXXXXX 100644 | 80 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); |
63 | --- a/hw/scsi/scsi-bus.c | 81 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); |
64 | +++ b/hw/scsi/scsi-bus.c | 82 | |
65 | @@ -XXX,XX +XXX,XX @@ static SCSIDevice *do_scsi_device_find(SCSIBus *bus, | 83 | + g_test_add_func("/bdrv-drain/nested", test_nested); |
66 | * the user access the device. | 84 | + |
67 | */ | 85 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); |
68 | 86 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | |
69 | - if (retval && !include_unrealized && | ||
70 | - !qatomic_load_acquire(&retval->qdev.realized)) { | ||
71 | + if (retval && !include_unrealized && !qdev_is_realized(&retval->qdev)) { | ||
72 | retval = NULL; | ||
73 | } | ||
74 | 87 | ||
75 | -- | 88 | -- |
76 | 2.40.1 | 89 | 2.13.6 |
77 | 90 | ||
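A usage sketch for the qdev helper introduced above, mirroring the
scsi-bus hunk: filter out half-constructed devices when looking them up
outside the BQL (lookup_if_realized is illustrative, not QEMU API):

    static SCSIDevice *lookup_if_realized(SCSIDevice *dev)
    {
        /* Safe without the BQL: qdev_is_realized() is a load-acquire
         * that pairs with the store made when realization completes. */
        if (dev && !qdev_is_realized(&dev->qdev)) {
            return NULL;
        }
        return dev;
    }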
1 | The function documentation already says that all callers must hold the | 1 | This is in preparation for subtree drains, i.e. drained sections that |
---|---|---|---|
2 | main AioContext lock, but not all of them do. This can cause assertion | 2 | affect not only a single node, but recursively all child nodes, too. |
3 | failures when functions called by bdrv_open() try to drop the lock. Fix | 3 | |
4 | a few more callers to take the lock before calling bdrv_open(). | 4 | Calling the parent callbacks for drain is pointless when we just came |
5 | 5 | from that parent node recursively and leads to multiple increases of | |
6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 6 | bs->quiesce_counter in a single drain call. Don't do it. |
7 | Message-Id: <20230525124713.401149-4-kwolf@redhat.com> | 7 | |
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 8 | In order for this to work correctly, the parent callback must be called |
9 | for every bdrv_drain_begin/end() call, not only for the outermost one: | ||
10 | |||
11 | If we have a node N with two parents A and B, recursive draining of A | ||
12 | should cause the quiesce_counter of B to increase because its child N is | ||
13 | drained independently of B. If now B is recursively drained, too, A must | ||
14 | increase its quiesce_counter because N is drained independently of A | ||
15 | only now, even if N is going from quiesce_counter 1 to 2. | ||
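The A/B/N rule can be made concrete with a toy model (plain C, no QEMU
types): every drain_begin on a child notifies all of its parents except
the one the drain came from.

    #include <assert.h>

    typedef struct Parent Parent;

    typedef struct Node {
        int quiesce_counter;
        Parent *parents[2];
        int n_parents;
    } Node;

    struct Parent {
        int quiesce_counter;
        Node *child;
    };

    static void node_drain_begin(Node *n, Parent *ignore)
    {
        n->quiesce_counter++;
        for (int i = 0; i < n->n_parents; i++) {
            if (n->parents[i] != ignore) {
                n->parents[i]->quiesce_counter++;
            }
        }
    }

    static void parent_drain_begin(Parent *p)
    {
        node_drain_begin(p->child, p);   /* don't recurse back into p */
    }

    int main(void)
    {
        Node n = {0};
        Parent a = {0, &n}, b = {0, &n};
        n.parents[0] = &a;
        n.parents[1] = &b;
        n.n_parents = 2;

        parent_drain_begin(&a);   /* N: 0->1, B: 0->1 */
        assert(n.quiesce_counter == 1 && b.quiesce_counter == 1);

        parent_drain_begin(&b);   /* N: 1->2, A: 0->1 */
        assert(n.quiesce_counter == 2 && a.quiesce_counter == 1);
        return 0;
    }

Without the per-call parent notification, the second begin (N going from
1 to 2) would be skipped and A would never be quiesced.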
16 | |||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 18 | --- |
11 | block.c | 3 +++ | 19 | include/block/block.h | 4 ++-- |
12 | block/block-backend.c | 2 ++ | 20 | block.c | 13 +++++++++---- |
13 | block/qapi-sysemu.c | 3 +++ | 21 | block/io.c | 47 ++++++++++++++++++++++++++++++++++------------- |
14 | blockdev.c | 29 +++++++++++++++++++++++------ | 22 | 3 files changed, 45 insertions(+), 19 deletions(-) |
15 | qemu-nbd.c | 4 ++++ | 23 | |
16 | tests/unit/test-block-iothread.c | 3 +++ | 24 | diff --git a/include/block/block.h b/include/block/block.h |
17 | 6 files changed, 38 insertions(+), 6 deletions(-) | 25 | index XXXXXXX..XXXXXXX 100644 |
18 | 26 | --- a/include/block/block.h | |
27 | +++ b/include/block/block.h | ||
28 | @@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs); | ||
29 | * Begin a quiesced section of all users of @bs. This is part of | ||
30 | * bdrv_drained_begin. | ||
31 | */ | ||
32 | -void bdrv_parent_drained_begin(BlockDriverState *bs); | ||
33 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); | ||
34 | |||
35 | /** | ||
36 | * bdrv_parent_drained_end: | ||
37 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs); | ||
38 | * End a quiesced section of all users of @bs. This is part of | ||
39 | * bdrv_drained_end. | ||
40 | */ | ||
41 | -void bdrv_parent_drained_end(BlockDriverState *bs); | ||
42 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); | ||
43 | |||
44 | /** | ||
45 | * bdrv_drained_begin: | ||
19 | diff --git a/block.c b/block.c | 46 | diff --git a/block.c b/block.c |
20 | index XXXXXXX..XXXXXXX 100644 | 47 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block.c | 48 | --- a/block.c |
22 | +++ b/block.c | 49 | +++ b/block.c |
23 | @@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt, | 50 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, |
51 | BlockDriverState *new_bs) | ||
52 | { | ||
53 | BlockDriverState *old_bs = child->bs; | ||
54 | + int i; | ||
55 | |||
56 | if (old_bs && new_bs) { | ||
57 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); | ||
58 | } | ||
59 | if (old_bs) { | ||
60 | if (old_bs->quiesce_counter && child->role->drained_end) { | ||
61 | - child->role->drained_end(child); | ||
62 | + for (i = 0; i < old_bs->quiesce_counter; i++) { | ||
63 | + child->role->drained_end(child); | ||
64 | + } | ||
65 | } | ||
66 | if (child->role->detach) { | ||
67 | child->role->detach(child); | ||
68 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
69 | if (new_bs) { | ||
70 | QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); | ||
71 | if (new_bs->quiesce_counter && child->role->drained_begin) { | ||
72 | - child->role->drained_begin(child); | ||
73 | + for (i = 0; i < new_bs->quiesce_counter; i++) { | ||
74 | + child->role->drained_begin(child); | ||
75 | + } | ||
76 | } | ||
77 | |||
78 | if (child->role->attach) { | ||
79 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) | ||
80 | AioContext *ctx = bdrv_get_aio_context(bs); | ||
81 | |||
82 | aio_disable_external(ctx); | ||
83 | - bdrv_parent_drained_begin(bs); | ||
84 | + bdrv_parent_drained_begin(bs, NULL); | ||
85 | bdrv_drain(bs); /* ensure there are no in-flight requests */ | ||
86 | |||
87 | while (aio_poll(ctx, false)) { | ||
88 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) | ||
89 | */ | ||
90 | aio_context_acquire(new_context); | ||
91 | bdrv_attach_aio_context(bs, new_context); | ||
92 | - bdrv_parent_drained_end(bs); | ||
93 | + bdrv_parent_drained_end(bs, NULL); | ||
94 | aio_enable_external(ctx); | ||
95 | aio_context_release(new_context); | ||
96 | } | ||
97 | diff --git a/block/io.c b/block/io.c | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/block/io.c | ||
100 | +++ b/block/io.c | ||
101 | @@ -XXX,XX +XXX,XX @@ | ||
102 | static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, | ||
103 | int64_t offset, int bytes, BdrvRequestFlags flags); | ||
104 | |||
105 | -void bdrv_parent_drained_begin(BlockDriverState *bs) | ||
106 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) | ||
107 | { | ||
108 | BdrvChild *c, *next; | ||
109 | |||
110 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { | ||
111 | + if (c == ignore) { | ||
112 | + continue; | ||
113 | + } | ||
114 | if (c->role->drained_begin) { | ||
115 | c->role->drained_begin(c); | ||
116 | } | ||
117 | } | ||
118 | } | ||
119 | |||
120 | -void bdrv_parent_drained_end(BlockDriverState *bs) | ||
121 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) | ||
122 | { | ||
123 | BdrvChild *c, *next; | ||
124 | |||
125 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { | ||
126 | + if (c == ignore) { | ||
127 | + continue; | ||
128 | + } | ||
129 | if (c->role->drained_end) { | ||
130 | c->role->drained_end(c); | ||
131 | } | ||
132 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
133 | BlockDriverState *bs; | ||
134 | bool done; | ||
135 | bool begin; | ||
136 | + BdrvChild *parent; | ||
137 | } BdrvCoDrainData; | ||
138 | |||
139 | static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs) | ||
141 | return waited; | ||
142 | } | ||
143 | |||
144 | +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent); | ||
145 | +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); | ||
146 | + | ||
147 | static void bdrv_co_drain_bh_cb(void *opaque) | ||
148 | { | ||
149 | BdrvCoDrainData *data = opaque; | ||
150 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
151 | |||
152 | bdrv_dec_in_flight(bs); | ||
153 | if (data->begin) { | ||
154 | - bdrv_drained_begin(bs); | ||
155 | + bdrv_do_drained_begin(bs, data->parent); | ||
156 | } else { | ||
157 | - bdrv_drained_end(bs); | ||
158 | + bdrv_do_drained_end(bs, data->parent); | ||
159 | } | ||
160 | |||
161 | data->done = true; | ||
162 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
163 | } | ||
164 | |||
165 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
166 | - bool begin) | ||
167 | + bool begin, BdrvChild *parent) | ||
168 | { | ||
169 | BdrvCoDrainData data; | ||
170 | |||
171 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
172 | .bs = bs, | ||
173 | .done = false, | ||
174 | .begin = begin, | ||
175 | + .parent = parent, | ||
176 | }; | ||
177 | bdrv_inc_in_flight(bs); | ||
178 | aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), | ||
179 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
180 | assert(data.done); | ||
181 | } | ||
182 | |||
183 | -void bdrv_drained_begin(BlockDriverState *bs) | ||
184 | +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) | ||
185 | { | ||
186 | if (qemu_in_coroutine()) { | ||
187 | - bdrv_co_yield_to_drain(bs, true); | ||
188 | + bdrv_co_yield_to_drain(bs, true, parent); | ||
24 | return; | 189 | return; |
25 | } | 190 | } |
26 | 191 | ||
27 | + aio_context_acquire(qemu_get_aio_context()); | 192 | /* Stop things in parent-to-child order */ |
193 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { | ||
194 | aio_disable_external(bdrv_get_aio_context(bs)); | ||
195 | - bdrv_parent_drained_begin(bs); | ||
196 | } | ||
197 | |||
198 | + bdrv_parent_drained_begin(bs, parent); | ||
199 | bdrv_drain_invoke(bs, true, false); | ||
200 | bdrv_drain_recurse(bs); | ||
201 | } | ||
202 | |||
203 | -void bdrv_drained_end(BlockDriverState *bs) | ||
204 | +void bdrv_drained_begin(BlockDriverState *bs) | ||
205 | +{ | ||
206 | + bdrv_do_drained_begin(bs, NULL); | ||
207 | +} | ||
28 | + | 208 | + |
29 | /* Create parameter list */ | 209 | +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) |
30 | create_opts = qemu_opts_append(create_opts, drv->create_opts); | 210 | { |
31 | create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); | 211 | int old_quiesce_counter; |
32 | @@ -XXX,XX +XXX,XX @@ out: | 212 | |
33 | qemu_opts_del(opts); | 213 | if (qemu_in_coroutine()) { |
34 | qemu_opts_free(create_opts); | 214 | - bdrv_co_yield_to_drain(bs, false); |
35 | error_propagate(errp, local_err); | 215 | + bdrv_co_yield_to_drain(bs, false, parent); |
36 | + aio_context_release(qemu_get_aio_context()); | 216 | return; |
37 | } | 217 | } |
38 | 218 | assert(bs->quiesce_counter > 0); | |
39 | AioContext *bdrv_get_aio_context(BlockDriverState *bs) | 219 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
40 | diff --git a/block/block-backend.c b/block/block-backend.c | 220 | |
41 | index XXXXXXX..XXXXXXX 100644 | 221 | /* Re-enable things in child-to-parent order */ |
42 | --- a/block/block-backend.c | 222 | bdrv_drain_invoke(bs, false, false); |
43 | +++ b/block/block-backend.c | 223 | + bdrv_parent_drained_end(bs, parent); |
44 | @@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_open(const char *filename, const char *reference, | 224 | if (old_quiesce_counter == 1) { |
45 | } | 225 | - bdrv_parent_drained_end(bs); |
46 | 226 | aio_enable_external(bdrv_get_aio_context(bs)); | |
47 | blk = blk_new(qemu_get_aio_context(), perm, shared); | 227 | } |
48 | + aio_context_acquire(qemu_get_aio_context()); | 228 | } |
49 | bs = bdrv_open(filename, reference, options, flags, errp); | 229 | |
50 | + aio_context_release(qemu_get_aio_context()); | 230 | +void bdrv_drained_end(BlockDriverState *bs) |
51 | if (!bs) { | 231 | +{ |
52 | blk_unref(blk); | 232 | + bdrv_do_drained_end(bs, NULL); |
53 | return NULL; | 233 | +} |
54 | diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/block/qapi-sysemu.c | ||
57 | +++ b/block/qapi-sysemu.c | ||
58 | @@ -XXX,XX +XXX,XX @@ void qmp_blockdev_change_medium(const char *device, | ||
59 | qdict_put_str(options, "driver", format); | ||
60 | } | ||
61 | |||
62 | + aio_context_acquire(qemu_get_aio_context()); | ||
63 | medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp); | ||
64 | + aio_context_release(qemu_get_aio_context()); | ||
65 | + | 234 | + |
66 | if (!medium_bs) { | 235 | /* |
67 | goto fail; | 236 | * Wait for pending requests to complete on a single BlockDriverState subtree, |
68 | } | 237 | * and suspend block driver's internal I/O until next request arrives. |
69 | diff --git a/blockdev.c b/blockdev.c | 238 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
70 | index XXXXXXX..XXXXXXX 100644 | 239 | /* Stop things in parent-to-child order */ |
71 | --- a/blockdev.c | 240 | aio_context_acquire(aio_context); |
72 | +++ b/blockdev.c | 241 | aio_disable_external(aio_context); |
73 | @@ -XXX,XX +XXX,XX @@ err_no_opts: | 242 | - bdrv_parent_drained_begin(bs); |
74 | /* Takes the ownership of bs_opts */ | 243 | + bdrv_parent_drained_begin(bs, NULL); |
75 | BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) | 244 | bdrv_drain_invoke(bs, true, true); |
76 | { | 245 | aio_context_release(aio_context); |
77 | + BlockDriverState *bs; | 246 | |
78 | int bdrv_flags = 0; | 247 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
79 | 248 | /* Re-enable things in child-to-parent order */ | |
80 | GLOBAL_STATE_CODE(); | 249 | aio_context_acquire(aio_context); |
81 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) | 250 | bdrv_drain_invoke(bs, false, true); |
82 | bdrv_flags |= BDRV_O_INACTIVE; | 251 | - bdrv_parent_drained_end(bs); |
83 | } | 252 | + bdrv_parent_drained_end(bs, NULL); |
84 | 253 | aio_enable_external(aio_context); | |
85 | - return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); | 254 | aio_context_release(aio_context); |
86 | + aio_context_acquire(qemu_get_aio_context()); | 255 | } |
87 | + bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); | ||
88 | + aio_context_release(qemu_get_aio_context()); | ||
89 | + | ||
90 | + return bs; | ||
91 | } | ||
92 | |||
93 | void blockdev_close_all_bdrv_states(void) | ||
94 | @@ -XXX,XX +XXX,XX @@ static void external_snapshot_action(TransactionAction *action, | ||
95 | } | ||
96 | qdict_put_str(options, "driver", format); | ||
97 | } | ||
98 | + aio_context_release(aio_context); | ||
99 | |||
100 | + aio_context_acquire(qemu_get_aio_context()); | ||
101 | state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags, | ||
102 | errp); | ||
103 | + aio_context_release(qemu_get_aio_context()); | ||
104 | + | ||
105 | /* We will manually add the backing_hd field to the bs later */ | ||
106 | if (!state->new_bs) { | ||
107 | - goto out; | ||
108 | + return; | ||
109 | } | ||
110 | |||
111 | + aio_context_acquire(aio_context); | ||
112 | + | ||
113 | /* | ||
114 | * Allow attaching a backing file to an overlay that's already in use only | ||
115 | * if the parents don't assume that they are already seeing a valid image. | ||
116 | @@ -XXX,XX +XXX,XX @@ static void drive_backup_action(DriveBackup *backup, | ||
117 | if (format) { | ||
118 | qdict_put_str(options, "driver", format); | ||
119 | } | ||
120 | + aio_context_release(aio_context); | ||
121 | |||
122 | + aio_context_acquire(qemu_get_aio_context()); | ||
123 | target_bs = bdrv_open(backup->target, NULL, options, flags, errp); | ||
124 | + aio_context_release(qemu_get_aio_context()); | ||
125 | + | ||
126 | if (!target_bs) { | ||
127 | - goto out; | ||
128 | + return; | ||
129 | } | ||
130 | |||
131 | /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ | ||
132 | old_context = bdrv_get_aio_context(target_bs); | ||
133 | - aio_context_release(aio_context); | ||
134 | aio_context_acquire(old_context); | ||
135 | |||
136 | ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); | ||
137 | @@ -XXX,XX +XXX,XX @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) | ||
138 | if (format) { | ||
139 | qdict_put_str(options, "driver", format); | ||
140 | } | ||
141 | + aio_context_release(aio_context); | ||
142 | |||
143 | /* Mirroring takes care of copy-on-write using the source's backing | ||
144 | * file. | ||
145 | */ | ||
146 | + aio_context_acquire(qemu_get_aio_context()); | ||
147 | target_bs = bdrv_open(arg->target, NULL, options, flags, errp); | ||
148 | + aio_context_release(qemu_get_aio_context()); | ||
149 | + | ||
150 | if (!target_bs) { | ||
151 | - goto out; | ||
152 | + return; | ||
153 | } | ||
154 | |||
155 | zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL && | ||
156 | @@ -XXX,XX +XXX,XX @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) | ||
157 | |||
158 | /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ | ||
159 | old_context = bdrv_get_aio_context(target_bs); | ||
160 | - aio_context_release(aio_context); | ||
161 | aio_context_acquire(old_context); | ||
162 | |||
163 | ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); | ||
164 | diff --git a/qemu-nbd.c b/qemu-nbd.c | ||
165 | index XXXXXXX..XXXXXXX 100644 | ||
166 | --- a/qemu-nbd.c | ||
167 | +++ b/qemu-nbd.c | ||
168 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
169 | qdict_put_str(raw_opts, "driver", "raw"); | ||
170 | qdict_put_str(raw_opts, "file", bs->node_name); | ||
171 | qdict_put_int(raw_opts, "offset", dev_offset); | ||
172 | + | ||
173 | + aio_context_acquire(qemu_get_aio_context()); | ||
174 | bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal); | ||
175 | + aio_context_release(qemu_get_aio_context()); | ||
176 | + | ||
177 | blk_remove_bs(blk); | ||
178 | blk_insert_bs(blk, bs, &error_fatal); | ||
179 | bdrv_unref(bs); | ||
180 | diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c | ||
181 | index XXXXXXX..XXXXXXX 100644 | ||
182 | --- a/tests/unit/test-block-iothread.c | ||
183 | +++ b/tests/unit/test-block-iothread.c | ||
184 | @@ -XXX,XX +XXX,XX @@ static void test_attach_second_node(void) | ||
185 | qdict_put_str(options, "driver", "raw"); | ||
186 | qdict_put_str(options, "file", "base"); | ||
187 | |||
188 | + /* FIXME raw_open() should take ctx's lock internally */ | ||
189 | aio_context_acquire(ctx); | ||
190 | + aio_context_acquire(main_ctx); | ||
191 | filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); | ||
192 | + aio_context_release(main_ctx); | ||
193 | aio_context_release(ctx); | ||
194 | |||
195 | g_assert(blk_get_aio_context(blk) == ctx); | ||
196 | -- | 256 | -- |
197 | 2.40.1 | 257 | 2.13.6 |
258 | |||
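The locking pattern the patch above applies around bdrv_open() recurs throughout the series, so it is worth spelling out once. A minimal sketch, assuming a caller running in the main loop; my_open_node() is an invented name, not a helper from the series:

    static BlockDriverState *my_open_node(QDict *bs_opts, int flags,
                                          Error **errp)
    {
        BlockDriverState *bs;

        /* New nodes start in the main AioContext, so take that lock */
        aio_context_acquire(qemu_get_aio_context());
        bs = bdrv_open(NULL, NULL, bs_opts, flags, errp);
        aio_context_release(qemu_get_aio_context());

        return bs;
    }

As in the bds_tree_init() hunk above, the main context lock is held only for the bdrv_open() call itself; callers that already hold a different AioContext lock release that one first.
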
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | bdrv_drained_begin() waits for the completion of requests in the whole |
---|---|---|---|
2 | subtree, but it only actually keeps its immediate bs parameter quiesced | ||
3 | until bdrv_drained_end(). | ||
2 | 4 | ||
3 | The FUSE export calls blk_exp_ref/unref() without the AioContext lock. | 5 | Add a version that keeps the whole subtree drained. As of this commit, |
4 | Instead of fixing the FUSE export, adjust blk_exp_ref/unref() so they | 6 | graph changes cannot be allowed during a subtree drained section, but |
5 | work without the AioContext lock. This way it's less error-prone. | 7 | this will be fixed soon. |
6 | 8 | ||
7 | Suggested-by: Paolo Bonzini <pbonzini@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | Message-Id: <20230516190238.8401-15-stefanha@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | --- | 10 | --- |
13 | include/block/export.h | 2 ++ | 11 | include/block/block.h | 13 +++++++++++++ |
14 | block/export/export.c | 13 ++++++------- | 12 | block/io.c | 54 ++++++++++++++++++++++++++++++++++++++++----------- |
15 | block/export/vduse-blk.c | 4 ---- | 13 | 2 files changed, 56 insertions(+), 11 deletions(-) |
16 | 3 files changed, 8 insertions(+), 11 deletions(-) | ||
17 | 14 | ||
18 | diff --git a/include/block/export.h b/include/block/export.h | 15 | diff --git a/include/block/block.h b/include/block/block.h |
19 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/include/block/export.h | 17 | --- a/include/block/block.h |
21 | +++ b/include/block/export.h | 18 | +++ b/include/block/block.h |
22 | @@ -XXX,XX +XXX,XX @@ struct BlockExport { | 19 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); |
23 | * Reference count for this block export. This includes strong references | 20 | void bdrv_drained_begin(BlockDriverState *bs); |
24 | * both from the owner (qemu-nbd or the monitor) and clients connected to | 21 | |
25 | * the export. | 22 | /** |
26 | + * | 23 | + * Like bdrv_drained_begin, but recursively begins a quiesced section for |
27 | + * Use atomics to access this field. | 24 | + * exclusive access to all child nodes as well. |
28 | */ | 25 | + * |
29 | int refcount; | 26 | + * Graph changes are not allowed during a subtree drain section. |
30 | 27 | + */ | |
31 | diff --git a/block/export/export.c b/block/export/export.c | 28 | +void bdrv_subtree_drained_begin(BlockDriverState *bs); |
29 | + | ||
30 | +/** | ||
31 | * bdrv_drained_end: | ||
32 | * | ||
33 | * End a quiescent section started by bdrv_drained_begin(). | ||
34 | */ | ||
35 | void bdrv_drained_end(BlockDriverState *bs); | ||
36 | |||
37 | +/** | ||
38 | + * End a quiescent section started by bdrv_subtree_drained_begin(). | ||
39 | + */ | ||
40 | +void bdrv_subtree_drained_end(BlockDriverState *bs); | ||
41 | + | ||
42 | void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, | ||
43 | Error **errp); | ||
44 | void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); | ||
45 | diff --git a/block/io.c b/block/io.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | 46 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/block/export/export.c | 47 | --- a/block/io.c |
34 | +++ b/block/export/export.c | 48 | +++ b/block/io.c |
35 | @@ -XXX,XX +XXX,XX @@ fail: | 49 | @@ -XXX,XX +XXX,XX @@ typedef struct { |
36 | return NULL; | 50 | BlockDriverState *bs; |
51 | bool done; | ||
52 | bool begin; | ||
53 | + bool recursive; | ||
54 | BdrvChild *parent; | ||
55 | } BdrvCoDrainData; | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs) | ||
58 | return waited; | ||
37 | } | 59 | } |
38 | 60 | ||
39 | -/* Callers must hold exp->ctx lock */ | 61 | -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent); |
40 | void blk_exp_ref(BlockExport *exp) | 62 | -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); |
63 | +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
64 | + BdrvChild *parent); | ||
65 | +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
66 | + BdrvChild *parent); | ||
67 | |||
68 | static void bdrv_co_drain_bh_cb(void *opaque) | ||
41 | { | 69 | { |
42 | - assert(exp->refcount > 0); | 70 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) |
43 | - exp->refcount++; | 71 | |
44 | + assert(qatomic_read(&exp->refcount) > 0); | 72 | bdrv_dec_in_flight(bs); |
45 | + qatomic_inc(&exp->refcount); | 73 | if (data->begin) { |
74 | - bdrv_do_drained_begin(bs, data->parent); | ||
75 | + bdrv_do_drained_begin(bs, data->recursive, data->parent); | ||
76 | } else { | ||
77 | - bdrv_do_drained_end(bs, data->parent); | ||
78 | + bdrv_do_drained_end(bs, data->recursive, data->parent); | ||
79 | } | ||
80 | |||
81 | data->done = true; | ||
82 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
46 | } | 83 | } |
47 | 84 | ||
48 | /* Runs in the main thread */ | 85 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
49 | @@ -XXX,XX +XXX,XX @@ static void blk_exp_delete_bh(void *opaque) | 86 | - bool begin, BdrvChild *parent) |
50 | aio_context_release(aio_context); | 87 | + bool begin, bool recursive, |
88 | + BdrvChild *parent) | ||
89 | { | ||
90 | BdrvCoDrainData data; | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
93 | .bs = bs, | ||
94 | .done = false, | ||
95 | .begin = begin, | ||
96 | + .recursive = recursive, | ||
97 | .parent = parent, | ||
98 | }; | ||
99 | bdrv_inc_in_flight(bs); | ||
100 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
101 | assert(data.done); | ||
51 | } | 102 | } |
52 | 103 | ||
53 | -/* Callers must hold exp->ctx lock */ | 104 | -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) |
54 | void blk_exp_unref(BlockExport *exp) | 105 | +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
106 | + BdrvChild *parent) | ||
55 | { | 107 | { |
56 | - assert(exp->refcount > 0); | 108 | + BdrvChild *child, *next; |
57 | - if (--exp->refcount == 0) { | 109 | + |
58 | + assert(qatomic_read(&exp->refcount) > 0); | 110 | if (qemu_in_coroutine()) { |
59 | + if (qatomic_fetch_dec(&exp->refcount) == 1) { | 111 | - bdrv_co_yield_to_drain(bs, true, parent); |
60 | /* Touch the block_exports list only in the main thread */ | 112 | + bdrv_co_yield_to_drain(bs, true, recursive, parent); |
61 | aio_bh_schedule_oneshot(qemu_get_aio_context(), blk_exp_delete_bh, | 113 | return; |
62 | exp); | ||
63 | @@ -XXX,XX +XXX,XX @@ void qmp_block_export_del(const char *id, | ||
64 | if (!has_mode) { | ||
65 | mode = BLOCK_EXPORT_REMOVE_MODE_SAFE; | ||
66 | } | 114 | } |
67 | - if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE && exp->refcount > 1) { | 115 | |
68 | + if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE && | 116 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) |
69 | + qatomic_read(&exp->refcount) > 1) { | 117 | bdrv_parent_drained_begin(bs, parent); |
70 | error_setg(errp, "export '%s' still in use", exp->id); | 118 | bdrv_drain_invoke(bs, true, false); |
71 | error_append_hint(errp, "Use mode='hard' to force client " | 119 | bdrv_drain_recurse(bs); |
72 | "disconnect\n"); | 120 | + |
73 | diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c | 121 | + if (recursive) { |
74 | index XXXXXXX..XXXXXXX 100644 | 122 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { |
75 | --- a/block/export/vduse-blk.c | 123 | + bdrv_do_drained_begin(child->bs, true, child); |
76 | +++ b/block/export/vduse-blk.c | 124 | + } |
77 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp) | 125 | + } |
126 | } | ||
127 | |||
128 | void bdrv_drained_begin(BlockDriverState *bs) | ||
78 | { | 129 | { |
79 | if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) { | 130 | - bdrv_do_drained_begin(bs, NULL); |
80 | /* Prevent export from being deleted */ | 131 | + bdrv_do_drained_begin(bs, false, NULL); |
81 | - aio_context_acquire(vblk_exp->export.ctx); | 132 | +} |
82 | blk_exp_ref(&vblk_exp->export); | 133 | + |
83 | - aio_context_release(vblk_exp->export.ctx); | 134 | +void bdrv_subtree_drained_begin(BlockDriverState *bs) |
135 | +{ | ||
136 | + bdrv_do_drained_begin(bs, true, NULL); | ||
137 | } | ||
138 | |||
139 | -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) | ||
140 | +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
141 | + BdrvChild *parent) | ||
142 | { | ||
143 | + BdrvChild *child, *next; | ||
144 | int old_quiesce_counter; | ||
145 | |||
146 | if (qemu_in_coroutine()) { | ||
147 | - bdrv_co_yield_to_drain(bs, false, parent); | ||
148 | + bdrv_co_yield_to_drain(bs, false, recursive, parent); | ||
149 | return; | ||
84 | } | 150 | } |
151 | assert(bs->quiesce_counter > 0); | ||
152 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) | ||
153 | if (old_quiesce_counter == 1) { | ||
154 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
155 | } | ||
156 | + | ||
157 | + if (recursive) { | ||
158 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
159 | + bdrv_do_drained_end(child->bs, true, child); | ||
160 | + } | ||
161 | + } | ||
85 | } | 162 | } |
86 | 163 | ||
87 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp) | 164 | void bdrv_drained_end(BlockDriverState *bs) |
88 | aio_wait_kick(); | 165 | { |
89 | 166 | - bdrv_do_drained_end(bs, NULL); | |
90 | /* Now the export can be deleted */ | 167 | + bdrv_do_drained_end(bs, false, NULL); |
91 | - aio_context_acquire(vblk_exp->export.ctx); | 168 | +} |
92 | blk_exp_unref(&vblk_exp->export); | 169 | + |
93 | - aio_context_release(vblk_exp->export.ctx); | 170 | +void bdrv_subtree_drained_end(BlockDriverState *bs) |
94 | } | 171 | +{ |
172 | + bdrv_do_drained_end(bs, true, NULL); | ||
95 | } | 173 | } |
96 | 174 | ||
175 | /* | ||
97 | -- | 176 | -- |
98 | 2.40.1 | 177 | 2.13.6 |
178 | |||
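The blk_exp_ref/unref() conversion in the patch above follows a standard lock-free refcount shape. A sketch of that shape on an invented MyExport type (my_export_delete_bh() is a stub), assuming qemu/atomic.h semantics:

    typedef struct MyExport {
        int refcount; /* atomic; owner and client references combined */
    } MyExport;

    /* Runs in the main loop thread, where shared lists may be touched */
    static void my_export_delete_bh(void *opaque)
    {
        g_free(opaque);
    }

    static void my_export_ref(MyExport *exp)
    {
        assert(qatomic_read(&exp->refcount) > 0);
        qatomic_inc(&exp->refcount);
    }

    static void my_export_unref(MyExport *exp)
    {
        assert(qatomic_read(&exp->refcount) > 0);
        if (qatomic_fetch_dec(&exp->refcount) == 1) {
            /* Last reference gone: defer teardown to the main loop */
            aio_bh_schedule_oneshot(qemu_get_aio_context(),
                                    my_export_delete_bh, exp);
        }
    }

Deferring the final teardown to a main loop BH is what allows the last unref to happen from any thread while list manipulation stays in the main thread, matching the comment in the patch.
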
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | Add a subtree drain version to the existing test cases. |
---|---|---|---|
2 | 2 | ||
3 | vhost-user activity must be suspended during bdrv_drained_begin/end(). | ||
4 | This prevents new requests from interfering with whatever is happening | ||
5 | in the drained section. | ||
6 | |||
7 | Previously this was done using aio_set_fd_handler()'s is_external | ||
8 | argument. In a multi-queue block layer world the aio_disable_external() | ||
9 | API cannot be used since multiple AioContext may be processing I/O, not | ||
10 | just one. | ||
11 | |||
12 | Switch to BlockDevOps->drained_begin/end() callbacks. | ||
13 | |||
14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
15 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
16 | Message-Id: <20230516190238.8401-8-stefanha@redhat.com> | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
18 | --- | 4 | --- |
19 | block/export/vhost-user-blk-server.c | 28 ++++++++++++++++++++++++++-- | 5 | tests/test-bdrv-drain.c | 27 ++++++++++++++++++++++++++- |
20 | util/vhost-user-server.c | 10 +++++----- | 6 | 1 file changed, 26 insertions(+), 1 deletion(-) |
21 | 2 files changed, 31 insertions(+), 7 deletions(-) | ||
22 | 7 | ||
23 | diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
24 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/block/export/vhost-user-blk-server.c | 10 | --- a/tests/test-bdrv-drain.c |
26 | +++ b/block/export/vhost-user-blk-server.c | 11 | +++ b/tests/test-bdrv-drain.c |
27 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque) | 12 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
28 | { | 13 | enum drain_type { |
29 | VuBlkExport *vexp = opaque; | 14 | BDRV_DRAIN_ALL, |
30 | 15 | BDRV_DRAIN, | |
31 | + /* | 16 | + BDRV_SUBTREE_DRAIN, |
32 | + * The actual attach will happen in vu_blk_drained_end() and we just | 17 | DRAIN_TYPE_MAX, |
33 | + * restore ctx here. | 18 | }; |
34 | + */ | 19 | |
35 | vexp->export.ctx = ctx; | 20 | @@ -XXX,XX +XXX,XX @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) |
36 | - vhost_user_server_attach_aio_context(&vexp->vu_server, ctx); | 21 | switch (drain_type) { |
22 | case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; | ||
23 | case BDRV_DRAIN: bdrv_drained_begin(bs); break; | ||
24 | + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; | ||
25 | default: g_assert_not_reached(); | ||
26 | } | ||
37 | } | 27 | } |
38 | 28 | @@ -XXX,XX +XXX,XX @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) | |
39 | static void blk_aio_detach(void *opaque) | 29 | switch (drain_type) { |
40 | { | 30 | case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; |
41 | VuBlkExport *vexp = opaque; | 31 | case BDRV_DRAIN: bdrv_drained_end(bs); break; |
42 | 32 | + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; | |
43 | - vhost_user_server_detach_aio_context(&vexp->vu_server); | 33 | default: g_assert_not_reached(); |
44 | + /* | 34 | } |
45 | + * The actual detach already happened in vu_blk_drained_begin() but from | ||
46 | + * this point on we must not access ctx anymore. | ||
47 | + */ | ||
48 | vexp->export.ctx = NULL; | ||
49 | } | 35 | } |
50 | 36 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void) | |
51 | @@ -XXX,XX +XXX,XX @@ static void vu_blk_exp_resize(void *opaque) | 37 | test_drv_cb_common(BDRV_DRAIN, false); |
52 | vu_config_change_msg(&vexp->vu_server.vu_dev); | ||
53 | } | 38 | } |
54 | 39 | ||
55 | +/* Called with vexp->export.ctx acquired */ | 40 | +static void test_drv_cb_drain_subtree(void) |
56 | +static void vu_blk_drained_begin(void *opaque) | ||
57 | +{ | 41 | +{ |
58 | + VuBlkExport *vexp = opaque; | 42 | + test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); |
59 | + | ||
60 | + vhost_user_server_detach_aio_context(&vexp->vu_server); | ||
61 | +} | 43 | +} |
62 | + | 44 | + |
63 | +/* Called with vexp->export.blk AioContext acquired */ | 45 | static void test_quiesce_common(enum drain_type drain_type, bool recursive) |
64 | +static void vu_blk_drained_end(void *opaque) | 46 | { |
47 | BlockBackend *blk; | ||
48 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) | ||
49 | test_quiesce_common(BDRV_DRAIN, false); | ||
50 | } | ||
51 | |||
52 | +static void test_quiesce_drain_subtree(void) | ||
65 | +{ | 53 | +{ |
66 | + VuBlkExport *vexp = opaque; | 54 | + test_quiesce_common(BDRV_SUBTREE_DRAIN, true); |
67 | + | ||
68 | + vhost_user_server_attach_aio_context(&vexp->vu_server, vexp->export.ctx); | ||
69 | +} | 55 | +} |
70 | + | 56 | + |
71 | /* | 57 | static void test_nested(void) |
72 | * Ensures that bdrv_drained_begin() waits until in-flight requests complete. | 58 | { |
73 | * | 59 | BlockBackend *blk; |
74 | @@ -XXX,XX +XXX,XX @@ static bool vu_blk_drained_poll(void *opaque) | 60 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) |
61 | /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ | ||
62 | int bs_quiesce = (outer != BDRV_DRAIN_ALL) + | ||
63 | (inner != BDRV_DRAIN_ALL); | ||
64 | - int backing_quiesce = 0; | ||
65 | + int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) + | ||
66 | + (inner == BDRV_SUBTREE_DRAIN); | ||
67 | int backing_cb_cnt = (outer != BDRV_DRAIN) + | ||
68 | (inner != BDRV_DRAIN); | ||
69 | |||
70 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain(void) | ||
71 | test_blockjob_common(BDRV_DRAIN); | ||
75 | } | 72 | } |
76 | 73 | ||
77 | static const BlockDevOps vu_blk_dev_ops = { | 74 | +static void test_blockjob_drain_subtree(void) |
78 | + .drained_begin = vu_blk_drained_begin, | 75 | +{ |
79 | + .drained_end = vu_blk_drained_end, | 76 | + test_blockjob_common(BDRV_SUBTREE_DRAIN); |
80 | .drained_poll = vu_blk_drained_poll, | 77 | +} |
81 | .resize_cb = vu_blk_exp_resize, | 78 | + |
82 | }; | 79 | int main(int argc, char **argv) |
83 | diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c | 80 | { |
84 | index XXXXXXX..XXXXXXX 100644 | 81 | bdrv_init(); |
85 | --- a/util/vhost-user-server.c | 82 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
86 | +++ b/util/vhost-user-server.c | 83 | |
87 | @@ -XXX,XX +XXX,XX @@ set_watch(VuDev *vu_dev, int fd, int vu_evt, | 84 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); |
88 | vu_fd_watch->fd = fd; | 85 | g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); |
89 | vu_fd_watch->cb = cb; | 86 | + g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", |
90 | qemu_socket_set_nonblock(fd); | 87 | + test_drv_cb_drain_subtree); |
91 | - aio_set_fd_handler(server->ioc->ctx, fd, true, kick_handler, | 88 | |
92 | + aio_set_fd_handler(server->ioc->ctx, fd, false, kick_handler, | 89 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); |
93 | NULL, NULL, NULL, vu_fd_watch); | 90 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); |
94 | vu_fd_watch->vu_dev = vu_dev; | 91 | + g_test_add_func("/bdrv-drain/quiesce/drain_subtree", |
95 | vu_fd_watch->pvt = pvt; | 92 | + test_quiesce_drain_subtree); |
96 | @@ -XXX,XX +XXX,XX @@ static void remove_watch(VuDev *vu_dev, int fd) | 93 | |
97 | if (!vu_fd_watch) { | 94 | g_test_add_func("/bdrv-drain/nested", test_nested); |
98 | return; | 95 | |
99 | } | 96 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); |
100 | - aio_set_fd_handler(server->ioc->ctx, fd, true, | 97 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); |
101 | + aio_set_fd_handler(server->ioc->ctx, fd, false, | 98 | + g_test_add_func("/bdrv-drain/blockjob/drain_subtree", |
102 | NULL, NULL, NULL, NULL, NULL); | 99 | + test_blockjob_drain_subtree); |
103 | 100 | ||
104 | QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next); | 101 | return g_test_run(); |
105 | @@ -XXX,XX +XXX,XX @@ void vhost_user_server_stop(VuServer *server) | 102 | } |
106 | VuFdWatch *vu_fd_watch; | ||
107 | |||
108 | QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { | ||
109 | - aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true, | ||
110 | + aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false, | ||
111 | NULL, NULL, NULL, NULL, vu_fd_watch); | ||
112 | } | ||
113 | |||
114 | @@ -XXX,XX +XXX,XX @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx) | ||
115 | qio_channel_attach_aio_context(server->ioc, ctx); | ||
116 | |||
117 | QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { | ||
118 | - aio_set_fd_handler(ctx, vu_fd_watch->fd, true, kick_handler, NULL, | ||
119 | + aio_set_fd_handler(ctx, vu_fd_watch->fd, false, kick_handler, NULL, | ||
120 | NULL, NULL, vu_fd_watch); | ||
121 | } | ||
122 | |||
123 | @@ -XXX,XX +XXX,XX @@ void vhost_user_server_detach_aio_context(VuServer *server) | ||
124 | VuFdWatch *vu_fd_watch; | ||
125 | |||
126 | QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { | ||
127 | - aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true, | ||
128 | + aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false, | ||
129 | NULL, NULL, NULL, NULL, vu_fd_watch); | ||
130 | } | ||
131 | |||
132 | -- | 103 | -- |
133 | 2.40.1 | 104 | 2.13.6 |
105 | |||
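The BlockDevOps route used in the patch above reduces to a small amount of boilerplate for any export that owns its own event sources. A hedged sketch; MyExport, my_suspend_fd_handlers() and my_resume_fd_handlers() are invented stand-ins for the real state and fd handling:

    typedef struct MyExport {
        BlockBackend *blk;
        /* ... fds, virtqueues, per-queue watches ... */
    } MyExport;

    static void my_suspend_fd_handlers(MyExport *exp)
    {
        /* e.g. aio_set_fd_handler(ctx, fd, false,
         *                         NULL, NULL, NULL, NULL, NULL); */
    }

    static void my_resume_fd_handlers(MyExport *exp)
    {
        /* e.g. re-register the kick handler for each queue */
    }

    static void my_drained_begin(void *opaque)
    {
        my_suspend_fd_handlers(opaque); /* stop accepting new requests */
    }

    static void my_drained_end(void *opaque)
    {
        my_resume_fd_handlers(opaque);  /* resume request processing */
    }

    static const BlockDevOps my_dev_ops = {
        .drained_begin = my_drained_begin,
        .drained_end   = my_drained_end,
    };

    static void my_export_init(MyExport *exp)
    {
        /* blk_set_dev_ops() wires the callbacks into the drain machinery */
        blk_set_dev_ops(exp->blk, &my_dev_ops, exp);
    }
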
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | If bdrv_do_drained_begin/end() are called in coroutine context, they |
---|---|---|---|
2 | first use a BH to get out of the coroutine context. Call some existing | ||
3 | tests again from a coroutine to cover this code path. | ||
2 | 4 | ||
3 | vduse_blk_detach_ctx() waits for in-flight requests using | ||
4 | AIO_WAIT_WHILE(). This is not allowed according to a comment in | ||
5 | bdrv_set_aio_context_commit(): | ||
6 | |||
7 | /* | ||
8 | * Take the old AioContex when detaching it from bs. | ||
9 | * At this point, new_context lock is already acquired, and we are now | ||
10 | * also taking old_context. This is safe as long as bdrv_detach_aio_context | ||
11 | * does not call AIO_POLL_WHILE(). | ||
12 | */ | ||
13 | |||
14 | Use this opportunity to rewrite the drain code in vduse-blk: | ||
15 | |||
16 | - Use the BlockExport refcount so that vduse_blk_exp_delete() is only | ||
17 | called when there are no more requests in flight. | ||
18 | |||
19 | - Implement .drained_poll() so in-flight request coroutines are stopped | ||
20 | by the time .bdrv_detach_aio_context() is called. | ||
21 | |||
22 | - Remove AIO_WAIT_WHILE() from vduse_blk_detach_ctx() to solve the | ||
23 | .bdrv_detach_aio_context() constraint violation. It's no longer | ||
24 | needed due to the previous changes. | ||
25 | |||
26 | - Always handle the VDUSE file descriptor, even in drained sections. The | ||
27 | VDUSE file descriptor doesn't submit I/O, so it's safe to handle it in | ||
28 | drained sections. This ensures that the VDUSE kernel code gets a fast | ||
29 | response. | ||
30 | |||
31 | - Suspend virtqueue fd handlers in .drained_begin() and resume them in | ||
32 | .drained_end(). This eliminates the need for the | ||
33 | aio_set_fd_handler(is_external=true) flag, which is being removed from | ||
34 | QEMU. | ||
35 | |||
36 | This is a long list but splitting it into individual commits would | ||
37 | probably lead to git bisect failures - the changes are all related. | ||
38 | |||
39 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
40 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
41 | Message-Id: <20230516190238.8401-14-stefanha@redhat.com> | ||
42 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
43 | --- | 6 | --- |
44 | block/export/vduse-blk.c | 132 +++++++++++++++++++++++++++------------ | 7 | tests/test-bdrv-drain.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ |
45 | 1 file changed, 93 insertions(+), 39 deletions(-) | 8 | 1 file changed, 59 insertions(+) |
46 | 9 | ||
47 | diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c | 10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
48 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
49 | --- a/block/export/vduse-blk.c | 12 | --- a/tests/test-bdrv-drain.c |
50 | +++ b/block/export/vduse-blk.c | 13 | +++ b/tests/test-bdrv-drain.c |
51 | @@ -XXX,XX +XXX,XX @@ typedef struct VduseBlkExport { | 14 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
52 | VduseDev *dev; | 15 | *aio_ret = ret; |
53 | uint16_t num_queues; | ||
54 | char *recon_file; | ||
55 | - unsigned int inflight; | ||
56 | + unsigned int inflight; /* atomic */ | ||
57 | + bool vqs_started; | ||
58 | } VduseBlkExport; | ||
59 | |||
60 | typedef struct VduseBlkReq { | ||
61 | @@ -XXX,XX +XXX,XX @@ typedef struct VduseBlkReq { | ||
62 | |||
63 | static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp) | ||
64 | { | ||
65 | - vblk_exp->inflight++; | ||
66 | + if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) { | ||
67 | + /* Prevent export from being deleted */ | ||
68 | + aio_context_acquire(vblk_exp->export.ctx); | ||
69 | + blk_exp_ref(&vblk_exp->export); | ||
70 | + aio_context_release(vblk_exp->export.ctx); | ||
71 | + } | ||
72 | } | 16 | } |
73 | 17 | ||
74 | static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp) | 18 | +typedef struct CallInCoroutineData { |
75 | { | 19 | + void (*entry)(void); |
76 | - if (--vblk_exp->inflight == 0) { | 20 | + bool done; |
77 | + if (qatomic_fetch_dec(&vblk_exp->inflight) == 1) { | 21 | +} CallInCoroutineData; |
78 | + /* Wake AIO_WAIT_WHILE() */ | ||
79 | aio_wait_kick(); | ||
80 | + | 22 | + |
81 | + /* Now the export can be deleted */ | 23 | +static coroutine_fn void call_in_coroutine_entry(void *opaque) |
82 | + aio_context_acquire(vblk_exp->export.ctx); | 24 | +{ |
83 | + blk_exp_unref(&vblk_exp->export); | 25 | + CallInCoroutineData *data = opaque; |
84 | + aio_context_release(vblk_exp->export.ctx); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq) | ||
89 | { | ||
90 | VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev); | ||
91 | |||
92 | + if (!vblk_exp->vqs_started) { | ||
93 | + return; /* vduse_blk_drained_end() will start vqs later */ | ||
94 | + } | ||
95 | + | 26 | + |
96 | aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq), | 27 | + data->entry(); |
97 | - true, on_vduse_vq_kick, NULL, NULL, NULL, vq); | 28 | + data->done = true; |
98 | + false, on_vduse_vq_kick, NULL, NULL, NULL, vq); | ||
99 | /* Make sure we don't miss any kick afer reconnecting */ | ||
100 | eventfd_write(vduse_queue_get_fd(vq), 1); | ||
101 | } | ||
102 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq) | ||
103 | static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq) | ||
104 | { | ||
105 | VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev); | ||
106 | + int fd = vduse_queue_get_fd(vq); | ||
107 | |||
108 | - aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq), | ||
109 | - true, NULL, NULL, NULL, NULL, NULL); | ||
110 | + if (fd < 0) { | ||
111 | + return; | ||
112 | + } | ||
113 | + | ||
114 | + aio_set_fd_handler(vblk_exp->export.ctx, fd, false, | ||
115 | + NULL, NULL, NULL, NULL, NULL); | ||
116 | } | ||
117 | |||
118 | static const VduseOps vduse_blk_ops = { | ||
119 | @@ -XXX,XX +XXX,XX @@ static void on_vduse_dev_kick(void *opaque) | ||
120 | |||
121 | static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx) | ||
122 | { | ||
123 | - int i; | ||
124 | - | ||
125 | aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev), | ||
126 | - true, on_vduse_dev_kick, NULL, NULL, NULL, | ||
127 | + false, on_vduse_dev_kick, NULL, NULL, NULL, | ||
128 | vblk_exp->dev); | ||
129 | |||
130 | - for (i = 0; i < vblk_exp->num_queues; i++) { | ||
131 | - VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i); | ||
132 | - int fd = vduse_queue_get_fd(vq); | ||
133 | - | ||
134 | - if (fd < 0) { | ||
135 | - continue; | ||
136 | - } | ||
137 | - aio_set_fd_handler(vblk_exp->export.ctx, fd, true, | ||
138 | - on_vduse_vq_kick, NULL, NULL, NULL, vq); | ||
139 | - } | ||
140 | + /* Virtqueues are handled by vduse_blk_drained_end() */ | ||
141 | } | ||
142 | |||
143 | static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp) | ||
144 | { | ||
145 | - int i; | ||
146 | - | ||
147 | - for (i = 0; i < vblk_exp->num_queues; i++) { | ||
148 | - VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i); | ||
149 | - int fd = vduse_queue_get_fd(vq); | ||
150 | - | ||
151 | - if (fd < 0) { | ||
152 | - continue; | ||
153 | - } | ||
154 | - aio_set_fd_handler(vblk_exp->export.ctx, fd, | ||
155 | - true, NULL, NULL, NULL, NULL, NULL); | ||
156 | - } | ||
157 | aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev), | ||
158 | - true, NULL, NULL, NULL, NULL, NULL); | ||
159 | + false, NULL, NULL, NULL, NULL, NULL); | ||
160 | |||
161 | - AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0); | ||
162 | + /* Virtqueues are handled by vduse_blk_drained_begin() */ | ||
163 | } | ||
164 | |||
165 | |||
166 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_resize(void *opaque) | ||
167 | (char *)&config.capacity); | ||
168 | } | ||
169 | |||
170 | +static void vduse_blk_stop_virtqueues(VduseBlkExport *vblk_exp) | ||
171 | +{ | ||
172 | + for (uint16_t i = 0; i < vblk_exp->num_queues; i++) { | ||
173 | + VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i); | ||
174 | + vduse_blk_disable_queue(vblk_exp->dev, vq); | ||
175 | + } | ||
176 | + | ||
177 | + vblk_exp->vqs_started = false; | ||
178 | +} | 29 | +} |
179 | + | 30 | + |
180 | +static void vduse_blk_start_virtqueues(VduseBlkExport *vblk_exp) | 31 | +static void call_in_coroutine(void (*entry)(void)) |
181 | +{ | 32 | +{ |
182 | + vblk_exp->vqs_started = true; | 33 | + Coroutine *co; |
34 | + CallInCoroutineData data = { | ||
35 | + .entry = entry, | ||
36 | + .done = false, | ||
37 | + }; | ||
183 | + | 38 | + |
184 | + for (uint16_t i = 0; i < vblk_exp->num_queues; i++) { | 39 | + co = qemu_coroutine_create(call_in_coroutine_entry, &data); |
185 | + VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i); | 40 | + qemu_coroutine_enter(co); |
186 | + vduse_blk_enable_queue(vblk_exp->dev, vq); | 41 | + while (!data.done) { |
42 | + aio_poll(qemu_get_aio_context(), true); | ||
187 | + } | 43 | + } |
188 | +} | 44 | +} |
189 | + | 45 | + |
190 | +static void vduse_blk_drained_begin(void *opaque) | 46 | enum drain_type { |
47 | BDRV_DRAIN_ALL, | ||
48 | BDRV_DRAIN, | ||
49 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void) | ||
50 | test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); | ||
51 | } | ||
52 | |||
53 | +static void test_drv_cb_co_drain(void) | ||
191 | +{ | 54 | +{ |
192 | + BlockExport *exp = opaque; | 55 | + call_in_coroutine(test_drv_cb_drain); |
193 | + VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); | ||
194 | + | ||
195 | + vduse_blk_stop_virtqueues(vblk_exp); | ||
196 | +} | 56 | +} |
197 | + | 57 | + |
198 | +static void vduse_blk_drained_end(void *opaque) | 58 | +static void test_drv_cb_co_drain_subtree(void) |
199 | +{ | 59 | +{ |
200 | + BlockExport *exp = opaque; | 60 | + call_in_coroutine(test_drv_cb_drain_subtree); |
201 | + VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); | ||
202 | + | ||
203 | + vduse_blk_start_virtqueues(vblk_exp); | ||
204 | +} | 61 | +} |
205 | + | 62 | + |
206 | +static bool vduse_blk_drained_poll(void *opaque) | 63 | static void test_quiesce_common(enum drain_type drain_type, bool recursive) |
64 | { | ||
65 | BlockBackend *blk; | ||
66 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void) | ||
67 | test_quiesce_common(BDRV_SUBTREE_DRAIN, true); | ||
68 | } | ||
69 | |||
70 | +static void test_quiesce_co_drain(void) | ||
207 | +{ | 71 | +{ |
208 | + BlockExport *exp = opaque; | 72 | + call_in_coroutine(test_quiesce_drain); |
209 | + VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); | ||
210 | + | ||
211 | + return qatomic_read(&vblk_exp->inflight) > 0; | ||
212 | +} | 73 | +} |
213 | + | 74 | + |
214 | static const BlockDevOps vduse_block_ops = { | 75 | +static void test_quiesce_co_drain_subtree(void) |
215 | - .resize_cb = vduse_blk_resize, | 76 | +{ |
216 | + .resize_cb = vduse_blk_resize, | 77 | + call_in_coroutine(test_quiesce_drain_subtree); |
217 | + .drained_begin = vduse_blk_drained_begin, | 78 | +} |
218 | + .drained_end = vduse_blk_drained_end, | ||
219 | + .drained_poll = vduse_blk_drained_poll, | ||
220 | }; | ||
221 | |||
222 | static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, | ||
223 | @@ -XXX,XX +XXX,XX @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, | ||
224 | vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: ""); | ||
225 | vblk_exp->handler.logical_block_size = logical_block_size; | ||
226 | vblk_exp->handler.writable = opts->writable; | ||
227 | + vblk_exp->vqs_started = true; | ||
228 | |||
229 | config.capacity = | ||
230 | cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS); | ||
231 | @@ -XXX,XX +XXX,XX @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, | ||
232 | vduse_dev_setup_queue(vblk_exp->dev, i, queue_size); | ||
233 | } | ||
234 | |||
235 | - aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true, | ||
236 | + aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), false, | ||
237 | on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev); | ||
238 | |||
239 | blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, | ||
240 | vblk_exp); | ||
241 | - | ||
242 | blk_set_dev_ops(exp->blk, &vduse_block_ops, exp); | ||
243 | |||
244 | + /* | ||
245 | + * We handle draining ourselves using an in-flight counter and by disabling | ||
246 | + * virtqueue fd handlers. Do not queue BlockBackend requests, they need to | ||
247 | + * complete so the in-flight counter reaches zero. | ||
248 | + */ | ||
249 | + blk_set_disable_request_queuing(exp->blk, true); | ||
250 | + | 79 | + |
251 | return 0; | 80 | static void test_nested(void) |
252 | err: | 81 | { |
253 | vduse_dev_destroy(vblk_exp->dev); | 82 | BlockBackend *blk; |
254 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_exp_delete(BlockExport *exp) | 83 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
255 | VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); | 84 | g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", |
256 | int ret; | 85 | test_drv_cb_drain_subtree); |
257 | 86 | ||
258 | + assert(qatomic_read(&vblk_exp->inflight) == 0); | 87 | + // XXX bdrv_drain_all() doesn't work in coroutine context |
88 | + g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); | ||
89 | + g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", | ||
90 | + test_drv_cb_co_drain_subtree); | ||
259 | + | 91 | + |
260 | + vduse_blk_detach_ctx(vblk_exp); | 92 | + |
261 | blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, | 93 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); |
262 | vblk_exp); | 94 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); |
263 | ret = vduse_dev_destroy(vblk_exp->dev); | 95 | g_test_add_func("/bdrv-drain/quiesce/drain_subtree", |
264 | @@ -XXX,XX +XXX,XX @@ static void vduse_blk_exp_delete(BlockExport *exp) | 96 | test_quiesce_drain_subtree); |
265 | g_free(vblk_exp->handler.serial); | 97 | |
266 | } | 98 | + // XXX bdrv_drain_all() doesn't work in coroutine context |
267 | 99 | + g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); | |
268 | +/* Called with exp->ctx acquired */ | 100 | + g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", |
269 | static void vduse_blk_exp_request_shutdown(BlockExport *exp) | 101 | + test_quiesce_co_drain_subtree); |
270 | { | 102 | + |
271 | VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); | 103 | g_test_add_func("/bdrv-drain/nested", test_nested); |
272 | 104 | ||
273 | - aio_context_acquire(vblk_exp->export.ctx); | 105 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); |
274 | - vduse_blk_detach_ctx(vblk_exp); | ||
275 | - aio_context_acquire(vblk_exp->export.ctx); | ||
276 | + vduse_blk_stop_virtqueues(vblk_exp); | ||
277 | } | ||
278 | |||
279 | const BlockExportDriver blk_exp_vduse_blk = { | ||
280 | -- | 106 | -- |
281 | 2.40.1 | 107 | 2.13.6 |
108 | |||
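The drain protocol in the patch above boils down to three small pieces. A sketch on an invented MyExport (field and function names are placeholders): an atomic in-flight counter feeds .drained_poll(), and the last completion kicks AIO_WAIT_WHILE() pollers in the drain code awake.

    typedef struct MyExport {
        unsigned int inflight; /* atomic */
    } MyExport;

    static void my_inflight_inc(MyExport *exp)
    {
        qatomic_inc(&exp->inflight);
    }

    static void my_inflight_dec(MyExport *exp)
    {
        if (qatomic_fetch_dec(&exp->inflight) == 1) {
            aio_wait_kick(); /* wake AIO_WAIT_WHILE() waiters */
        }
    }

    /* true means "still busy, poll again"; drain loops until it is false */
    static bool my_drained_poll(void *opaque)
    {
        MyExport *exp = opaque;

        return qatomic_read(&exp->inflight) > 0;
    }
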
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | Test that drain sections are correctly propagated through the graph. |
---|---|---|---|
2 | 2 | ||
3 | Detach ioeventfds during drained sections to stop I/O submission from | ||
4 | the guest. virtio-blk is no longer reliant on aio_disable_external() | ||
5 | after this patch. This will allow us to remove the | ||
6 | aio_disable_external() API once all other code that relies on it is | ||
7 | converted. | ||
8 | |||
9 | Take extra care to avoid attaching/detaching ioeventfds if the data | ||
10 | plane is started/stopped during a drained section. This should be rare, | ||
11 | but maybe the mirror block job can trigger it. | ||
12 | |||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Message-Id: <20230516190238.8401-18-stefanha@redhat.com> | ||
15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
16 | --- | 4 | --- |
17 | hw/block/dataplane/virtio-blk.c | 16 ++++++++------ | 5 | tests/test-bdrv-drain.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ |
18 | hw/block/virtio-blk.c | 38 ++++++++++++++++++++++++++++++++- | 6 | 1 file changed, 74 insertions(+) |
19 | 2 files changed, 47 insertions(+), 7 deletions(-) | ||
20 | 7 | ||
21 | diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
22 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/hw/block/dataplane/virtio-blk.c | 10 | --- a/tests/test-bdrv-drain.c |
24 | +++ b/hw/block/dataplane/virtio-blk.c | 11 | +++ b/tests/test-bdrv-drain.c |
25 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) | 12 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) |
26 | } | 13 | blk_unref(blk); |
27 | |||
28 | /* Get this show started by hooking up our callbacks */ | ||
29 | - aio_context_acquire(s->ctx); | ||
30 | - for (i = 0; i < nvqs; i++) { | ||
31 | - VirtQueue *vq = virtio_get_queue(s->vdev, i); | ||
32 | + if (!blk_in_drain(s->conf->conf.blk)) { | ||
33 | + aio_context_acquire(s->ctx); | ||
34 | + for (i = 0; i < nvqs; i++) { | ||
35 | + VirtQueue *vq = virtio_get_queue(s->vdev, i); | ||
36 | |||
37 | - virtio_queue_aio_attach_host_notifier(vq, s->ctx); | ||
38 | + virtio_queue_aio_attach_host_notifier(vq, s->ctx); | ||
39 | + } | ||
40 | + aio_context_release(s->ctx); | ||
41 | } | ||
42 | - aio_context_release(s->ctx); | ||
43 | return 0; | ||
44 | |||
45 | fail_aio_context: | ||
46 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) | ||
47 | s->stopping = true; | ||
48 | trace_virtio_blk_data_plane_stop(s); | ||
49 | |||
50 | - aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); | ||
51 | + if (!blk_in_drain(s->conf->conf.blk)) { | ||
52 | + aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); | ||
53 | + } | ||
54 | |||
55 | aio_context_acquire(s->ctx); | ||
56 | |||
57 | diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/hw/block/virtio-blk.c | ||
60 | +++ b/hw/block/virtio-blk.c | ||
61 | @@ -XXX,XX +XXX,XX @@ static void virtio_blk_resize(void *opaque) | ||
62 | aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); | ||
63 | } | 14 | } |
64 | 15 | ||
65 | +/* Suspend virtqueue ioeventfd processing during drain */ | 16 | +static void test_multiparent(void) |
66 | +static void virtio_blk_drained_begin(void *opaque) | ||
67 | +{ | 17 | +{ |
68 | + VirtIOBlock *s = opaque; | 18 | + BlockBackend *blk_a, *blk_b; |
69 | + VirtIODevice *vdev = VIRTIO_DEVICE(opaque); | 19 | + BlockDriverState *bs_a, *bs_b, *backing; |
70 | + AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); | 20 | + BDRVTestState *a_s, *b_s, *backing_s; |
71 | + | 21 | + |
72 | + if (!s->dataplane || !s->dataplane_started) { | 22 | + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
73 | + return; | 23 | + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, |
74 | + } | 24 | + &error_abort); |
25 | + a_s = bs_a->opaque; | ||
26 | + blk_insert_bs(blk_a, bs_a, &error_abort); | ||
75 | + | 27 | + |
76 | + for (uint16_t i = 0; i < s->conf.num_queues; i++) { | 28 | + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
77 | + VirtQueue *vq = virtio_get_queue(vdev, i); | 29 | + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, |
78 | + virtio_queue_aio_detach_host_notifier(vq, ctx); | 30 | + &error_abort); |
79 | + } | 31 | + b_s = bs_b->opaque; |
32 | + blk_insert_bs(blk_b, bs_b, &error_abort); | ||
33 | + | ||
34 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
35 | + backing_s = backing->opaque; | ||
36 | + bdrv_set_backing_hd(bs_a, backing, &error_abort); | ||
37 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); | ||
38 | + | ||
39 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
40 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
41 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
42 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
43 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
44 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
45 | + | ||
46 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); | ||
47 | + | ||
48 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); | ||
49 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | ||
50 | + g_assert_cmpint(backing->quiesce_counter, ==, 1); | ||
51 | + g_assert_cmpint(a_s->drain_count, ==, 1); | ||
52 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
53 | + g_assert_cmpint(backing_s->drain_count, ==, 1); | ||
54 | + | ||
55 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); | ||
56 | + | ||
57 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 2); | ||
58 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); | ||
59 | + g_assert_cmpint(backing->quiesce_counter, ==, 2); | ||
60 | + g_assert_cmpint(a_s->drain_count, ==, 2); | ||
61 | + g_assert_cmpint(b_s->drain_count, ==, 2); | ||
62 | + g_assert_cmpint(backing_s->drain_count, ==, 2); | ||
63 | + | ||
64 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
65 | + | ||
66 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); | ||
67 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | ||
68 | + g_assert_cmpint(backing->quiesce_counter, ==, 1); | ||
69 | + g_assert_cmpint(a_s->drain_count, ==, 1); | ||
70 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
71 | + g_assert_cmpint(backing_s->drain_count, ==, 1); | ||
72 | + | ||
73 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
74 | + | ||
75 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
76 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
77 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
78 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
79 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
80 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
81 | + | ||
82 | + bdrv_unref(backing); | ||
83 | + bdrv_unref(bs_a); | ||
84 | + bdrv_unref(bs_b); | ||
85 | + blk_unref(blk_a); | ||
86 | + blk_unref(blk_b); | ||
80 | +} | 87 | +} |
81 | + | 88 | + |
82 | +/* Resume virtqueue ioeventfd processing after drain */ | 89 | |
83 | +static void virtio_blk_drained_end(void *opaque) | 90 | typedef struct TestBlockJob { |
84 | +{ | 91 | BlockJob common; |
85 | + VirtIOBlock *s = opaque; | 92 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
86 | + VirtIODevice *vdev = VIRTIO_DEVICE(opaque); | 93 | test_quiesce_co_drain_subtree); |
87 | + AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); | 94 | |
88 | + | 95 | g_test_add_func("/bdrv-drain/nested", test_nested); |
89 | + if (!s->dataplane || !s->dataplane_started) { | 96 | + g_test_add_func("/bdrv-drain/multiparent", test_multiparent); |
90 | + return; | 97 | |
91 | + } | 98 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); |
92 | + | 99 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); |
93 | + for (uint16_t i = 0; i < s->conf.num_queues; i++) { | ||
94 | + VirtQueue *vq = virtio_get_queue(vdev, i); | ||
95 | + virtio_queue_aio_attach_host_notifier(vq, ctx); | ||
96 | + } | ||
97 | +} | ||
98 | + | ||
99 | static const BlockDevOps virtio_block_ops = { | ||
100 | - .resize_cb = virtio_blk_resize, | ||
101 | + .resize_cb = virtio_blk_resize, | ||
102 | + .drained_begin = virtio_blk_drained_begin, | ||
103 | + .drained_end = virtio_blk_drained_end, | ||
104 | }; | ||
105 | |||
106 | static void virtio_blk_device_realize(DeviceState *dev, Error **errp) | ||
107 | -- | 100 | -- |
108 | 2.40.1 | 101 | 2.13.6 |
102 | |||
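The virtio-blk hooks in the patch above generalize to any virtio device that processes requests via ioeventfd. A sketch with an invented MyDev wrapper standing in for the real device state:

    typedef struct MyDev {
        VirtIODevice *vdev;
        AioContext *ctx;    /* the iothread's context */
        uint16_t num_queues;
    } MyDev;

    static void my_virtio_drained_begin(void *opaque)
    {
        MyDev *s = opaque;

        /* Stop watching guest kicks so no new requests are submitted */
        for (uint16_t i = 0; i < s->num_queues; i++) {
            virtio_queue_aio_detach_host_notifier(virtio_get_queue(s->vdev, i),
                                                  s->ctx);
        }
    }

    static void my_virtio_drained_end(void *opaque)
    {
        MyDev *s = opaque;

        /* Resume ioeventfd processing in the device's AioContext */
        for (uint16_t i = 0; i < s->num_queues; i++) {
            virtio_queue_aio_attach_host_notifier(virtio_get_queue(s->vdev, i),
                                                  s->ctx);
        }
    }

As in the patch, both hooks must be skipped while the dataplane is not started, otherwise notifiers that were never attached would be detached.
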
1 | These functions specify that the caller must hold the "@filename | 1 | We need to remember how many of the drain sections a node is in |
---|---|---|---|
2 | AioContext lock". This doesn't make sense, file names don't have an | 2 | were recursive (i.e. subtree drain rather than node drain), so that they |
3 | AioContext. New BlockDriverStates always start in the main AioContext, | 3 | can be correctly applied when children are added or removed during the |
4 | so this is what we really need here. | 4 | drained section. |
5 | 5 | ||
6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 6 | With this change, it is safe to modify the graph even inside a |
7 | Message-Id: <20230525124713.401149-3-kwolf@redhat.com> | 7 | bdrv_subtree_drained_begin/end() section. |
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 8 | |
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 10 | --- |
11 | block.c | 10 ++-------- | 11 | include/block/block.h | 2 -- |
12 | 1 file changed, 2 insertions(+), 8 deletions(-) | 12 | include/block/block_int.h | 5 +++++ |
13 | 13 | block.c | 32 +++++++++++++++++++++++++++++--- | |
14 | block/io.c | 28 ++++++++++++++++++++++++---- | ||
15 | 4 files changed, 58 insertions(+), 9 deletions(-) | ||
16 | |||
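To illustrate the locking rule the left-hand patch documents for bdrv_open(), a caller-side sketch (filename, options, flags and errp are placeholders; error handling elided):

    AioContext *main_ctx = qemu_get_aio_context();
    BlockDriverState *bs;

    /* bdrv_open() must be called with the main AioContext lock held;
     * the new node always starts life in the main AioContext. */
    aio_context_acquire(main_ctx);
    bs = bdrv_open("test.qcow2", NULL, options, flags, errp);
    aio_context_release(main_ctx);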
17 | diff --git a/include/block/block.h b/include/block/block.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/include/block/block.h | ||
20 | +++ b/include/block/block.h | ||
21 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs); | ||
22 | /** | ||
23 | * Like bdrv_drained_begin, but recursively begins a quiesced section for | ||
24 | * exclusive access to all child nodes as well. | ||
25 | - * | ||
26 | - * Graph changes are not allowed during a subtree drain section. | ||
27 | */ | ||
28 | void bdrv_subtree_drained_begin(BlockDriverState *bs); | ||
29 | |||
30 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/include/block/block_int.h | ||
33 | +++ b/include/block/block_int.h | ||
34 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
35 | |||
36 | /* Accessed with atomic ops. */ | ||
37 | int quiesce_counter; | ||
38 | + int recursive_quiesce_counter; | ||
39 | + | ||
40 | unsigned int write_gen; /* Current data generation */ | ||
41 | |||
42 | /* Protected by reqs_lock. */ | ||
43 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | ||
44 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, | ||
45 | BdrvRequestFlags flags); | ||
46 | |||
47 | +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); | ||
48 | +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); | ||
49 | + | ||
50 | int get_tmp_filename(char *filename, int size); | ||
51 | BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, | ||
52 | const char *filename); | ||
14 | diff --git a/block.c b/block.c | 53 | diff --git a/block.c b/block.c |
15 | index XXXXXXX..XXXXXXX 100644 | 54 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block.c | 55 | --- a/block.c |
17 | +++ b/block.c | 56 | +++ b/block.c |
18 | @@ -XXX,XX +XXX,XX @@ out: | 57 | @@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_end(BdrvChild *child) |
19 | * should be opened. If specified, neither options nor a filename may be given, | 58 | bdrv_drained_end(bs); |
20 | * nor can an existing BDS be reused (that is, *pbs has to be NULL). | 59 | } |
21 | * | 60 | |
22 | - * The caller must always hold @filename AioContext lock, because this | 61 | +static void bdrv_child_cb_attach(BdrvChild *child) |
23 | - * function eventually calls bdrv_refresh_total_sectors() which polls | 62 | +{ |
24 | - * when called from non-coroutine context. | 63 | + BlockDriverState *bs = child->opaque; |
25 | + * The caller must always hold the main AioContext lock. | 64 | + bdrv_apply_subtree_drain(child, bs); |
26 | */ | 65 | +} |
27 | static BlockDriverState * no_coroutine_fn | 66 | + |
28 | bdrv_open_inherit(const char *filename, const char *reference, QDict *options, | 67 | +static void bdrv_child_cb_detach(BdrvChild *child) |
29 | @@ -XXX,XX +XXX,XX @@ close_and_fail: | 68 | +{ |
30 | return NULL; | 69 | + BlockDriverState *bs = child->opaque; |
31 | } | 70 | + bdrv_unapply_subtree_drain(child, bs); |
32 | 71 | +} | |
33 | -/* | 72 | + |
34 | - * The caller must always hold @filename AioContext lock, because this | 73 | static int bdrv_child_cb_inactivate(BdrvChild *child) |
35 | - * function eventually calls bdrv_refresh_total_sectors() which polls | ||
36 | - * when called from non-coroutine context. | ||
37 | - */ | ||
38 | +/* The caller must always hold the main AioContext lock. */ | ||
39 | BlockDriverState *bdrv_open(const char *filename, const char *reference, | ||
40 | QDict *options, int flags, Error **errp) | ||
41 | { | 74 | { |
75 | BlockDriverState *bs = child->opaque; | ||
76 | @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = { | ||
77 | .inherit_options = bdrv_inherited_options, | ||
78 | .drained_begin = bdrv_child_cb_drained_begin, | ||
79 | .drained_end = bdrv_child_cb_drained_end, | ||
80 | + .attach = bdrv_child_cb_attach, | ||
81 | + .detach = bdrv_child_cb_detach, | ||
82 | .inactivate = bdrv_child_cb_inactivate, | ||
83 | }; | ||
84 | |||
85 | @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = { | ||
86 | .inherit_options = bdrv_inherited_fmt_options, | ||
87 | .drained_begin = bdrv_child_cb_drained_begin, | ||
88 | .drained_end = bdrv_child_cb_drained_end, | ||
89 | + .attach = bdrv_child_cb_attach, | ||
90 | + .detach = bdrv_child_cb_detach, | ||
91 | .inactivate = bdrv_child_cb_inactivate, | ||
92 | }; | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static void bdrv_backing_attach(BdrvChild *c) | ||
95 | parent->backing_blocker); | ||
96 | bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, | ||
97 | parent->backing_blocker); | ||
98 | + | ||
99 | + bdrv_child_cb_attach(c); | ||
100 | } | ||
101 | |||
102 | static void bdrv_backing_detach(BdrvChild *c) | ||
103 | @@ -XXX,XX +XXX,XX @@ static void bdrv_backing_detach(BdrvChild *c) | ||
104 | bdrv_op_unblock_all(c->bs, parent->backing_blocker); | ||
105 | error_free(parent->backing_blocker); | ||
106 | parent->backing_blocker = NULL; | ||
107 | + | ||
108 | + bdrv_child_cb_detach(c); | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
113 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); | ||
114 | } | ||
115 | if (old_bs) { | ||
116 | + /* Detach first so that the recursive drain sections coming from @child | ||
117 | + * are already gone and we only end the drain sections that came from | ||
118 | + * elsewhere. */ | ||
119 | + if (child->role->detach) { | ||
120 | + child->role->detach(child); | ||
121 | + } | ||
122 | if (old_bs->quiesce_counter && child->role->drained_end) { | ||
123 | for (i = 0; i < old_bs->quiesce_counter; i++) { | ||
124 | child->role->drained_end(child); | ||
125 | } | ||
126 | } | ||
127 | - if (child->role->detach) { | ||
128 | - child->role->detach(child); | ||
129 | - } | ||
130 | QLIST_REMOVE(child, next_parent); | ||
131 | } | ||
132 | |||
133 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
134 | } | ||
135 | } | ||
136 | |||
137 | + /* Attach only after starting new drained sections, so that recursive | ||
138 | + * drain sections coming from @child don't get an extra .drained_begin | ||
139 | + * callback. */ | ||
140 | if (child->role->attach) { | ||
141 | child->role->attach(child); | ||
142 | } | ||
143 | diff --git a/block/io.c b/block/io.c | ||
144 | index XXXXXXX..XXXXXXX 100644 | ||
145 | --- a/block/io.c | ||
146 | +++ b/block/io.c | ||
147 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
148 | assert(data.done); | ||
149 | } | ||
150 | |||
151 | -static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
152 | - BdrvChild *parent) | ||
153 | +void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
154 | + BdrvChild *parent) | ||
155 | { | ||
156 | BdrvChild *child, *next; | ||
157 | |||
158 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
159 | bdrv_drain_recurse(bs); | ||
160 | |||
161 | if (recursive) { | ||
162 | + bs->recursive_quiesce_counter++; | ||
163 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
164 | bdrv_do_drained_begin(child->bs, true, child); | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_begin(BlockDriverState *bs) | ||
167 | bdrv_do_drained_begin(bs, true, NULL); | ||
168 | } | ||
169 | |||
170 | -static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
171 | - BdrvChild *parent) | ||
172 | +void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
173 | + BdrvChild *parent) | ||
174 | { | ||
175 | BdrvChild *child, *next; | ||
176 | int old_quiesce_counter; | ||
177 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
178 | } | ||
179 | |||
180 | if (recursive) { | ||
181 | + bs->recursive_quiesce_counter--; | ||
182 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
183 | bdrv_do_drained_end(child->bs, true, child); | ||
184 | } | ||
185 | @@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_end(BlockDriverState *bs) | ||
186 | bdrv_do_drained_end(bs, true, NULL); | ||
187 | } | ||
188 | |||
189 | +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) | ||
190 | +{ | ||
191 | + int i; | ||
192 | + | ||
193 | + for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { | ||
194 | + bdrv_do_drained_begin(child->bs, true, child); | ||
195 | + } | ||
196 | +} | ||
197 | + | ||
198 | +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) | ||
199 | +{ | ||
200 | + int i; | ||
201 | + | ||
202 | + for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { | ||
203 | + bdrv_do_drained_end(child->bs, true, child); | ||
204 | + } | ||
205 | +} | ||
206 | + | ||
207 | /* | ||
208 | * Wait for pending requests to complete on a single BlockDriverState subtree, | ||
209 | * and suspend block driver's internal I/O until next request arrives. | ||
42 | -- | 210 | -- |
43 | 2.40.1 | 211 | 2.13.6 |
212 | |||
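As a usage sketch of what the subtree drain patch above makes safe (node names illustrative; &error_abort as in the tests):

    bdrv_subtree_drained_begin(bs_top);

    /* Graph change inside the drained section: the new child inherits
     * bs_top's recursive drain sections through the .attach callback,
     * and a removed child drops them through .detach. */
    bdrv_set_backing_hd(bs_top, new_backing, &error_abort);

    bdrv_subtree_drained_end(bs_top);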
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | ||
---|---|---|---|
2 | |||
3 | The BlockBackend quiesce_counter is greater than zero during drained | ||
4 | sections. Add an API to check whether the BlockBackend is in a drained | ||
5 | section. | ||
6 | |||
7 | The next patch will use this API. | ||
8 | |||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | Message-Id: <20230516190238.8401-10-stefanha@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | --- | 2 | --- |
14 | include/sysemu/block-backend-global-state.h | 1 + | 3 | tests/test-bdrv-drain.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ |
15 | block/block-backend.c | 7 +++++++ | 4 | 1 file changed, 80 insertions(+) |
16 | 2 files changed, 8 insertions(+) | ||
17 | 5 | ||
18 | diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h | 6 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
19 | index XXXXXXX..XXXXXXX 100644 | 7 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/include/sysemu/block-backend-global-state.h | 8 | --- a/tests/test-bdrv-drain.c |
21 | +++ b/include/sysemu/block-backend-global-state.h | 9 | +++ b/tests/test-bdrv-drain.c |
22 | @@ -XXX,XX +XXX,XX @@ void blk_activate(BlockBackend *blk, Error **errp); | 10 | @@ -XXX,XX +XXX,XX @@ static void test_multiparent(void) |
23 | int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); | 11 | blk_unref(blk_b); |
24 | void blk_aio_cancel(BlockAIOCB *acb); | ||
25 | int blk_commit_all(void); | ||
26 | +bool blk_in_drain(BlockBackend *blk); | ||
27 | void blk_drain(BlockBackend *blk); | ||
28 | void blk_drain_all(void); | ||
29 | void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, | ||
30 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/block/block-backend.c | ||
33 | +++ b/block/block-backend.c | ||
34 | @@ -XXX,XX +XXX,XX @@ blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes) | ||
35 | return 0; | ||
36 | } | 12 | } |
37 | 13 | ||
38 | +/* Are we currently in a drained section? */ | 14 | +static void test_graph_change(void) |
39 | +bool blk_in_drain(BlockBackend *blk) | ||
40 | +{ | 15 | +{ |
41 | + GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */ | 16 | + BlockBackend *blk_a, *blk_b; |
42 | + return qatomic_read(&blk->quiesce_counter); | 17 | + BlockDriverState *bs_a, *bs_b, *backing; |
18 | + BDRVTestState *a_s, *b_s, *backing_s; | ||
19 | + | ||
20 | + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
21 | + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, | ||
22 | + &error_abort); | ||
23 | + a_s = bs_a->opaque; | ||
24 | + blk_insert_bs(blk_a, bs_a, &error_abort); | ||
25 | + | ||
26 | + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
27 | + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, | ||
28 | + &error_abort); | ||
29 | + b_s = bs_b->opaque; | ||
30 | + blk_insert_bs(blk_b, bs_b, &error_abort); | ||
31 | + | ||
32 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
33 | + backing_s = backing->opaque; | ||
34 | + bdrv_set_backing_hd(bs_a, backing, &error_abort); | ||
35 | + | ||
36 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
37 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
38 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
39 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
40 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
41 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
42 | + | ||
43 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); | ||
44 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); | ||
45 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); | ||
46 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); | ||
47 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); | ||
48 | + | ||
49 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); | ||
50 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); | ||
51 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); | ||
52 | + g_assert_cmpint(backing->quiesce_counter, ==, 5); | ||
53 | + g_assert_cmpint(a_s->drain_count, ==, 5); | ||
54 | + g_assert_cmpint(b_s->drain_count, ==, 5); | ||
55 | + g_assert_cmpint(backing_s->drain_count, ==, 5); | ||
56 | + | ||
57 | + bdrv_set_backing_hd(bs_b, NULL, &error_abort); | ||
58 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 3); | ||
59 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); | ||
60 | + g_assert_cmpint(backing->quiesce_counter, ==, 3); | ||
61 | + g_assert_cmpint(a_s->drain_count, ==, 3); | ||
62 | + g_assert_cmpint(b_s->drain_count, ==, 2); | ||
63 | + g_assert_cmpint(backing_s->drain_count, ==, 3); | ||
64 | + | ||
65 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); | ||
66 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); | ||
67 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); | ||
68 | + g_assert_cmpint(backing->quiesce_counter, ==, 5); | ||
69 | + g_assert_cmpint(a_s->drain_count, ==, 5); | ||
70 | + g_assert_cmpint(b_s->drain_count, ==, 5); | ||
71 | + g_assert_cmpint(backing_s->drain_count, ==, 5); | ||
72 | + | ||
73 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
74 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
75 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
76 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
77 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
78 | + | ||
79 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
80 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
81 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
82 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
83 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
84 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
85 | + | ||
86 | + bdrv_unref(backing); | ||
87 | + bdrv_unref(bs_a); | ||
88 | + bdrv_unref(bs_b); | ||
89 | + blk_unref(blk_a); | ||
90 | + blk_unref(blk_b); | ||
43 | +} | 91 | +} |
44 | + | 92 | + |
45 | /* To be called between exactly one pair of blk_inc/dec_in_flight() */ | 93 | |
46 | static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) | 94 | typedef struct TestBlockJob { |
47 | { | 95 | BlockJob common; |
96 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
97 | |||
98 | g_test_add_func("/bdrv-drain/nested", test_nested); | ||
99 | g_test_add_func("/bdrv-drain/multiparent", test_multiparent); | ||
100 | + g_test_add_func("/bdrv-drain/graph-change", test_graph_change); | ||
101 | |||
102 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
103 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
48 | -- | 104 | -- |
49 | 2.40.1 | 105 | 2.13.6 |
106 | |||
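A hypothetical caller of the new predicate (the surrounding function is illustrative, not from this series):

    /* Defer re-attaching event handlers while the BlockBackend is still
     * inside a drained section; the matching drained_end callback will
     * take care of it. */
    if (blk_in_drain(blk)) {
        return;
    }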
1 | All of the functions that currently take a BlockDriverState, BdrvChild | 1 | Since commit bde70715, base is the only node that is reopened in |
---|---|---|---|
2 | or BlockBackend as their first parameter expect the associated | 2 | commit_start(). This means that the code, which still involves an |
3 | AioContext to be locked when they are called. In the case of | 3 | explicit BlockReopenQueue, can now be simplified by using bdrv_reopen(). |
4 | no_co_wrappers, they are called from bottom halves directly in the main | ||
5 | loop, so no other caller can be expected to take the lock for them. This | ||
6 | can result in assertion failures because a lock that isn't taken is | ||
7 | released in nested event loops. | ||
8 | |||
9 | Looking at the first parameter is already done by co_wrappers to decide | ||
10 | where the coroutine should run, so doing the same in no_co_wrappers is | ||
11 | only consistent. Take the lock in the generated bottom halves to fix the | ||
12 | problem. | ||
13 | 4 | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | Message-Id: <20230525124713.401149-2-kwolf@redhat.com> | 6 | Reviewed-by: Fam Zheng <famz@redhat.com> |
16 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
18 | --- | 7 | --- |
19 | include/block/block-common.h | 3 +++ | 8 | block/commit.c | 8 +------- |
20 | block/block-backend.c | 7 ++++++- | 9 | 1 file changed, 1 insertion(+), 7 deletions(-) |
21 | scripts/block-coroutine-wrapper.py | 25 +++++++++++++++---------- | ||
22 | 3 files changed, 24 insertions(+), 11 deletions(-) | ||
23 | 10 | ||
24 | diff --git a/include/block/block-common.h b/include/block/block-common.h | 11 | diff --git a/block/commit.c b/block/commit.c |
25 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/include/block/block-common.h | 13 | --- a/block/commit.c |
27 | +++ b/include/block/block-common.h | 14 | +++ b/block/commit.c |
28 | @@ -XXX,XX +XXX,XX @@ | 15 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, |
29 | * scheduling a BH in the bottom half that runs the respective non-coroutine | 16 | const char *filter_node_name, Error **errp) |
30 | * function. The coroutine yields after scheduling the BH and is reentered when | ||
31 | * the wrapped function returns. | ||
32 | + * | ||
33 | + * If the first parameter of the function is a BlockDriverState, BdrvChild or | ||
34 | + * BlockBackend pointer, the AioContext lock for it is taken in the wrapper. | ||
35 | */ | ||
36 | #define no_co_wrapper | ||
37 | |||
38 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/block/block-backend.c | ||
41 | +++ b/block/block-backend.c | ||
42 | @@ -XXX,XX +XXX,XX @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) | ||
43 | |||
44 | AioContext *blk_get_aio_context(BlockBackend *blk) | ||
45 | { | 17 | { |
46 | - BlockDriverState *bs = blk_bs(blk); | 18 | CommitBlockJob *s; |
47 | + BlockDriverState *bs; | 19 | - BlockReopenQueue *reopen_queue = NULL; |
48 | IO_CODE(); | 20 | int orig_base_flags; |
49 | 21 | BlockDriverState *iter; | |
50 | + if (!blk) { | 22 | BlockDriverState *commit_top_bs = NULL; |
51 | + return qemu_get_aio_context(); | 23 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, |
52 | + } | 24 | /* convert base to r/w, if necessary */ |
53 | + | 25 | orig_base_flags = bdrv_get_flags(base); |
54 | + bs = blk_bs(blk); | 26 | if (!(orig_base_flags & BDRV_O_RDWR)) { |
55 | if (bs) { | 27 | - reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, |
56 | AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); | 28 | - orig_base_flags | BDRV_O_RDWR); |
57 | assert(ctx == blk->ctx); | 29 | - } |
58 | diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py | 30 | - |
59 | index XXXXXXX..XXXXXXX 100644 | 31 | - if (reopen_queue) { |
60 | --- a/scripts/block-coroutine-wrapper.py | 32 | - bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); |
61 | +++ b/scripts/block-coroutine-wrapper.py | 33 | + bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err); |
62 | @@ -XXX,XX +XXX,XX @@ def __init__(self, wrapper_type: str, return_type: str, name: str, | 34 | if (local_err != NULL) { |
63 | raise ValueError(f"no_co function can't be rdlock: {self.name}") | 35 | error_propagate(errp, local_err); |
64 | self.target_name = f'{subsystem}_{subname}' | 36 | goto fail; |
65 | |||
66 | - t = self.args[0].type | ||
67 | - if t == 'BlockDriverState *': | ||
68 | - ctx = 'bdrv_get_aio_context(bs)' | ||
69 | - elif t == 'BdrvChild *': | ||
70 | - ctx = 'bdrv_get_aio_context(child->bs)' | ||
71 | - elif t == 'BlockBackend *': | ||
72 | - ctx = 'blk_get_aio_context(blk)' | ||
73 | - else: | ||
74 | - ctx = 'qemu_get_aio_context()' | ||
75 | - self.ctx = ctx | ||
76 | + self.ctx = self.gen_ctx() | ||
77 | |||
78 | self.get_result = 's->ret = ' | ||
79 | self.ret = 'return s.ret;' | ||
80 | @@ -XXX,XX +XXX,XX @@ def __init__(self, wrapper_type: str, return_type: str, name: str, | ||
81 | self.co_ret = '' | ||
82 | self.return_field = '' | ||
83 | |||
84 | + def gen_ctx(self, prefix: str = '') -> str: | ||
85 | + t = self.args[0].type | ||
86 | + if t == 'BlockDriverState *': | ||
87 | + return f'bdrv_get_aio_context({prefix}bs)' | ||
88 | + elif t == 'BdrvChild *': | ||
89 | + return f'bdrv_get_aio_context({prefix}child->bs)' | ||
90 | + elif t == 'BlockBackend *': | ||
91 | + return f'blk_get_aio_context({prefix}blk)' | ||
92 | + else: | ||
93 | + return 'qemu_get_aio_context()' | ||
94 | + | ||
95 | def gen_list(self, format: str) -> str: | ||
96 | return ', '.join(format.format_map(arg.__dict__) for arg in self.args) | ||
97 | |||
98 | @@ -XXX,XX +XXX,XX @@ def gen_no_co_wrapper(func: FuncDecl) -> str: | ||
99 | static void {name}_bh(void *opaque) | ||
100 | {{ | ||
101 | {struct_name} *s = opaque; | ||
102 | + AioContext *ctx = {func.gen_ctx('s->')}; | ||
103 | |||
104 | + aio_context_acquire(ctx); | ||
105 | {func.get_result}{name}({ func.gen_list('s->{name}') }); | ||
106 | + aio_context_release(ctx); | ||
107 | |||
108 | aio_co_wake(s->co); | ||
109 | }} | ||
110 | -- | 37 | -- |
111 | 2.40.1 | 38 | 2.13.6 |
39 | |||
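To make the generator change concrete, this is roughly what the emitted bottom half looks like after the patch for a hypothetical no_co_wrapper whose first parameter is a BlockBackend (struct, field and function names are illustrative):

    static void blk_example_bh(void *opaque)
    {
        BlkExampleState *s = opaque;
        AioContext *ctx = blk_get_aio_context(s->blk);

        /* Take the AioContext lock associated with the first parameter
         * around the call, just as direct callers of the wrapped function
         * are expected to do. */
        aio_context_acquire(ctx);
        s->ret = blk_example(s->blk, s->offset, s->bytes);
        aio_context_release(ctx);

        aio_co_wake(s->co);
    }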
1 | bdrv_open_backing_file() calls bdrv_open_inherit(), so all callers must | 1 | The bdrv_reopen*() implementation doesn't like it if the graph is |
---|---|---|---|
2 | hold the main AioContext lock. | 2 | changed between queuing nodes for reopen and actually reopening them |
3 | (one of the reasons is that queuing can be recursive). | ||
4 | |||
5 | So instead of draining the device only in bdrv_reopen_multiple(), | ||
6 | require that callers have already drained all affected nodes, and assert this |
7 | in bdrv_reopen_queue(). | ||
3 | 8 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | Message-Id: <20230525124713.401149-6-kwolf@redhat.com> | 10 | Reviewed-by: Fam Zheng <famz@redhat.com> |
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
8 | --- | 11 | --- |
9 | block.c | 2 ++ | 12 | block.c | 23 ++++++++++++++++------- |
10 | block/mirror.c | 6 ++++++ | 13 | block/replication.c | 6 ++++++ |
11 | 2 files changed, 8 insertions(+) | 14 | qemu-io-cmds.c | 3 +++ |
15 | 3 files changed, 25 insertions(+), 7 deletions(-) | ||
12 | 16 | ||
13 | diff --git a/block.c b/block.c | 17 | diff --git a/block.c b/block.c |
14 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/block.c | 19 | --- a/block.c |
16 | +++ b/block.c | 20 | +++ b/block.c |
17 | @@ -XXX,XX +XXX,XX @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, | 21 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, |
18 | * itself, all options starting with "${bdref_key}." are considered part of the | 22 | * returns a pointer to bs_queue, which is either the newly allocated |
19 | * BlockdevRef. | 23 | * bs_queue, or the existing bs_queue being used. |
20 | * | 24 | * |
21 | + * The caller must hold the main AioContext lock. | 25 | + * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). |
22 | + * | ||
23 | * TODO Can this be unified with bdrv_open_image()? | ||
24 | */ | 26 | */ |
25 | int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, | 27 | static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, |
26 | diff --git a/block/mirror.c b/block/mirror.c | 28 | BlockDriverState *bs, |
27 | index XXXXXXX..XXXXXXX 100644 | 29 | @@ -XXX,XX +XXX,XX @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, |
28 | --- a/block/mirror.c | 30 | BdrvChild *child; |
29 | +++ b/block/mirror.c | 31 | QDict *old_options, *explicit_options; |
30 | @@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job) | 32 | |
31 | bool abort = job->ret < 0; | 33 | + /* Make sure that the caller remembered to use a drained section. This is |
32 | int ret = 0; | 34 | + * important to avoid graph changes between the recursive queuing here and |
33 | 35 | + * bdrv_reopen_multiple(). */ | |
34 | + GLOBAL_STATE_CODE(); | 36 | + assert(bs->quiesce_counter > 0); |
35 | + | 37 | + |
36 | if (s->prepared) { | 38 | if (bs_queue == NULL) { |
37 | return 0; | 39 | bs_queue = g_new0(BlockReopenQueue, 1); |
40 | QSIMPLEQ_INIT(bs_queue); | ||
41 | @@ -XXX,XX +XXX,XX @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, | ||
42 | * If all devices prepare successfully, then the changes are committed | ||
43 | * to all devices. | ||
44 | * | ||
45 | + * All affected nodes must be drained between bdrv_reopen_queue() and | ||
46 | + * bdrv_reopen_multiple(). | ||
47 | */ | ||
48 | int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp) | ||
49 | { | ||
50 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er | ||
51 | |||
52 | assert(bs_queue != NULL); | ||
53 | |||
54 | - aio_context_release(ctx); | ||
55 | - bdrv_drain_all_begin(); | ||
56 | - aio_context_acquire(ctx); | ||
57 | - | ||
58 | QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { | ||
59 | + assert(bs_entry->state.bs->quiesce_counter > 0); | ||
60 | if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { | ||
61 | error_propagate(errp, local_err); | ||
62 | goto cleanup; | ||
63 | @@ -XXX,XX +XXX,XX @@ cleanup: | ||
38 | } | 64 | } |
39 | s->prepared = true; | 65 | g_free(bs_queue); |
40 | 66 | ||
41 | + aio_context_acquire(qemu_get_aio_context()); | 67 | - bdrv_drain_all_end(); |
68 | - | ||
69 | return ret; | ||
70 | } | ||
71 | |||
72 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) | ||
73 | { | ||
74 | int ret = -1; | ||
75 | Error *local_err = NULL; | ||
76 | - BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); | ||
77 | + BlockReopenQueue *queue; | ||
78 | |||
79 | + bdrv_subtree_drained_begin(bs); | ||
42 | + | 80 | + |
43 | mirror_top_bs = s->mirror_top_bs; | 81 | + queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); |
44 | bs_opaque = mirror_top_bs->opaque; | 82 | ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err); |
45 | src = mirror_top_bs->backing->bs; | 83 | if (local_err != NULL) { |
46 | @@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job) | 84 | error_propagate(errp, local_err); |
47 | bdrv_unref(mirror_top_bs); | 85 | } |
48 | bdrv_unref(src); | 86 | + |
49 | 87 | + bdrv_subtree_drained_end(bs); | |
50 | + aio_context_release(qemu_get_aio_context()); | ||
51 | + | 88 | + |
52 | return ret; | 89 | return ret; |
53 | } | 90 | } |
54 | 91 | ||
92 | diff --git a/block/replication.c b/block/replication.c | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/block/replication.c | ||
95 | +++ b/block/replication.c | ||
96 | @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, | ||
97 | new_secondary_flags = s->orig_secondary_flags; | ||
98 | } | ||
99 | |||
100 | + bdrv_subtree_drained_begin(s->hidden_disk->bs); | ||
101 | + bdrv_subtree_drained_begin(s->secondary_disk->bs); | ||
102 | + | ||
103 | if (orig_hidden_flags != new_hidden_flags) { | ||
104 | reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL, | ||
105 | new_hidden_flags); | ||
106 | @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, | ||
107 | reopen_queue, &local_err); | ||
108 | error_propagate(errp, local_err); | ||
109 | } | ||
110 | + | ||
111 | + bdrv_subtree_drained_end(s->hidden_disk->bs); | ||
112 | + bdrv_subtree_drained_end(s->secondary_disk->bs); | ||
113 | } | ||
114 | |||
115 | static void backup_job_cleanup(BlockDriverState *bs) | ||
116 | diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c | ||
117 | index XXXXXXX..XXXXXXX 100644 | ||
118 | --- a/qemu-io-cmds.c | ||
119 | +++ b/qemu-io-cmds.c | ||
120 | @@ -XXX,XX +XXX,XX @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) | ||
121 | opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL; | ||
122 | qemu_opts_reset(&reopen_opts); | ||
123 | |||
124 | + bdrv_subtree_drained_begin(bs); | ||
125 | brq = bdrv_reopen_queue(NULL, bs, opts, flags); | ||
126 | bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err); | ||
127 | + bdrv_subtree_drained_end(bs); | ||
128 | + | ||
129 | if (local_err) { | ||
130 | error_report_err(local_err); | ||
131 | } else { | ||
55 | -- | 132 | -- |
56 | 2.40.1 | 133 | 2.13.6 |
134 | |||
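Condensed from the qemu-io hunk above, the calling pattern that bdrv_reopen_queue()/bdrv_reopen_multiple() now require of every caller (error handling elided):

    bdrv_subtree_drained_begin(bs);

    /* The graph must stay quiesced between queuing and reopening;
     * bdrv_reopen_queue() asserts bs->quiesce_counter > 0. */
    queue = bdrv_reopen_queue(NULL, bs, opts, flags);
    bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);

    bdrv_subtree_drained_end(bs);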