1 | The following changes since commit 22dbfdecc3c52228d3489da3fe81da92b21197bf: | 1 | The following changes since commit dd2db39d78431ab5a0b78777afaab3d61e94533e: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20191010.0' into staging (2019-10-14 15:09:08 +0100) | 3 | Merge remote-tracking branch 'remotes/ehabkost-gl/tags/x86-next-pull-request' into staging (2021-06-01 21:23:26 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream | 7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream |
8 | 8 | ||
9 | for you to fetch changes up to a1406a9262a087d9ec9627b88da13c4590b61dae: | 9 | for you to fetch changes up to b317006a3f1f04191a7981cef83417cb2477213b: |
10 | 10 | ||
11 | iotests: Test large write request to qcow2 file (2019-10-14 17:12:48 +0200) | 11 | docs/secure-coding-practices: Describe how to use 'null-co' block driver (2021-06-02 14:29:14 +0200) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Block layer patches: | 14 | Block layer patches |
15 | 15 | ||
16 | - block: Fix crash with qcow2 partial cluster COW with small cluster | 16 | - NBD server: Fix crashes related to switching between AioContexts |
17 | sizes (misaligned write requests with BDRV_REQ_NO_FALLBACK) | 17 | - file-posix: Workaround for discard/write_zeroes on buggy filesystems |
18 | - qcow2: Fix integer overflow potentially causing corruption with huge | 18 | - Follow-up fixes for the reopen vs. permission changes |
19 | requests | 19 | - quorum: Fix error handling for flush |
20 | - vhdx: Detect truncated image files | 20 | - block-copy: Refactor copy_range handling |
21 | - tools: Support help options for --object | 21 | - docs: Describe how to use 'null-co' block driver |
22 | - Various block-related replay improvements | ||
23 | - iotests/028: Fix for long $TEST_DIRs | ||
24 | 22 | ||
25 | ---------------------------------------------------------------- | 23 | ---------------------------------------------------------------- |
26 | Alberto Garcia (1): | 24 | Lukas Straub (1): |
27 | block: Reject misaligned write requests with BDRV_REQ_NO_FALLBACK | 25 | block/quorum: Provide .bdrv_co_flush instead of .bdrv_co_flush_to_disk |
28 | 26 | ||
29 | Kevin Wolf (4): | 27 | Philippe Mathieu-Daudé (1): |
30 | vl: Split off user_creatable_print_help() | 28 | docs/secure-coding-practices: Describe how to use 'null-co' block driver |
31 | qemu-io: Support help options for --object | ||
32 | qemu-img: Support help options for --object | ||
33 | qemu-nbd: Support help options for --object | ||
34 | 29 | ||
35 | Max Reitz (3): | 30 | Sergio Lopez (2): |
36 | iotests/028: Fix for long $TEST_DIRs | 31 | block-backend: add drained_poll |
37 | qcow2: Limit total allocation range to INT_MAX | 32 | nbd/server: Use drained block ops to quiesce the server |
38 | iotests: Test large write request to qcow2 file | ||
39 | 33 | ||
40 | Pavel Dovgaluk (6): | 34 | Thomas Huth (2): |
41 | block: implement bdrv_snapshot_goto for blkreplay | 35 | block/file-posix: Fix problem with fallocate(PUNCH_HOLE) on GPFS |
42 | replay: disable default snapshot for record/replay | 36 | block/file-posix: Try other fallbacks after invalid FALLOC_FL_ZERO_RANGE |
43 | replay: update docs for record/replay with block devices | ||
44 | replay: don't drain/flush bdrv queue while RR is working | ||
45 | replay: finish record/replay before closing the disks | ||
46 | replay: add BH oneshot event for block layer | ||
47 | 37 | ||
48 | Peter Lieven (1): | 38 | Vladimir Sementsov-Ogievskiy (14): |
49 | block/vhdx: add check for truncated image files | 39 | qemu-io-cmds: assert that we don't have .perm requested in no-blk case |
40 | block/vvfat: child_vvfat_qcow: add .get_parent_aio_context, fix crash | ||
41 | block/vvfat: fix vvfat_child_perm crash | ||
42 | block: consistently use bdrv_is_read_only() | ||
43 | block: drop BlockDriverState::read_only | ||
44 | block: drop BlockBackendRootState::read_only | ||
45 | block: document child argument of bdrv_attach_child_common() | ||
46 | block-backend: improve blk_root_get_parent_desc() | ||
47 | block: improve bdrv_child_get_parent_desc() | ||
48 | block/vvfat: inherit child_vvfat_qcow from child_of_bds | ||
49 | block: simplify bdrv_child_user_desc() | ||
50 | block: improve permission conflict error message | ||
51 | block-copy: fix block_copy_task_entry() progress update | ||
52 | block-copy: refactor copy_range handling | ||
50 | 53 | ||
51 | docs/replay.txt | 12 +++- | 54 | docs/devel/secure-coding-practices.rst | 9 ++++ |
52 | include/qom/object_interfaces.h | 12 ++++ | 55 | include/block/block.h | 1 + |
53 | include/sysemu/replay.h | 4 ++ | 56 | include/block/block_int.h | 2 - |
54 | replay/replay-internal.h | 1 + | 57 | include/sysemu/block-backend.h | 4 ++ |
55 | block/blkreplay.c | 8 +++ | 58 | block.c | 82 ++++++++++++++++++++-------------- |
56 | block/block-backend.c | 9 ++- | 59 | block/block-backend.c | 26 +++++------ |
57 | block/io.c | 39 ++++++++++++- | 60 | block/block-copy.c | 80 ++++++++++++++++++++++----------- |
58 | block/iscsi.c | 5 +- | 61 | block/commit.c | 2 +- |
59 | block/nfs.c | 6 +- | 62 | block/file-posix.c | 29 ++++++++---- |
60 | block/null.c | 4 +- | 63 | block/io.c | 4 +- |
61 | block/nvme.c | 6 +- | 64 | block/qapi.c | 2 +- |
62 | block/qcow2-cluster.c | 5 +- | 65 | block/qcow2-snapshot.c | 2 +- |
63 | block/rbd.c | 5 +- | 66 | block/qcow2.c | 5 +-- |
64 | block/vhdx.c | 120 ++++++++++++++++++++++++++++++++++------ | 67 | block/quorum.c | 2 +- |
65 | block/vxhs.c | 5 +- | 68 | block/snapshot.c | 2 +- |
66 | cpus.c | 2 - | 69 | block/vhdx-log.c | 2 +- |
67 | qemu-img.c | 34 +++++++----- | 70 | block/vvfat.c | 14 +++--- |
68 | qemu-io.c | 9 ++- | 71 | blockdev.c | 3 +- |
69 | qemu-nbd.c | 9 ++- | 72 | nbd/server.c | 82 +++++++++++++++++++++++++--------- |
70 | qom/object_interfaces.c | 61 ++++++++++++++++++++ | 73 | qemu-io-cmds.c | 14 +++++- |
71 | replay/replay-events.c | 16 ++++++ | 74 | tests/unit/test-block-iothread.c | 6 --- |
72 | replay/replay.c | 2 + | 75 | tests/qemu-iotests/283.out | 2 +- |
73 | stubs/replay-user.c | 9 +++ | 76 | tests/qemu-iotests/307.out | 2 +- |
74 | vl.c | 63 ++++----------------- | 77 | tests/qemu-iotests/tests/qsd-jobs.out | 2 +- |
75 | stubs/Makefile.objs | 1 + | 78 | 24 files changed, 241 insertions(+), 138 deletions(-) |
76 | tests/qemu-iotests/028 | 11 +++- | ||
77 | tests/qemu-iotests/028.out | 1 - | ||
78 | tests/qemu-iotests/268 | 55 ++++++++++++++++++ | ||
79 | tests/qemu-iotests/268.out | 7 +++ | ||
80 | tests/qemu-iotests/270 | 83 +++++++++++++++++++++++++++ | ||
81 | tests/qemu-iotests/270.out | 9 +++ | ||
82 | tests/qemu-iotests/group | 2 + | ||
83 | 32 files changed, 504 insertions(+), 111 deletions(-) | ||
84 | create mode 100644 stubs/replay-user.c | ||
85 | create mode 100755 tests/qemu-iotests/268 | ||
86 | create mode 100644 tests/qemu-iotests/268.out | ||
87 | create mode 100755 tests/qemu-iotests/270 | ||
88 | create mode 100644 tests/qemu-iotests/270.out | ||
89 | 79 | ||
80 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Lukas Straub <lukasstraub2@web.de> | ||
1 | 2 | ||
3 | The quorum block driver uses a custom flush callback to handle the | ||
4 | case when some children return io errors. In that case it still | ||
5 | returns success if enough children are healthy. | ||
6 | However, it provides it as the .bdrv_co_flush_to_disk callback, not | ||
7 | as .bdrv_co_flush. This causes the block layer to do it's own | ||
8 | generic flushing for the children instead, which doesn't handle | ||
9 | errors properly. | ||
10 | |||
11 | Fix this by providing .bdrv_co_flush instead of | ||
12 | .bdrv_co_flush_to_disk so the block layer uses the custom flush | ||
13 | callback. | ||
14 | |||
15 | Signed-off-by: Lukas Straub <lukasstraub2@web.de> | ||
16 | Reported-by: Minghao Yuan <meeho@qq.com> | ||
17 | Message-Id: <20210518134214.11ccf05f@gecko.fritz.box> | ||
18 | Tested-by: Zhang Chen <chen.zhang@intel.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
20 | --- | ||
21 | block/quorum.c | 2 +- | ||
22 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
23 | |||
24 | diff --git a/block/quorum.c b/block/quorum.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/block/quorum.c | ||
27 | +++ b/block/quorum.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_quorum = { | ||
29 | .bdrv_dirname = quorum_dirname, | ||
30 | .bdrv_co_block_status = quorum_co_block_status, | ||
31 | |||
32 | - .bdrv_co_flush_to_disk = quorum_co_flush, | ||
33 | + .bdrv_co_flush = quorum_co_flush, | ||
34 | |||
35 | .bdrv_getlength = quorum_getlength, | ||
36 | |||
37 | -- | ||
38 | 2.30.2 | ||
39 | |||
40 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
1 | 2 | ||
3 | Coverity thinks blk may be NULL. It's a false-positive, as described in | ||
4 | a new comment. | ||
5 | |||
6 | Fixes: Coverity CID 1453194 | ||
7 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Message-Id: <20210519090532.3753-1-vsementsov@virtuozzo.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | --- | ||
11 | qemu-io-cmds.c | 14 ++++++++++++-- | ||
12 | 1 file changed, 12 insertions(+), 2 deletions(-) | ||
13 | |||
14 | diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/qemu-io-cmds.c | ||
17 | +++ b/qemu-io-cmds.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, | ||
19 | return -EINVAL; | ||
20 | } | ||
21 | |||
22 | - /* Request additional permissions if necessary for this command. The caller | ||
23 | + /* | ||
24 | + * Request additional permissions if necessary for this command. The caller | ||
25 | * is responsible for restoring the original permissions afterwards if this | ||
26 | - * is what it wants. */ | ||
27 | + * is what it wants. | ||
28 | + * | ||
29 | + * Coverity thinks that blk may be NULL in the following if condition. It's | ||
30 | + * not so: in init_check_command() we fail if blk is NULL for command with | ||
31 | + * both CMD_FLAG_GLOBAL and CMD_NOFILE_OK flags unset. And in | ||
32 | + * qemuio_add_command() we assert that command with non-zero .perm field | ||
33 | + * doesn't set this flags. So, the following assertion is to silence | ||
34 | + * Coverity: | ||
35 | + */ | ||
36 | + assert(blk || !ct->perm); | ||
37 | if (ct->perm && blk_is_available(blk)) { | ||
38 | uint64_t orig_perm, orig_shared_perm; | ||
39 | blk_get_perm(blk, &orig_perm, &orig_shared_perm); | ||
40 | -- | ||
41 | 2.30.2 | ||
42 | |||
43 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
1 | 2 | ||
3 | Commit 3ca1f3225727419ba573673b744edac10904276f | ||
4 | "block: BdrvChildClass: add .get_parent_aio_context handler" introduced | ||
5 | new handler and commit 228ca37e12f97788e05bd0c92f89b3e5e4019607 | ||
6 | "block: drop ctx argument from bdrv_root_attach_child" made a generic | ||
7 | use of it. But 3ca1f3225727419ba573673b744edac10904276f didn't update | ||
8 | child_vvfat_qcow. Fix that. | ||
9 | |||
10 | Before that fix the command | ||
11 | |||
12 | ./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \ | ||
13 | -drive file=fat:rw:fat-type=16:"<path of a host folder>",id=fat16,format=raw,if=none | ||
14 | |||
15 | crashes: | ||
16 | |||
17 | 1 bdrv_child_get_parent_aio_context (c=0x559d62426d20) | ||
18 | at ../block.c:1440 | ||
19 | 2 bdrv_attach_child_common | ||
20 | (child_bs=0x559d62468190, child_name=0x559d606f9e3d "write-target", | ||
21 | child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3, | ||
22 | perm=3, shared_perm=4, opaque=0x559d62445690, | ||
23 | child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60) | ||
24 | at ../block.c:2795 | ||
25 | 3 bdrv_attach_child_noperm | ||
26 | (parent_bs=0x559d62445690, child_bs=0x559d62468190, | ||
27 | child_name=0x559d606f9e3d "write-target", | ||
28 | child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3, | ||
29 | child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60) at | ||
30 | ../block.c:2855 | ||
31 | 4 bdrv_attach_child | ||
32 | (parent_bs=0x559d62445690, child_bs=0x559d62468190, | ||
33 | child_name=0x559d606f9e3d "write-target", | ||
34 | child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3, | ||
35 | errp=0x7ffc74c2ae60) at ../block.c:2953 | ||
36 | 5 bdrv_open_child | ||
37 | (filename=0x559d62464b80 "/var/tmp/vl.h3TIS4", | ||
38 | options=0x559d6246ec20, bdref_key=0x559d606f9e3d "write-target", | ||
39 | parent=0x559d62445690, child_class=0x559d60c58d20 | ||
40 | <child_vvfat_qcow>, child_role=3, allow_none=false, | ||
41 | errp=0x7ffc74c2ae60) at ../block.c:3351 | ||
42 | 6 enable_write_target (bs=0x559d62445690, errp=0x7ffc74c2ae60) at | ||
43 | ../block/vvfat.c:3176 | ||
44 | 7 vvfat_open (bs=0x559d62445690, options=0x559d6244adb0, flags=155650, | ||
45 | errp=0x7ffc74c2ae60) at ../block/vvfat.c:1236 | ||
46 | 8 bdrv_open_driver (bs=0x559d62445690, drv=0x559d60d4f7e0 | ||
47 | <bdrv_vvfat>, node_name=0x0, | ||
48 | options=0x559d6244adb0, open_flags=155650, | ||
49 | errp=0x7ffc74c2af70) at ../block.c:1557 | ||
50 | 9 bdrv_open_common (bs=0x559d62445690, file=0x0, | ||
51 | options=0x559d6244adb0, errp=0x7ffc74c2af70) at | ||
52 | ... | ||
53 | |||
54 | (gdb) fr 1 | ||
55 | #1 0x0000559d603ea3bf in bdrv_child_get_parent_aio_context | ||
56 | (c=0x559d62426d20) at ../block.c:1440 | ||
57 | 1440 return c->klass->get_parent_aio_context(c); | ||
58 | (gdb) p c->klass | ||
59 | $1 = (const BdrvChildClass *) 0x559d60c58d20 <child_vvfat_qcow> | ||
60 | (gdb) p c->klass->get_parent_aio_context | ||
61 | $2 = (AioContext *(*)(BdrvChild *)) 0x0 | ||
62 | |||
63 | Fixes: 3ca1f3225727419ba573673b744edac10904276f | ||
64 | Fixes: 228ca37e12f97788e05bd0c92f89b3e5e4019607 | ||
65 | Reported-by: John Arbuckle <programmingkidx@gmail.com> | ||
66 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
67 | Message-Id: <20210524101257.119377-2-vsementsov@virtuozzo.com> | ||
68 | Tested-by: John Arbuckle <programmingkidx@gmail.com> | ||
69 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
70 | --- | ||
71 | include/block/block.h | 1 + | ||
72 | block.c | 4 ++-- | ||
73 | block/vvfat.c | 1 + | ||
74 | 3 files changed, 4 insertions(+), 2 deletions(-) | ||
75 | |||
76 | diff --git a/include/block/block.h b/include/block/block.h | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/include/block/block.h | ||
79 | +++ b/include/block/block.h | ||
80 | @@ -XXX,XX +XXX,XX @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, | ||
81 | bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, | ||
82 | GSList **ignore, Error **errp); | ||
83 | AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); | ||
84 | +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); | ||
85 | |||
86 | int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); | ||
87 | int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); | ||
88 | diff --git a/block.c b/block.c | ||
89 | index XXXXXXX..XXXXXXX 100644 | ||
90 | --- a/block.c | ||
91 | +++ b/block.c | ||
92 | @@ -XXX,XX +XXX,XX @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | -static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c) | ||
97 | +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) | ||
98 | { | ||
99 | BlockDriverState *bs = c->opaque; | ||
100 | |||
101 | @@ -XXX,XX +XXX,XX @@ const BdrvChildClass child_of_bds = { | ||
102 | .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, | ||
103 | .set_aio_ctx = bdrv_child_cb_set_aio_ctx, | ||
104 | .update_filename = bdrv_child_cb_update_filename, | ||
105 | - .get_parent_aio_context = bdrv_child_cb_get_parent_aio_context, | ||
106 | + .get_parent_aio_context = child_of_bds_get_parent_aio_context, | ||
107 | }; | ||
108 | |||
109 | AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) | ||
110 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/block/vvfat.c | ||
113 | +++ b/block/vvfat.c | ||
114 | @@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, | ||
115 | static const BdrvChildClass child_vvfat_qcow = { | ||
116 | .parent_is_bds = true, | ||
117 | .inherit_options = vvfat_qcow_options, | ||
118 | + .get_parent_aio_context = child_of_bds_get_parent_aio_context, | ||
119 | }; | ||
120 | |||
121 | static int enable_write_target(BlockDriverState *bs, Error **errp) | ||
122 | -- | ||
123 | 2.30.2 | ||
124 | |||
125 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
1 | 2 | ||
3 | It's wrong to rely on s->qcow in vvfat_child_perm, as on permission | ||
4 | update during bdrv_open_child() call this field is not set yet. | ||
5 | |||
6 | Still prior to aa5a04c7db27eea6b36de32f241b155f0d9ce34d, it didn't | ||
7 | crash, as bdrv_open_child passed NULL as child to bdrv_child_perm(), | ||
8 | and NULL was equal to NULL in assertion (still, it was bad guarantee | ||
9 | for child being s->qcow, not backing :). | ||
10 | |||
11 | Since aa5a04c7db27eea6b36de32f241b155f0d9ce34d | ||
12 | "add bdrv_attach_child_noperm" bdrv_refresh_perms called on parent node | ||
13 | when attaching child, and new correct child pointer is passed to | ||
14 | .bdrv_child_perm. Still, s->qcow is NULL at the moment. Let's rely only | ||
15 | on role instead. | ||
16 | |||
17 | Without that fix, | ||
18 | ./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \ | ||
19 | -drive \ | ||
20 | file=fat:rw:fat-type=16:"<path of a host folder>",id=fat16,format=raw,if=none | ||
21 | |||
22 | crashes: | ||
23 | (gdb) bt | ||
24 | 0 raise () at /lib64/libc.so.6 | ||
25 | 1 abort () at /lib64/libc.so.6 | ||
26 | 2 _nl_load_domain.cold () at /lib64/libc.so.6 | ||
27 | 3 annobin_assert.c_end () at /lib64/libc.so.6 | ||
28 | 4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3, | ||
29 | reopen_queue=0x0, perm=0, shared=31, | ||
30 | nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at | ||
31 | ../block/vvfat.c:3214 | ||
32 | 5 bdrv_child_perm (bs=0x559186f3d690, child_bs=0x559186f60190, | ||
33 | c=0x559186f1ed20, role=3, reopen_queue=0x0, | ||
34 | parent_perm=0, parent_shared=31, | ||
35 | nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) | ||
36 | at ../block.c:2094 | ||
37 | 6 bdrv_node_refresh_perm (bs=0x559186f3d690, q=0x0, | ||
38 | tran=0x559186f65850, errp=0x7ffe56f28530) at | ||
39 | ../block.c:2336 | ||
40 | 7 bdrv_list_refresh_perms (list=0x559186db5b90 = {...}, q=0x0, | ||
41 | tran=0x559186f65850, errp=0x7ffe56f28530) | ||
42 | at ../block.c:2358 | ||
43 | 8 bdrv_refresh_perms (bs=0x559186f3d690, errp=0x7ffe56f28530) at | ||
44 | ../block.c:2419 | ||
45 | 9 bdrv_attach_child | ||
46 | (parent_bs=0x559186f3d690, child_bs=0x559186f60190, | ||
47 | child_name=0x559184d83e3d "write-target", | ||
48 | child_class=0x5591852f3b00 <child_vvfat_qcow>, child_role=3, | ||
49 | errp=0x7ffe56f28530) at ../block.c:2959 | ||
50 | 10 bdrv_open_child | ||
51 | (filename=0x559186f5cb80 "/var/tmp/vl.7WYmFU", | ||
52 | options=0x559186f66c20, bdref_key=0x559184d83e3d "write-target", | ||
53 | parent=0x559186f3d690, child_class=0x5591852f3b00 | ||
54 | <child_vvfat_qcow>, child_role=3, allow_none=false, | ||
55 | errp=0x7ffe56f28530) at ../block.c:3351 | ||
56 | 11 enable_write_target (bs=0x559186f3d690, errp=0x7ffe56f28530) at | ||
57 | ../block/vvfat.c:3177 | ||
58 | 12 vvfat_open (bs=0x559186f3d690, options=0x559186f42db0, flags=155650, | ||
59 | errp=0x7ffe56f28530) at ../block/vvfat.c:1236 | ||
60 | 13 bdrv_open_driver (bs=0x559186f3d690, drv=0x5591853d97e0 | ||
61 | <bdrv_vvfat>, node_name=0x0, | ||
62 | options=0x559186f42db0, open_flags=155650, | ||
63 | errp=0x7ffe56f28640) at ../block.c:1557 | ||
64 | 14 bdrv_open_common (bs=0x559186f3d690, file=0x0, | ||
65 | options=0x559186f42db0, errp=0x7ffe56f28640) at | ||
66 | ../block.c:1833 | ||
67 | ... | ||
68 | |||
69 | (gdb) fr 4 | ||
70 | #4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3, | ||
71 | reopen_queue=0x0, perm=0, shared=31, | ||
72 | nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at | ||
73 | ../block/vvfat.c:3214 | ||
74 | 3214 assert(c == s->qcow || (role & BDRV_CHILD_COW)); | ||
75 | (gdb) p role | ||
76 | $1 = 3 # BDRV_CHILD_DATA | BDRV_CHILD_METADATA | ||
77 | (gdb) p *c | ||
78 | $2 = {bs = 0x559186f60190, name = 0x559186f669d0 "write-target", klass | ||
79 | = 0x5591852f3b00 <child_vvfat_qcow>, role = 3, opaque = | ||
80 | 0x559186f3d690, perm = 3, shared_perm = 4, frozen = false, | ||
81 | parent_quiesce_counter = 0, next = {le_next = 0x0, le_prev = | ||
82 | 0x559186f41818}, next_parent = {le_next = 0x0, le_prev = | ||
83 | 0x559186f64320}} | ||
84 | (gdb) p s->qcow | ||
85 | $3 = (BdrvChild *) 0x0 | ||
86 | |||
87 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
88 | Message-Id: <20210524101257.119377-3-vsementsov@virtuozzo.com> | ||
89 | Tested-by: John Arbuckle <programmingkidx@gmail.com> | ||
90 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
91 | --- | ||
92 | block/vvfat.c | 7 ++----- | ||
93 | 1 file changed, 2 insertions(+), 5 deletions(-) | ||
94 | |||
95 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/block/vvfat.c | ||
98 | +++ b/block/vvfat.c | ||
99 | @@ -XXX,XX +XXX,XX @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, | ||
100 | uint64_t perm, uint64_t shared, | ||
101 | uint64_t *nperm, uint64_t *nshared) | ||
102 | { | ||
103 | - BDRVVVFATState *s = bs->opaque; | ||
104 | - | ||
105 | - assert(c == s->qcow || (role & BDRV_CHILD_COW)); | ||
106 | - | ||
107 | - if (c == s->qcow) { | ||
108 | + if (role & BDRV_CHILD_DATA) { | ||
109 | /* This is a private node, nobody should try to attach to it */ | ||
110 | *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; | ||
111 | *nshared = BLK_PERM_WRITE_UNCHANGED; | ||
112 | } else { | ||
113 | + assert(role & BDRV_CHILD_COW); | ||
114 | /* The backing file is there so 'commit' can use it. vvfat doesn't | ||
115 | * access it in any way. */ | ||
116 | *nperm = 0; | ||
117 | -- | ||
118 | 2.30.2 | ||
119 | |||
120 | diff view generated by jsdifflib |
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | The BDRV_REQ_NO_FALLBACK flag means that an operation should only be | 3 | It's better to use accessor function instead of bs->read_only directly. |
4 | performed if it can be offloaded or otherwise performed efficiently. | 4 | In some places use bdrv_is_writable() instead of |
5 | checking both BDRV_O_RDWR set and BDRV_O_INACTIVE not set. | ||
5 | 6 | ||
6 | However a misaligned write request requires a RMW so we should return | 7 | In bdrv_open_common() it's a bit strange to add one more variable, but |
7 | an error and let the caller decide how to proceed. | 8 | we are going to drop bs->read_only in the next patch, so new ro local |
9 | variable substitutes it here. | ||
8 | 10 | ||
9 | This hits an assertion since commit c8bb23cbdb if the required | 11 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
10 | alignment is larger than the cluster size: | 12 | Message-Id: <20210527154056.70294-2-vsementsov@virtuozzo.com> |
11 | |||
12 | qemu-img create -f qcow2 -o cluster_size=2k img.qcow2 4G | ||
13 | qemu-io -c "open -o driver=qcow2,file.align=4k blkdebug::img.qcow2" \ | ||
14 | -c 'write 0 512' | ||
15 | qemu-io: block/io.c:1127: bdrv_driver_pwritev: Assertion `!(flags & BDRV_REQ_NO_FALLBACK)' failed. | ||
16 | Aborted | ||
17 | |||
18 | The reason is that when writing to an unallocated cluster we try to | ||
19 | skip the copy-on-write part and zeroize it using BDRV_REQ_NO_FALLBACK | ||
20 | instead, resulting in a write request that is too small (2KB cluster | ||
21 | size vs 4KB required alignment). | ||
22 | |||
23 | Signed-off-by: Alberto Garcia <berto@igalia.com> | ||
24 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
25 | --- | 14 | --- |
26 | block/io.c | 7 +++++ | 15 | block.c | 11 +++++++---- |
27 | tests/qemu-iotests/268 | 55 ++++++++++++++++++++++++++++++++++++++ | 16 | block/block-backend.c | 2 +- |
28 | tests/qemu-iotests/268.out | 7 +++++ | 17 | block/commit.c | 2 +- |
29 | tests/qemu-iotests/group | 1 + | 18 | block/io.c | 4 ++-- |
30 | 4 files changed, 70 insertions(+) | 19 | block/qapi.c | 2 +- |
31 | create mode 100755 tests/qemu-iotests/268 | 20 | block/qcow2-snapshot.c | 2 +- |
32 | create mode 100644 tests/qemu-iotests/268.out | 21 | block/qcow2.c | 5 ++--- |
22 | block/snapshot.c | 2 +- | ||
23 | block/vhdx-log.c | 2 +- | ||
24 | 9 files changed, 17 insertions(+), 15 deletions(-) | ||
33 | 25 | ||
26 | diff --git a/block.c b/block.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/block.c | ||
29 | +++ b/block.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | ||
31 | QemuOpts *opts; | ||
32 | BlockDriver *drv; | ||
33 | Error *local_err = NULL; | ||
34 | + bool ro; | ||
35 | |||
36 | assert(bs->file == NULL); | ||
37 | assert(options != NULL && bs->options != options); | ||
38 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | ||
39 | |||
40 | bs->read_only = !(bs->open_flags & BDRV_O_RDWR); | ||
41 | |||
42 | - if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { | ||
43 | - if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { | ||
44 | + ro = bdrv_is_read_only(bs); | ||
45 | + | ||
46 | + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { | ||
47 | + if (!ro && bdrv_is_whitelisted(drv, true)) { | ||
48 | ret = bdrv_apply_auto_read_only(bs, NULL, NULL); | ||
49 | } else { | ||
50 | ret = -ENOTSUP; | ||
51 | } | ||
52 | if (ret < 0) { | ||
53 | error_setg(errp, | ||
54 | - !bs->read_only && bdrv_is_whitelisted(drv, true) | ||
55 | + !ro && bdrv_is_whitelisted(drv, true) | ||
56 | ? "Driver '%s' can only be used for read-only devices" | ||
57 | : "Driver '%s' is not whitelisted", | ||
58 | drv->format_name); | ||
59 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | ||
60 | assert(qatomic_read(&bs->copy_on_read) == 0); | ||
61 | |||
62 | if (bs->open_flags & BDRV_O_COPY_ON_READ) { | ||
63 | - if (!bs->read_only) { | ||
64 | + if (!ro) { | ||
65 | bdrv_enable_copy_on_read(bs); | ||
66 | } else { | ||
67 | error_setg(errp, "Can't use copy-on-read on read-only device"); | ||
68 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/block/block-backend.c | ||
71 | +++ b/block/block-backend.c | ||
72 | @@ -XXX,XX +XXX,XX @@ void blk_update_root_state(BlockBackend *blk) | ||
73 | assert(blk->root); | ||
74 | |||
75 | blk->root_state.open_flags = blk->root->bs->open_flags; | ||
76 | - blk->root_state.read_only = blk->root->bs->read_only; | ||
77 | + blk->root_state.read_only = bdrv_is_read_only(blk->root->bs); | ||
78 | blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; | ||
79 | } | ||
80 | |||
81 | diff --git a/block/commit.c b/block/commit.c | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/block/commit.c | ||
84 | +++ b/block/commit.c | ||
85 | @@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs) | ||
86 | return -EBUSY; | ||
87 | } | ||
88 | |||
89 | - ro = backing_file_bs->read_only; | ||
90 | + ro = bdrv_is_read_only(backing_file_bs); | ||
91 | |||
92 | if (ro) { | ||
93 | if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) { | ||
34 | diff --git a/block/io.c b/block/io.c | 94 | diff --git a/block/io.c b/block/io.c |
35 | index XXXXXXX..XXXXXXX 100644 | 95 | index XXXXXXX..XXXXXXX 100644 |
36 | --- a/block/io.c | 96 | --- a/block/io.c |
37 | +++ b/block/io.c | 97 | +++ b/block/io.c |
38 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, | 98 | @@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, |
39 | return ret; | 99 | |
100 | bdrv_check_request(offset, bytes, &error_abort); | ||
101 | |||
102 | - if (bs->read_only) { | ||
103 | + if (bdrv_is_read_only(bs)) { | ||
104 | return -EPERM; | ||
40 | } | 105 | } |
41 | 106 | ||
42 | + /* If the request is misaligned then we can't make it efficient */ | 107 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, |
43 | + if ((flags & BDRV_REQ_NO_FALLBACK) && | 108 | if (new_bytes) { |
44 | + !QEMU_IS_ALIGNED(offset | bytes, align)) | 109 | bdrv_make_request_serialising(&req, 1); |
45 | + { | 110 | } |
46 | + return -ENOTSUP; | 111 | - if (bs->read_only) { |
47 | + } | 112 | + if (bdrv_is_read_only(bs)) { |
48 | + | 113 | error_setg(errp, "Image is read-only"); |
49 | bdrv_inc_in_flight(bs); | 114 | ret = -EACCES; |
50 | /* | 115 | goto out; |
51 | * Align write if necessary by performing a read-modify-write cycle. | 116 | diff --git a/block/qapi.c b/block/qapi.c |
52 | diff --git a/tests/qemu-iotests/268 b/tests/qemu-iotests/268 | ||
53 | new file mode 100755 | ||
54 | index XXXXXXX..XXXXXXX | ||
55 | --- /dev/null | ||
56 | +++ b/tests/qemu-iotests/268 | ||
57 | @@ -XXX,XX +XXX,XX @@ | ||
58 | +#!/usr/bin/env bash | ||
59 | +# | ||
60 | +# Test write request with required alignment larger than the cluster size | ||
61 | +# | ||
62 | +# Copyright (C) 2019 Igalia, S.L. | ||
63 | +# Author: Alberto Garcia <berto@igalia.com> | ||
64 | +# | ||
65 | +# This program is free software; you can redistribute it and/or modify | ||
66 | +# it under the terms of the GNU General Public License as published by | ||
67 | +# the Free Software Foundation; either version 2 of the License, or | ||
68 | +# (at your option) any later version. | ||
69 | +# | ||
70 | +# This program is distributed in the hope that it will be useful, | ||
71 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
72 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
73 | +# GNU General Public License for more details. | ||
74 | +# | ||
75 | +# You should have received a copy of the GNU General Public License | ||
76 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
77 | +# | ||
78 | + | ||
79 | +# creator | ||
80 | +owner=berto@igalia.com | ||
81 | + | ||
82 | +seq=`basename $0` | ||
83 | +echo "QA output created by $seq" | ||
84 | + | ||
85 | +status=1 # failure is the default! | ||
86 | + | ||
87 | +_cleanup() | ||
88 | +{ | ||
89 | + _cleanup_test_img | ||
90 | +} | ||
91 | +trap "_cleanup; exit \$status" 0 1 2 3 15 | ||
92 | + | ||
93 | +# get standard environment, filters and checks | ||
94 | +. ./common.rc | ||
95 | +. ./common.filter | ||
96 | + | ||
97 | +_supported_fmt qcow2 | ||
98 | +_supported_proto file | ||
99 | + | ||
100 | +echo | ||
101 | +echo "== Required alignment larger than cluster size ==" | ||
102 | + | ||
103 | +CLUSTER_SIZE=2k _make_test_img 1M | ||
104 | +# Since commit c8bb23cbdb writing to an unallocated cluster fills the | ||
105 | +# empty COW areas with bdrv_write_zeroes(flags=BDRV_REQ_NO_FALLBACK) | ||
106 | +$QEMU_IO -c "open -o driver=$IMGFMT,file.align=4k blkdebug::$TEST_IMG" \ | ||
107 | + -c "write 0 512" | _filter_qemu_io | ||
108 | + | ||
109 | +# success, all done | ||
110 | +echo "*** done" | ||
111 | +rm -f $seq.full | ||
112 | +status=0 | ||
113 | diff --git a/tests/qemu-iotests/268.out b/tests/qemu-iotests/268.out | ||
114 | new file mode 100644 | ||
115 | index XXXXXXX..XXXXXXX | ||
116 | --- /dev/null | ||
117 | +++ b/tests/qemu-iotests/268.out | ||
118 | @@ -XXX,XX +XXX,XX @@ | ||
119 | +QA output created by 268 | ||
120 | + | ||
121 | +== Required alignment larger than cluster size == | ||
122 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 | ||
123 | +wrote 512/512 bytes at offset 0 | ||
124 | +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
125 | +*** done | ||
126 | diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group | ||
127 | index XXXXXXX..XXXXXXX 100644 | 117 | index XXXXXXX..XXXXXXX 100644 |
128 | --- a/tests/qemu-iotests/group | 118 | --- a/block/qapi.c |
129 | +++ b/tests/qemu-iotests/group | 119 | +++ b/block/qapi.c |
130 | @@ -XXX,XX +XXX,XX @@ | 120 | @@ -XXX,XX +XXX,XX @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, |
131 | 265 rw auto quick | 121 | |
132 | 266 rw quick | 122 | info = g_malloc0(sizeof(*info)); |
133 | 267 rw auto quick snapshot | 123 | info->file = g_strdup(bs->filename); |
134 | +268 rw auto quick | 124 | - info->ro = bs->read_only; |
125 | + info->ro = bdrv_is_read_only(bs); | ||
126 | info->drv = g_strdup(bs->drv->format_name); | ||
127 | info->encrypted = bs->encrypted; | ||
128 | |||
129 | diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c | ||
130 | index XXXXXXX..XXXXXXX 100644 | ||
131 | --- a/block/qcow2-snapshot.c | ||
132 | +++ b/block/qcow2-snapshot.c | ||
133 | @@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, | ||
134 | int new_l1_bytes; | ||
135 | int ret; | ||
136 | |||
137 | - assert(bs->read_only); | ||
138 | + assert(bdrv_is_read_only(bs)); | ||
139 | |||
140 | /* Search the snapshot */ | ||
141 | snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name); | ||
142 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
143 | index XXXXXXX..XXXXXXX 100644 | ||
144 | --- a/block/qcow2.c | ||
145 | +++ b/block/qcow2.c | ||
146 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, | ||
147 | |||
148 | /* Clear unknown autoclear feature bits */ | ||
149 | update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; | ||
150 | - update_header = | ||
151 | - update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); | ||
152 | + update_header = update_header && bdrv_is_writable(bs); | ||
153 | if (update_header) { | ||
154 | s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; | ||
155 | } | ||
156 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, | ||
157 | bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; | ||
158 | |||
159 | /* Repair image if dirty */ | ||
160 | - if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && | ||
161 | + if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) && | ||
162 | (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { | ||
163 | BdrvCheckResult result = {0}; | ||
164 | |||
165 | diff --git a/block/snapshot.c b/block/snapshot.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/block/snapshot.c | ||
168 | +++ b/block/snapshot.c | ||
169 | @@ -XXX,XX +XXX,XX @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, | ||
170 | error_setg(errp, "snapshot_id and name are both NULL"); | ||
171 | return -EINVAL; | ||
172 | } | ||
173 | - if (!bs->read_only) { | ||
174 | + if (!bdrv_is_read_only(bs)) { | ||
175 | error_setg(errp, "Device is not readonly"); | ||
176 | return -EINVAL; | ||
177 | } | ||
178 | diff --git a/block/vhdx-log.c b/block/vhdx-log.c | ||
179 | index XXXXXXX..XXXXXXX 100644 | ||
180 | --- a/block/vhdx-log.c | ||
181 | +++ b/block/vhdx-log.c | ||
182 | @@ -XXX,XX +XXX,XX @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, | ||
183 | } | ||
184 | |||
185 | if (logs.valid) { | ||
186 | - if (bs->read_only) { | ||
187 | + if (bdrv_is_read_only(bs)) { | ||
188 | bdrv_refresh_filename(bs); | ||
189 | ret = -EPERM; | ||
190 | error_setg(errp, | ||
135 | -- | 191 | -- |
136 | 2.20.1 | 192 | 2.30.2 |
137 | 193 | ||
138 | 194 | diff view generated by jsdifflib |
1 | From: Peter Lieven <pl@kamp.de> | 1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | qemu is currently not able to detect truncated vhdx image files. | 3 | This variable is just a cache for !(bs->open_flags & BDRV_O_RDWR), |
4 | Add a basic check if all allocated blocks are reachable at open and | 4 | which we have to synchronize everywhere. Let's just drop it and |
5 | report all errors during bdrv_co_check. | 5 | consistently use bdrv_is_read_only(). |
6 | 6 | ||
7 | Signed-off-by: Peter Lieven <pl@kamp.de> | 7 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
8 | Message-Id: <20210527154056.70294-3-vsementsov@virtuozzo.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | --- | 10 | --- |
10 | block/vhdx.c | 120 +++++++++++++++++++++++++++++++++++++++++++-------- | 11 | include/block/block_int.h | 1 - |
11 | 1 file changed, 103 insertions(+), 17 deletions(-) | 12 | block.c | 7 +------ |
13 | tests/unit/test-block-iothread.c | 6 ------ | ||
14 | 3 files changed, 1 insertion(+), 13 deletions(-) | ||
12 | 15 | ||
13 | diff --git a/block/vhdx.c b/block/vhdx.c | 16 | diff --git a/include/block/block_int.h b/include/block/block_int.h |
14 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/block/vhdx.c | 18 | --- a/include/block/block_int.h |
16 | +++ b/block/vhdx.c | 19 | +++ b/include/block/block_int.h |
17 | @@ -XXX,XX +XXX,XX @@ | 20 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { |
18 | #include "qemu/option.h" | 21 | * locking needed during I/O... |
19 | #include "qemu/crc32c.h" | 22 | */ |
20 | #include "qemu/bswap.h" | 23 | int open_flags; /* flags used to open the file, re-used for re-open */ |
21 | +#include "qemu/error-report.h" | 24 | - bool read_only; /* if true, the media is read only */ |
22 | #include "vhdx.h" | 25 | bool encrypted; /* if true, the media is encrypted */ |
23 | #include "migration/blocker.h" | 26 | bool sg; /* if true, the device is a /dev/sg* */ |
24 | #include "qemu/uuid.h" | 27 | bool probed; /* if true, format was probed rather than specified */ |
25 | @@ -XXX,XX +XXX,XX @@ static int vhdx_region_check(BDRVVHDXState *s, uint64_t start, uint64_t length) | 28 | diff --git a/block.c b/block.c |
26 | end = start + length; | 29 | index XXXXXXX..XXXXXXX 100644 |
27 | QLIST_FOREACH(r, &s->regions, entries) { | 30 | --- a/block.c |
28 | if (!((start >= r->end) || (end <= r->start))) { | 31 | +++ b/block.c |
29 | + error_report("VHDX region %" PRIu64 "-%" PRIu64 " overlaps with " | 32 | @@ -XXX,XX +XXX,XX @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, |
30 | + "region %" PRIu64 "-%." PRIu64, start, end, r->start, | 33 | * image is inactivated. */ |
31 | + r->end); | 34 | bool bdrv_is_read_only(BlockDriverState *bs) |
32 | ret = -EINVAL; | 35 | { |
33 | goto exit; | 36 | - return bs->read_only; |
34 | } | 37 | + return !(bs->open_flags & BDRV_O_RDWR); |
35 | @@ -XXX,XX +XXX,XX @@ static void vhdx_calc_bat_entries(BDRVVHDXState *s) | ||
36 | |||
37 | } | 38 | } |
38 | 39 | ||
39 | +static int vhdx_check_bat_entries(BlockDriverState *bs, int *errcnt) | 40 | int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, |
40 | +{ | 41 | @@ -XXX,XX +XXX,XX @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, |
41 | + BDRVVHDXState *s = bs->opaque; | ||
42 | + int64_t image_file_size = bdrv_getlength(bs->file->bs); | ||
43 | + uint64_t payblocks = s->chunk_ratio; | ||
44 | + uint64_t i; | ||
45 | + int ret = 0; | ||
46 | + | ||
47 | + if (image_file_size < 0) { | ||
48 | + error_report("Could not determinate VHDX image file size."); | ||
49 | + return image_file_size; | ||
50 | + } | ||
51 | + | ||
52 | + for (i = 0; i < s->bat_entries; i++) { | ||
53 | + if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) == | ||
54 | + PAYLOAD_BLOCK_FULLY_PRESENT) { | ||
55 | + uint64_t offset = s->bat[i] & VHDX_BAT_FILE_OFF_MASK; | ||
56 | + /* | ||
57 | + * Allow that the last block exists only partially. The VHDX spec | ||
58 | + * states that the image file can only grow in blocksize increments, | ||
59 | + * but QEMU created images with partial last blocks in the past. | ||
60 | + */ | ||
61 | + uint32_t block_length = MIN(s->block_size, | ||
62 | + bs->total_sectors * BDRV_SECTOR_SIZE - i * s->block_size); | ||
63 | + /* | ||
64 | + * Check for BAT entry overflow. | ||
65 | + */ | ||
66 | + if (offset > INT64_MAX - s->block_size) { | ||
67 | + error_report("VHDX BAT entry %" PRIu64 " offset overflow.", i); | ||
68 | + ret = -EINVAL; | ||
69 | + if (!errcnt) { | ||
70 | + break; | ||
71 | + } | ||
72 | + (*errcnt)++; | ||
73 | + } | ||
74 | + /* | ||
75 | + * Check if fully allocated BAT entries do not reside after | ||
76 | + * end of the image file. | ||
77 | + */ | ||
78 | + if (offset >= image_file_size) { | ||
79 | + error_report("VHDX BAT entry %" PRIu64 " start offset %" PRIu64 | ||
80 | + " points after end of file (%" PRIi64 "). Image" | ||
81 | + " has probably been truncated.", | ||
82 | + i, offset, image_file_size); | ||
83 | + ret = -EINVAL; | ||
84 | + if (!errcnt) { | ||
85 | + break; | ||
86 | + } | ||
87 | + (*errcnt)++; | ||
88 | + } else if (offset + block_length > image_file_size) { | ||
89 | + error_report("VHDX BAT entry %" PRIu64 " end offset %" PRIu64 | ||
90 | + " points after end of file (%" PRIi64 "). Image" | ||
91 | + " has probably been truncated.", | ||
92 | + i, offset + block_length - 1, image_file_size); | ||
93 | + ret = -EINVAL; | ||
94 | + if (!errcnt) { | ||
95 | + break; | ||
96 | + } | ||
97 | + (*errcnt)++; | ||
98 | + } | ||
99 | + | ||
100 | + /* | ||
101 | + * verify populated BAT field file offsets against | ||
102 | + * region table and log entries | ||
103 | + */ | ||
104 | + if (payblocks--) { | ||
105 | + /* payload bat entries */ | ||
106 | + int ret2; | ||
107 | + ret2 = vhdx_region_check(s, offset, s->block_size); | ||
108 | + if (ret2 < 0) { | ||
109 | + ret = -EINVAL; | ||
110 | + if (!errcnt) { | ||
111 | + break; | ||
112 | + } | ||
113 | + (*errcnt)++; | ||
114 | + } | ||
115 | + } else { | ||
116 | + payblocks = s->chunk_ratio; | ||
117 | + /* | ||
118 | + * Once differencing files are supported, verify sector bitmap | ||
119 | + * blocks here | ||
120 | + */ | ||
121 | + } | ||
122 | + } | ||
123 | + } | ||
124 | + | ||
125 | + return ret; | ||
126 | +} | ||
127 | + | ||
128 | static void vhdx_close(BlockDriverState *bs) | ||
129 | { | ||
130 | BDRVVHDXState *s = bs->opaque; | ||
131 | @@ -XXX,XX +XXX,XX @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, | ||
132 | goto fail; | 42 | goto fail; |
133 | } | 43 | } |
134 | 44 | ||
135 | - uint64_t payblocks = s->chunk_ratio; | 45 | - bs->read_only = true; |
136 | - /* endian convert, and verify populated BAT field file offsets against | 46 | bs->open_flags &= ~BDRV_O_RDWR; |
137 | - * region table and log entries */ | 47 | |
138 | + /* endian convert populated BAT field entires */ | 48 | return 0; |
139 | for (i = 0; i < s->bat_entries; i++) { | 49 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, |
140 | s->bat[i] = le64_to_cpu(s->bat[i]); | ||
141 | - if (payblocks--) { | ||
142 | - /* payload bat entries */ | ||
143 | - if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) == | ||
144 | - PAYLOAD_BLOCK_FULLY_PRESENT) { | ||
145 | - ret = vhdx_region_check(s, s->bat[i] & VHDX_BAT_FILE_OFF_MASK, | ||
146 | - s->block_size); | ||
147 | - if (ret < 0) { | ||
148 | - goto fail; | ||
149 | - } | ||
150 | - } | ||
151 | - } else { | ||
152 | - payblocks = s->chunk_ratio; | ||
153 | - /* Once differencing files are supported, verify sector bitmap | ||
154 | - * blocks here */ | ||
155 | + } | ||
156 | + | ||
157 | + if (!(flags & BDRV_O_CHECK)) { | ||
158 | + ret = vhdx_check_bat_entries(bs, NULL); | ||
159 | + if (ret < 0) { | ||
160 | + goto fail; | ||
161 | } | ||
162 | } | 50 | } |
163 | 51 | ||
164 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn vhdx_co_check(BlockDriverState *bs, | 52 | bs->drv = drv; |
165 | if (s->log_replayed_on_open) { | 53 | - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); |
166 | result->corruptions_fixed++; | 54 | bs->opaque = g_malloc0(drv->instance_size); |
167 | } | 55 | |
168 | + | 56 | if (drv->bdrv_file_open) { |
169 | + vhdx_check_bat_entries(bs, &result->corruptions); | 57 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, |
170 | + | 58 | trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, |
171 | return 0; | 59 | drv->format_name); |
60 | |||
61 | - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); | ||
62 | - | ||
63 | ro = bdrv_is_read_only(bs); | ||
64 | |||
65 | if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { | ||
66 | @@ -XXX,XX +XXX,XX @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) | ||
67 | bs->explicit_options = reopen_state->explicit_options; | ||
68 | bs->options = reopen_state->options; | ||
69 | bs->open_flags = reopen_state->flags; | ||
70 | - bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); | ||
71 | bs->detect_zeroes = reopen_state->detect_zeroes; | ||
72 | |||
73 | if (reopen_state->replace_backing_bs) { | ||
74 | diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/tests/unit/test-block-iothread.c | ||
77 | +++ b/tests/unit/test-block-iothread.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c) | ||
79 | g_assert_cmpint(ret, ==, -EINVAL); | ||
80 | |||
81 | /* Error: Read-only image */ | ||
82 | - c->bs->read_only = true; | ||
83 | c->bs->open_flags &= ~BDRV_O_RDWR; | ||
84 | |||
85 | ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); | ||
86 | g_assert_cmpint(ret, ==, -EACCES); | ||
87 | |||
88 | - c->bs->read_only = false; | ||
89 | c->bs->open_flags |= BDRV_O_RDWR; | ||
172 | } | 90 | } |
173 | 91 | ||
92 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_flush(BdrvChild *c) | ||
93 | g_assert_cmpint(ret, ==, 0); | ||
94 | |||
95 | /* Early success: Read-only image */ | ||
96 | - c->bs->read_only = true; | ||
97 | c->bs->open_flags &= ~BDRV_O_RDWR; | ||
98 | |||
99 | ret = bdrv_flush(c->bs); | ||
100 | g_assert_cmpint(ret, ==, 0); | ||
101 | |||
102 | - c->bs->read_only = false; | ||
103 | c->bs->open_flags |= BDRV_O_RDWR; | ||
104 | } | ||
105 | |||
106 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_blk_flush(BlockBackend *blk) | ||
107 | g_assert_cmpint(ret, ==, 0); | ||
108 | |||
109 | /* Early success: Read-only image */ | ||
110 | - bs->read_only = true; | ||
111 | bs->open_flags &= ~BDRV_O_RDWR; | ||
112 | |||
113 | ret = blk_flush(blk); | ||
114 | g_assert_cmpint(ret, ==, 0); | ||
115 | |||
116 | - bs->read_only = false; | ||
117 | bs->open_flags |= BDRV_O_RDWR; | ||
118 | } | ||
119 | |||
174 | -- | 120 | -- |
175 | 2.20.1 | 121 | 2.30.2 |
176 | 122 | ||
177 | 123 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
1 | 2 | ||
3 | Instead of keeping additional boolean field, let's store the | ||
4 | information in BDRV_O_RDWR bit of BlockBackendRootState::open_flags. | ||
5 | |||
6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
7 | Message-Id: <20210527154056.70294-4-vsementsov@virtuozzo.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
9 | --- | ||
10 | include/block/block_int.h | 1 - | ||
11 | block/block-backend.c | 10 ++-------- | ||
12 | blockdev.c | 3 +-- | ||
13 | 3 files changed, 3 insertions(+), 11 deletions(-) | ||
14 | |||
15 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/block/block_int.h | ||
18 | +++ b/include/block/block_int.h | ||
19 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
20 | |||
21 | struct BlockBackendRootState { | ||
22 | int open_flags; | ||
23 | - bool read_only; | ||
24 | BlockdevDetectZeroesOptions detect_zeroes; | ||
25 | }; | ||
26 | |||
27 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/block/block-backend.c | ||
30 | +++ b/block/block-backend.c | ||
31 | @@ -XXX,XX +XXX,XX @@ bool blk_supports_write_perm(BlockBackend *blk) | ||
32 | if (bs) { | ||
33 | return !bdrv_is_read_only(bs); | ||
34 | } else { | ||
35 | - return !blk->root_state.read_only; | ||
36 | + return blk->root_state.open_flags & BDRV_O_RDWR; | ||
37 | } | ||
38 | } | ||
39 | |||
40 | @@ -XXX,XX +XXX,XX @@ void blk_update_root_state(BlockBackend *blk) | ||
41 | assert(blk->root); | ||
42 | |||
43 | blk->root_state.open_flags = blk->root->bs->open_flags; | ||
44 | - blk->root_state.read_only = bdrv_is_read_only(blk->root->bs); | ||
45 | blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; | ||
46 | } | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) | ||
49 | */ | ||
50 | int blk_get_open_flags_from_root_state(BlockBackend *blk) | ||
51 | { | ||
52 | - int bs_flags; | ||
53 | - | ||
54 | - bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR; | ||
55 | - bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR; | ||
56 | - | ||
57 | - return bs_flags; | ||
58 | + return blk->root_state.open_flags; | ||
59 | } | ||
60 | |||
61 | BlockBackendRootState *blk_get_root_state(BlockBackend *blk) | ||
62 | diff --git a/blockdev.c b/blockdev.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/blockdev.c | ||
65 | +++ b/blockdev.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, | ||
67 | |||
68 | blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); | ||
69 | blk_rs = blk_get_root_state(blk); | ||
70 | - blk_rs->open_flags = bdrv_flags; | ||
71 | - blk_rs->read_only = read_only; | ||
72 | + blk_rs->open_flags = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR); | ||
73 | blk_rs->detect_zeroes = detect_zeroes; | ||
74 | |||
75 | qobject_unref(bs_opts); | ||
76 | -- | ||
77 | 2.30.2 | ||
78 | |||
79 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Thomas Huth <thuth@redhat.com> | ||
1 | 2 | ||
3 | A customer reported that running | ||
4 | |||
5 | qemu-img convert -t none -O qcow2 -f qcow2 input.qcow2 output.qcow2 | ||
6 | |||
7 | fails for them with the following error message when the images are | ||
8 | stored on a GPFS file system : | ||
9 | |||
10 | qemu-img: error while writing sector 0: Invalid argument | ||
11 | |||
12 | After analyzing the strace output, it seems like the problem is in | ||
13 | handle_aiocb_write_zeroes(): The call to fallocate(FALLOC_FL_PUNCH_HOLE) | ||
14 | returns EINVAL, which can apparently happen if the file system has | ||
15 | a different idea of the granularity of the operation. It's arguably | ||
16 | a bug in GPFS, since the PUNCH_HOLE mode should not result in EINVAL | ||
17 | according to the man-page of fallocate(), but the file system is out | ||
18 | there in production and so we have to deal with it. In commit 294682cc3a | ||
19 | ("block: workaround for unaligned byte range in fallocate()") we also | ||
20 | already applied the a work-around for the same problem to the earlier | ||
21 | fallocate(FALLOC_FL_ZERO_RANGE) call, so do it now similar with the | ||
22 | PUNCH_HOLE call. But instead of silently catching and returning | ||
23 | -ENOTSUP (which causes the caller to fall back to writing zeroes), | ||
24 | let's rather inform the user once about the buggy file system and | ||
25 | try the other fallback instead. | ||
26 | |||
27 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
28 | Message-Id: <20210527172020.847617-2-thuth@redhat.com> | ||
29 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
30 | --- | ||
31 | block/file-posix.c | 11 +++++++++++ | ||
32 | 1 file changed, 11 insertions(+) | ||
33 | |||
34 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/block/file-posix.c | ||
37 | +++ b/block/file-posix.c | ||
38 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_write_zeroes(void *opaque) | ||
39 | return ret; | ||
40 | } | ||
41 | s->has_fallocate = false; | ||
42 | + } else if (ret == -EINVAL) { | ||
43 | + /* | ||
44 | + * Some file systems like older versions of GPFS do not like un- | ||
45 | + * aligned byte ranges, and return EINVAL in such a case, though | ||
46 | + * they should not do it according to the man-page of fallocate(). | ||
47 | + * Warn about the bad filesystem and try the final fallback instead. | ||
48 | + */ | ||
49 | + warn_report_once("Your file system is misbehaving: " | ||
50 | + "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. " | ||
51 | + "Please report this bug to your file sytem " | ||
52 | + "vendor."); | ||
53 | } else if (ret != -ENOTSUP) { | ||
54 | return ret; | ||
55 | } else { | ||
56 | -- | ||
57 | 2.30.2 | ||
58 | |||
59 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Thomas Huth <thuth@redhat.com> | ||
1 | 2 | ||
3 | If fallocate(... FALLOC_FL_ZERO_RANGE ...) returns EINVAL, it's likely | ||
4 | an indication that the file system is buggy and does not implement | ||
5 | unaligned accesses right. We still might be lucky with the other | ||
6 | fallback fallocate() calls later in this function, though, so we should | ||
7 | not return immediately and try the others first. | ||
8 | Since FALLOC_FL_ZERO_RANGE could also return EINVAL if the file descriptor | ||
9 | is not a regular file, we ignore this filesystem bug silently, without | ||
10 | printing an error message for the user. | ||
11 | |||
12 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
13 | Message-Id: <20210527172020.847617-3-thuth@redhat.com> | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
15 | --- | ||
16 | block/file-posix.c | 18 +++++++++--------- | ||
17 | 1 file changed, 9 insertions(+), 9 deletions(-) | ||
18 | |||
19 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/block/file-posix.c | ||
22 | +++ b/block/file-posix.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_write_zeroes(void *opaque) | ||
24 | if (s->has_write_zeroes) { | ||
25 | int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, | ||
26 | aiocb->aio_offset, aiocb->aio_nbytes); | ||
27 | - if (ret == -EINVAL) { | ||
28 | - /* | ||
29 | - * Allow falling back to pwrite for file systems that | ||
30 | - * do not support fallocate() for an unaligned byte range. | ||
31 | - */ | ||
32 | - return -ENOTSUP; | ||
33 | - } | ||
34 | - if (ret == 0 || ret != -ENOTSUP) { | ||
35 | + if (ret == -ENOTSUP) { | ||
36 | + s->has_write_zeroes = false; | ||
37 | + } else if (ret == 0 || ret != -EINVAL) { | ||
38 | return ret; | ||
39 | } | ||
40 | - s->has_write_zeroes = false; | ||
41 | + /* | ||
42 | + * Note: Some file systems do not like unaligned byte ranges, and | ||
43 | + * return EINVAL in such a case, though they should not do it according | ||
44 | + * to the man-page of fallocate(). Thus we simply ignore this return | ||
45 | + * value and try the other fallbacks instead. | ||
46 | + */ | ||
47 | } | ||
48 | #endif | ||
49 | |||
50 | -- | ||
51 | 2.30.2 | ||
52 | |||
53 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
1 | 2 | ||
3 | The logic around **child is not obvious: this reference is used not | ||
4 | only to return resulting child, but also to rollback NULL value on | ||
5 | transaction abort. | ||
6 | |||
7 | So, let's add documentation and some assertions. | ||
8 | |||
9 | While being here, drop extra declaration of bdrv_attach_child_noperm(). | ||
10 | |||
11 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
12 | Message-Id: <20210601075218.79249-2-vsementsov@virtuozzo.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
14 | --- | ||
15 | block.c | 24 +++++++++++++++--------- | ||
16 | 1 file changed, 15 insertions(+), 9 deletions(-) | ||
17 | |||
18 | diff --git a/block.c b/block.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/block.c | ||
21 | +++ b/block.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_open_inherit(const char *filename, | ||
23 | |||
24 | static void bdrv_replace_child_noperm(BdrvChild *child, | ||
25 | BlockDriverState *new_bs); | ||
26 | -static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, | ||
27 | - BlockDriverState *child_bs, | ||
28 | - const char *child_name, | ||
29 | - const BdrvChildClass *child_class, | ||
30 | - BdrvChildRole child_role, | ||
31 | - BdrvChild **child, | ||
32 | - Transaction *tran, | ||
33 | - Error **errp); | ||
34 | static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, | ||
35 | Transaction *tran); | ||
36 | |||
37 | @@ -XXX,XX +XXX,XX @@ static TransactionActionDrv bdrv_attach_child_common_drv = { | ||
38 | |||
39 | /* | ||
40 | * Common part of attaching bdrv child to bs or to blk or to job | ||
41 | + * | ||
42 | + * Resulting new child is returned through @child. | ||
43 | + * At start *@child must be NULL. | ||
44 | + * @child is saved to a new entry of @tran, so that *@child could be reverted to | ||
45 | + * NULL on abort(). So referenced variable must live at least until transaction | ||
46 | + * end. | ||
47 | */ | ||
48 | static int bdrv_attach_child_common(BlockDriverState *child_bs, | ||
49 | const char *child_name, | ||
50 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | +/* | ||
55 | + * Variable referenced by @child must live at least until transaction end. | ||
56 | + * (see bdrv_attach_child_common() doc for details) | ||
57 | + */ | ||
58 | static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, | ||
59 | BlockDriverState *child_bs, | ||
60 | const char *child_name, | ||
61 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, | ||
62 | child_role, perm, shared_perm, opaque, | ||
63 | &child, tran, errp); | ||
64 | if (ret < 0) { | ||
65 | - assert(child == NULL); | ||
66 | goto out; | ||
67 | } | ||
68 | |||
69 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, | ||
70 | |||
71 | out: | ||
72 | tran_finalize(tran, ret); | ||
73 | + /* child is unset on failure by bdrv_attach_child_common_abort() */ | ||
74 | + assert((ret < 0) == !child); | ||
75 | + | ||
76 | bdrv_unref(child_bs); | ||
77 | return child; | ||
78 | } | ||
79 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, | ||
80 | |||
81 | out: | ||
82 | tran_finalize(tran, ret); | ||
83 | + /* child is unset on failure by bdrv_attach_child_common_abort() */ | ||
84 | + assert((ret < 0) == !child); | ||
85 | |||
86 | bdrv_unref(child_bs); | ||
87 | |||
88 | -- | ||
89 | 2.30.2 | ||
90 | |||
91 | diff view generated by jsdifflib |
1 | From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru> | 1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | Replay is capable of recording normal BH events, but sometimes | 3 | We have different types of parents: block nodes, block backends and |
4 | there are single use callbacks scheduled with aio_bh_schedule_oneshot | 4 | jobs. So, it makes sense to specify type together with name. |
5 | function. This patch enables recording and replaying such callbacks. | ||
6 | Block layer uses these events for calling the completion function. | ||
7 | Replaying these calls makes the execution deterministic. | ||
8 | 5 | ||
9 | Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru> | 6 | While being here also use g_autofree. |
10 | Acked-by: Kevin Wolf <kwolf@redhat.com> | 7 | |
8 | iotest 307 output is updated. | ||
9 | |||
10 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
11 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
12 | Message-Id: <20210601075218.79249-3-vsementsov@virtuozzo.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | --- | 14 | --- |
13 | include/sysemu/replay.h | 4 ++++ | 15 | block/block-backend.c | 9 ++++----- |
14 | replay/replay-internal.h | 1 + | 16 | tests/qemu-iotests/307.out | 2 +- |
15 | block/block-backend.c | 9 ++++++--- | 17 | 2 files changed, 5 insertions(+), 6 deletions(-) |
16 | block/io.c | 4 ++-- | ||
17 | block/iscsi.c | 5 +++-- | ||
18 | block/nfs.c | 6 ++++-- | ||
19 | block/null.c | 4 +++- | ||
20 | block/nvme.c | 6 ++++-- | ||
21 | block/rbd.c | 5 +++-- | ||
22 | block/vxhs.c | 5 +++-- | ||
23 | replay/replay-events.c | 16 ++++++++++++++++ | ||
24 | stubs/replay-user.c | 9 +++++++++ | ||
25 | stubs/Makefile.objs | 1 + | ||
26 | 13 files changed, 59 insertions(+), 16 deletions(-) | ||
27 | create mode 100644 stubs/replay-user.c | ||
28 | 18 | ||
29 | diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/include/sysemu/replay.h | ||
32 | +++ b/include/sysemu/replay.h | ||
33 | @@ -XXX,XX +XXX,XX @@ | ||
34 | #include "qapi/qapi-types-misc.h" | ||
35 | #include "qapi/qapi-types-run-state.h" | ||
36 | #include "qapi/qapi-types-ui.h" | ||
37 | +#include "block/aio.h" | ||
38 | |||
39 | /* replay clock kinds */ | ||
40 | enum ReplayClockKind { | ||
41 | @@ -XXX,XX +XXX,XX @@ void replay_enable_events(void); | ||
42 | bool replay_events_enabled(void); | ||
43 | /*! Adds bottom half event to the queue */ | ||
44 | void replay_bh_schedule_event(QEMUBH *bh); | ||
45 | +/* Adds oneshot bottom half event to the queue */ | ||
46 | +void replay_bh_schedule_oneshot_event(AioContext *ctx, | ||
47 | + QEMUBHFunc *cb, void *opaque); | ||
48 | /*! Adds input event to the queue */ | ||
49 | void replay_input_event(QemuConsole *src, InputEvent *evt); | ||
50 | /*! Adds input sync event to the queue */ | ||
51 | diff --git a/replay/replay-internal.h b/replay/replay-internal.h | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/replay/replay-internal.h | ||
54 | +++ b/replay/replay-internal.h | ||
55 | @@ -XXX,XX +XXX,XX @@ enum ReplayEvents { | ||
56 | |||
57 | enum ReplayAsyncEventKind { | ||
58 | REPLAY_ASYNC_EVENT_BH, | ||
59 | + REPLAY_ASYNC_EVENT_BH_ONESHOT, | ||
60 | REPLAY_ASYNC_EVENT_INPUT, | ||
61 | REPLAY_ASYNC_EVENT_INPUT_SYNC, | ||
62 | REPLAY_ASYNC_EVENT_CHAR_READ, | ||
63 | diff --git a/block/block-backend.c b/block/block-backend.c | 19 | diff --git a/block/block-backend.c b/block/block-backend.c |
64 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
65 | --- a/block/block-backend.c | 21 | --- a/block/block-backend.c |
66 | +++ b/block/block-backend.c | 22 | +++ b/block/block-backend.c |
67 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, |
68 | #include "hw/qdev-core.h" | 24 | static char *blk_root_get_parent_desc(BdrvChild *child) |
69 | #include "sysemu/blockdev.h" | 25 | { |
70 | #include "sysemu/runstate.h" | 26 | BlockBackend *blk = child->opaque; |
71 | +#include "sysemu/sysemu.h" | 27 | - char *dev_id; |
72 | +#include "sysemu/replay.h" | 28 | + g_autofree char *dev_id = NULL; |
73 | #include "qapi/error.h" | 29 | |
74 | #include "qapi/qapi-events-block.h" | 30 | if (blk->name) { |
75 | #include "qemu/id.h" | 31 | - return g_strdup(blk->name); |
76 | @@ -XXX,XX +XXX,XX @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, | 32 | + return g_strdup_printf("block device '%s'", blk->name); |
77 | acb->blk = blk; | ||
78 | acb->ret = ret; | ||
79 | |||
80 | - aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb); | ||
81 | + replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), | ||
82 | + error_callback_bh, acb); | ||
83 | return &acb->common; | ||
84 | } | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, | ||
87 | |||
88 | acb->has_returned = true; | ||
89 | if (acb->rwco.ret != NOT_DONE) { | ||
90 | - aio_bh_schedule_oneshot(blk_get_aio_context(blk), | ||
91 | - blk_aio_complete_bh, acb); | ||
92 | + replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), | ||
93 | + blk_aio_complete_bh, acb); | ||
94 | } | 33 | } |
95 | 34 | ||
96 | return &acb->common; | 35 | dev_id = blk_get_attached_dev_id(blk); |
97 | diff --git a/block/io.c b/block/io.c | 36 | if (*dev_id) { |
98 | index XXXXXXX..XXXXXXX 100644 | 37 | - return dev_id; |
99 | --- a/block/io.c | 38 | + return g_strdup_printf("block device '%s'", dev_id); |
100 | +++ b/block/io.c | ||
101 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
102 | if (bs) { | ||
103 | bdrv_inc_in_flight(bs); | ||
104 | } | ||
105 | - aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), | ||
106 | - bdrv_co_drain_bh_cb, &data); | ||
107 | + replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs), | ||
108 | + bdrv_co_drain_bh_cb, &data); | ||
109 | |||
110 | qemu_coroutine_yield(); | ||
111 | /* If we are resumed from some other event (such as an aio completion or a | ||
112 | diff --git a/block/iscsi.c b/block/iscsi.c | ||
113 | index XXXXXXX..XXXXXXX 100644 | ||
114 | --- a/block/iscsi.c | ||
115 | +++ b/block/iscsi.c | ||
116 | @@ -XXX,XX +XXX,XX @@ | ||
117 | #include "qemu/module.h" | ||
118 | #include "qemu/option.h" | ||
119 | #include "qemu/uuid.h" | ||
120 | +#include "sysemu/replay.h" | ||
121 | #include "qapi/error.h" | ||
122 | #include "qapi/qapi-commands-misc.h" | ||
123 | #include "qapi/qmp/qdict.h" | ||
124 | @@ -XXX,XX +XXX,XX @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, | ||
125 | } | ||
126 | |||
127 | if (iTask->co) { | ||
128 | - aio_bh_schedule_oneshot(iTask->iscsilun->aio_context, | ||
129 | - iscsi_co_generic_bh_cb, iTask); | ||
130 | + replay_bh_schedule_oneshot_event(iTask->iscsilun->aio_context, | ||
131 | + iscsi_co_generic_bh_cb, iTask); | ||
132 | } else { | 39 | } else { |
133 | iTask->complete = 1; | 40 | /* TODO Callback into the BB owner for something more detailed */ |
134 | } | 41 | - g_free(dev_id); |
135 | diff --git a/block/nfs.c b/block/nfs.c | 42 | - return g_strdup("a block device"); |
136 | index XXXXXXX..XXXXXXX 100644 | 43 | + return g_strdup("an unnamed block device"); |
137 | --- a/block/nfs.c | ||
138 | +++ b/block/nfs.c | ||
139 | @@ -XXX,XX +XXX,XX @@ | ||
140 | #include "qemu/option.h" | ||
141 | #include "qemu/uri.h" | ||
142 | #include "qemu/cutils.h" | ||
143 | +#include "sysemu/sysemu.h" | ||
144 | +#include "sysemu/replay.h" | ||
145 | #include "qapi/qapi-visit-block-core.h" | ||
146 | #include "qapi/qmp/qdict.h" | ||
147 | #include "qapi/qmp/qstring.h" | ||
148 | @@ -XXX,XX +XXX,XX @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, | ||
149 | if (task->ret < 0) { | ||
150 | error_report("NFS Error: %s", nfs_get_error(nfs)); | ||
151 | } | ||
152 | - aio_bh_schedule_oneshot(task->client->aio_context, | ||
153 | - nfs_co_generic_bh_cb, task); | ||
154 | + replay_bh_schedule_oneshot_event(task->client->aio_context, | ||
155 | + nfs_co_generic_bh_cb, task); | ||
156 | } | ||
157 | |||
158 | static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
159 | diff --git a/block/null.c b/block/null.c | ||
160 | index XXXXXXX..XXXXXXX 100644 | ||
161 | --- a/block/null.c | ||
162 | +++ b/block/null.c | ||
163 | @@ -XXX,XX +XXX,XX @@ | ||
164 | #include "qemu/module.h" | ||
165 | #include "qemu/option.h" | ||
166 | #include "block/block_int.h" | ||
167 | +#include "sysemu/replay.h" | ||
168 | |||
169 | #define NULL_OPT_LATENCY "latency-ns" | ||
170 | #define NULL_OPT_ZEROES "read-zeroes" | ||
171 | @@ -XXX,XX +XXX,XX @@ static inline BlockAIOCB *null_aio_common(BlockDriverState *bs, | ||
172 | timer_mod_ns(&acb->timer, | ||
173 | qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns); | ||
174 | } else { | ||
175 | - aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), null_bh_cb, acb); | ||
176 | + replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs), | ||
177 | + null_bh_cb, acb); | ||
178 | } | ||
179 | return &acb->common; | ||
180 | } | ||
181 | diff --git a/block/nvme.c b/block/nvme.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/block/nvme.c | ||
184 | +++ b/block/nvme.c | ||
185 | @@ -XXX,XX +XXX,XX @@ | ||
186 | #include "qemu/option.h" | ||
187 | #include "qemu/vfio-helpers.h" | ||
188 | #include "block/block_int.h" | ||
189 | +#include "sysemu/replay.h" | ||
190 | #include "trace.h" | ||
191 | |||
192 | #include "block/nvme.h" | ||
193 | @@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q) | ||
194 | smp_mb_release(); | ||
195 | *q->cq.doorbell = cpu_to_le32(q->cq.head); | ||
196 | if (!qemu_co_queue_empty(&q->free_req_queue)) { | ||
197 | - aio_bh_schedule_oneshot(s->aio_context, nvme_free_req_queue_cb, q); | ||
198 | + replay_bh_schedule_oneshot_event(s->aio_context, | ||
199 | + nvme_free_req_queue_cb, q); | ||
200 | } | ||
201 | } | ||
202 | q->busy = false; | ||
203 | @@ -XXX,XX +XXX,XX @@ static void nvme_rw_cb(void *opaque, int ret) | ||
204 | /* The rw coroutine hasn't yielded, don't try to enter. */ | ||
205 | return; | ||
206 | } | ||
207 | - aio_bh_schedule_oneshot(data->ctx, nvme_rw_cb_bh, data); | ||
208 | + replay_bh_schedule_oneshot_event(data->ctx, nvme_rw_cb_bh, data); | ||
209 | } | ||
210 | |||
211 | static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs, | ||
212 | diff --git a/block/rbd.c b/block/rbd.c | ||
213 | index XXXXXXX..XXXXXXX 100644 | ||
214 | --- a/block/rbd.c | ||
215 | +++ b/block/rbd.c | ||
216 | @@ -XXX,XX +XXX,XX @@ | ||
217 | #include "block/qdict.h" | ||
218 | #include "crypto/secret.h" | ||
219 | #include "qemu/cutils.h" | ||
220 | +#include "sysemu/replay.h" | ||
221 | #include "qapi/qmp/qstring.h" | ||
222 | #include "qapi/qmp/qdict.h" | ||
223 | #include "qapi/qmp/qjson.h" | ||
224 | @@ -XXX,XX +XXX,XX @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) | ||
225 | rcb->ret = rbd_aio_get_return_value(c); | ||
226 | rbd_aio_release(c); | ||
227 | |||
228 | - aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), | ||
229 | - rbd_finish_bh, rcb); | ||
230 | + replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs), | ||
231 | + rbd_finish_bh, rcb); | ||
232 | } | ||
233 | |||
234 | static int rbd_aio_discard_wrapper(rbd_image_t image, | ||
235 | diff --git a/block/vxhs.c b/block/vxhs.c | ||
236 | index XXXXXXX..XXXXXXX 100644 | ||
237 | --- a/block/vxhs.c | ||
238 | +++ b/block/vxhs.c | ||
239 | @@ -XXX,XX +XXX,XX @@ | ||
240 | #include "qapi/error.h" | ||
241 | #include "qemu/uuid.h" | ||
242 | #include "crypto/tlscredsx509.h" | ||
243 | +#include "sysemu/replay.h" | ||
244 | |||
245 | #define VXHS_OPT_FILENAME "filename" | ||
246 | #define VXHS_OPT_VDISK_ID "vdisk-id" | ||
247 | @@ -XXX,XX +XXX,XX @@ static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error) | ||
248 | trace_vxhs_iio_callback(error); | ||
249 | } | ||
250 | |||
251 | - aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), | ||
252 | - vxhs_complete_aio_bh, acb); | ||
253 | + replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs), | ||
254 | + vxhs_complete_aio_bh, acb); | ||
255 | break; | ||
256 | |||
257 | default: | ||
258 | diff --git a/replay/replay-events.c b/replay/replay-events.c | ||
259 | index XXXXXXX..XXXXXXX 100644 | ||
260 | --- a/replay/replay-events.c | ||
261 | +++ b/replay/replay-events.c | ||
262 | @@ -XXX,XX +XXX,XX @@ static void replay_run_event(Event *event) | ||
263 | case REPLAY_ASYNC_EVENT_BH: | ||
264 | aio_bh_call(event->opaque); | ||
265 | break; | ||
266 | + case REPLAY_ASYNC_EVENT_BH_ONESHOT: | ||
267 | + ((QEMUBHFunc *)event->opaque)(event->opaque2); | ||
268 | + break; | ||
269 | case REPLAY_ASYNC_EVENT_INPUT: | ||
270 | qemu_input_event_send_impl(NULL, (InputEvent *)event->opaque); | ||
271 | qapi_free_InputEvent((InputEvent *)event->opaque); | ||
272 | @@ -XXX,XX +XXX,XX @@ void replay_bh_schedule_event(QEMUBH *bh) | ||
273 | } | 44 | } |
274 | } | 45 | } |
275 | 46 | ||
276 | +void replay_bh_schedule_oneshot_event(AioContext *ctx, | 47 | diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out |
277 | + QEMUBHFunc *cb, void *opaque) | ||
278 | +{ | ||
279 | + if (events_enabled) { | ||
280 | + uint64_t id = replay_get_current_icount(); | ||
281 | + replay_add_event(REPLAY_ASYNC_EVENT_BH_ONESHOT, cb, opaque, id); | ||
282 | + } else { | ||
283 | + aio_bh_schedule_oneshot(ctx, cb, opaque); | ||
284 | + } | ||
285 | +} | ||
286 | + | ||
287 | void replay_add_input_event(struct InputEvent *event) | ||
288 | { | ||
289 | replay_add_event(REPLAY_ASYNC_EVENT_INPUT, event, NULL, 0); | ||
290 | @@ -XXX,XX +XXX,XX @@ static void replay_save_event(Event *event, int checkpoint) | ||
291 | /* save event-specific data */ | ||
292 | switch (event->event_kind) { | ||
293 | case REPLAY_ASYNC_EVENT_BH: | ||
294 | + case REPLAY_ASYNC_EVENT_BH_ONESHOT: | ||
295 | replay_put_qword(event->id); | ||
296 | break; | ||
297 | case REPLAY_ASYNC_EVENT_INPUT: | ||
298 | @@ -XXX,XX +XXX,XX @@ static Event *replay_read_event(int checkpoint) | ||
299 | /* Events that has not to be in the queue */ | ||
300 | switch (replay_state.read_event_kind) { | ||
301 | case REPLAY_ASYNC_EVENT_BH: | ||
302 | + case REPLAY_ASYNC_EVENT_BH_ONESHOT: | ||
303 | if (replay_state.read_event_id == -1) { | ||
304 | replay_state.read_event_id = replay_get_qword(); | ||
305 | } | ||
306 | diff --git a/stubs/replay-user.c b/stubs/replay-user.c | ||
307 | new file mode 100644 | ||
308 | index XXXXXXX..XXXXXXX | ||
309 | --- /dev/null | ||
310 | +++ b/stubs/replay-user.c | ||
311 | @@ -XXX,XX +XXX,XX @@ | ||
312 | +#include "qemu/osdep.h" | ||
313 | +#include "sysemu/replay.h" | ||
314 | +#include "sysemu/sysemu.h" | ||
315 | + | ||
316 | +void replay_bh_schedule_oneshot_event(AioContext *ctx, | ||
317 | + QEMUBHFunc *cb, void *opaque) | ||
318 | +{ | ||
319 | + aio_bh_schedule_oneshot(ctx, cb, opaque); | ||
320 | +} | ||
321 | diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs | ||
322 | index XXXXXXX..XXXXXXX 100644 | 48 | index XXXXXXX..XXXXXXX 100644 |
323 | --- a/stubs/Makefile.objs | 49 | --- a/tests/qemu-iotests/307.out |
324 | +++ b/stubs/Makefile.objs | 50 | +++ b/tests/qemu-iotests/307.out |
325 | @@ -XXX,XX +XXX,XX @@ stub-obj-y += monitor.o | 51 | @@ -XXX,XX +XXX,XX @@ exports available: 1 |
326 | stub-obj-y += notify-event.o | 52 | |
327 | stub-obj-y += qtest.o | 53 | === Add a writable export === |
328 | stub-obj-y += replay.o | 54 | {"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}} |
329 | +stub-obj-y += replay-user.o | 55 | -{"error": {"class": "GenericError", "desc": "Conflicts with use by sda as 'root', which does not allow 'write' on fmt"}} |
330 | stub-obj-y += runstate-check.o | 56 | +{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}} |
331 | stub-obj-y += set-fd-handler.o | 57 | {"execute": "device_del", "arguments": {"id": "sda"}} |
332 | stub-obj-y += sysbus.o | 58 | {"return": {}} |
59 | {"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
333 | -- | 60 | -- |
334 | 2.20.1 | 61 | 2.30.2 |
335 | 62 | ||
336 | 63 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
1 | 2 | ||
3 | We have different types of parents: block nodes, block backends and | ||
4 | jobs. So, it makes sense to specify type together with name. | ||
5 | |||
6 | Next, this handler us used to compose an error message about permission | ||
7 | conflict. And permission conflict occurs in a specific place of block | ||
8 | graph. We shouldn't report name of parent device (as it refers another | ||
9 | place in block graph), but exactly and only the name of the node. So, | ||
10 | use bdrv_get_node_name() directly. | ||
11 | |||
12 | iotest 283 output is updated. | ||
13 | |||
14 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
15 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
16 | Message-Id: <20210601075218.79249-4-vsementsov@virtuozzo.com> | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
18 | --- | ||
19 | block.c | 2 +- | ||
20 | tests/qemu-iotests/283.out | 2 +- | ||
21 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
22 | |||
23 | diff --git a/block.c b/block.c | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/block.c | ||
26 | +++ b/block.c | ||
27 | @@ -XXX,XX +XXX,XX @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) | ||
28 | static char *bdrv_child_get_parent_desc(BdrvChild *c) | ||
29 | { | ||
30 | BlockDriverState *parent = c->opaque; | ||
31 | - return g_strdup(bdrv_get_device_or_node_name(parent)); | ||
32 | + return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); | ||
33 | } | ||
34 | |||
35 | static void bdrv_child_cb_drained_begin(BdrvChild *child) | ||
36 | diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/tests/qemu-iotests/283.out | ||
39 | +++ b/tests/qemu-iotests/283.out | ||
40 | @@ -XXX,XX +XXX,XX @@ | ||
41 | {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}} | ||
42 | {"return": {}} | ||
43 | {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}} | ||
44 | -{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by source as 'image', which does not allow 'write' on base"}} | ||
45 | +{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}} | ||
46 | |||
47 | === backup-top should be gone after job-finalize === | ||
48 | |||
49 | -- | ||
50 | 2.30.2 | ||
51 | |||
52 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
1 | 2 | ||
3 | Recently we've fixed a crash by adding .get_parent_aio_context handler | ||
4 | to child_vvfat_qcow. Now we want it to support .get_parent_desc as | ||
5 | well. child_vvfat_qcow wants to implement own .inherit_options, it's | ||
6 | not bad. But omitting all other handlers is a bad idea. Let's inherit | ||
7 | the class from child_of_bds instead, similar to chain_child_class and | ||
8 | detach_by_driver_cb_class in test-bdrv-drain.c. | ||
9 | |||
10 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
11 | Message-Id: <20210601075218.79249-5-vsementsov@virtuozzo.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | --- | ||
14 | block/vvfat.c | 8 +++----- | ||
15 | 1 file changed, 3 insertions(+), 5 deletions(-) | ||
16 | |||
17 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/block/vvfat.c | ||
20 | +++ b/block/vvfat.c | ||
21 | @@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, | ||
22 | qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); | ||
23 | } | ||
24 | |||
25 | -static const BdrvChildClass child_vvfat_qcow = { | ||
26 | - .parent_is_bds = true, | ||
27 | - .inherit_options = vvfat_qcow_options, | ||
28 | - .get_parent_aio_context = child_of_bds_get_parent_aio_context, | ||
29 | -}; | ||
30 | +static BdrvChildClass child_vvfat_qcow; | ||
31 | |||
32 | static int enable_write_target(BlockDriverState *bs, Error **errp) | ||
33 | { | ||
34 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vvfat = { | ||
35 | |||
36 | static void bdrv_vvfat_init(void) | ||
37 | { | ||
38 | + child_vvfat_qcow = child_of_bds; | ||
39 | + child_vvfat_qcow.inherit_options = vvfat_qcow_options; | ||
40 | bdrv_register(&bdrv_vvfat); | ||
41 | } | ||
42 | |||
43 | -- | ||
44 | 2.30.2 | ||
45 | |||
46 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
1 | 2 | ||
3 | All child classes have this callback. So, drop unreachable code. | ||
4 | |||
5 | Still add an assertion to bdrv_attach_child_common(), to early detect | ||
6 | bad classes. | ||
7 | |||
8 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
9 | Message-Id: <20210601075218.79249-6-vsementsov@virtuozzo.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | --- | ||
12 | block.c | 7 ++----- | ||
13 | 1 file changed, 2 insertions(+), 5 deletions(-) | ||
14 | |||
15 | diff --git a/block.c b/block.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/block.c | ||
18 | +++ b/block.c | ||
19 | @@ -XXX,XX +XXX,XX @@ bool bdrv_is_writable(BlockDriverState *bs) | ||
20 | |||
21 | static char *bdrv_child_user_desc(BdrvChild *c) | ||
22 | { | ||
23 | - if (c->klass->get_parent_desc) { | ||
24 | - return c->klass->get_parent_desc(c); | ||
25 | - } | ||
26 | - | ||
27 | - return g_strdup("another user"); | ||
28 | + return c->klass->get_parent_desc(c); | ||
29 | } | ||
30 | |||
31 | static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) | ||
32 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, | ||
33 | |||
34 | assert(child); | ||
35 | assert(*child == NULL); | ||
36 | + assert(child_class->get_parent_desc); | ||
37 | |||
38 | new_child = g_new(BdrvChild, 1); | ||
39 | *new_child = (BdrvChild) { | ||
40 | -- | ||
41 | 2.30.2 | ||
42 | |||
43 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | For long test image paths, the order of the "Formatting" line and the | 3 | Now permissions are updated as follows: |
4 | "(qemu)" prompt after a drive_backup HMP command may be reversed. In | 4 | 1. do graph modifications ignoring permissions |
5 | fact, the interaction between the prompt and the line may lead to the | 5 | 2. do permission update |
6 | "Formatting" to being greppable at all after "read"-ing it (if the | ||
7 | prompt injects an IFS character into the "Formatting" string). | ||
8 | 6 | ||
9 | So just wait until we get a prompt. At that point, the block job must | 7 | (of course, we rollback [1] if [2] fails) |
10 | have been started, so "info block-jobs" will only return "No active | ||
11 | jobs" once it is done. | ||
12 | 8 | ||
13 | Reported-by: Thomas Huth <thuth@redhat.com> | 9 | So, on stage [2] we can't say which users are "old" and which are |
14 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 10 | "new" and exist only since [1]. And current error message is a bit |
15 | Reviewed-by: John Snow <jsnow@redhat.com> | 11 | outdated. Let's improve it, to make everything clean. |
12 | |||
13 | While being here, add also a comment and some good assertions. | ||
14 | |||
15 | iotests 283, 307, qsd-jobs outputs are updated. | ||
16 | |||
17 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
18 | Message-Id: <20210601075218.79249-7-vsementsov@virtuozzo.com> | ||
16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
17 | --- | 20 | --- |
18 | tests/qemu-iotests/028 | 11 ++++++++--- | 21 | block.c | 29 ++++++++++++++++++++------- |
19 | tests/qemu-iotests/028.out | 1 - | 22 | tests/qemu-iotests/283.out | 2 +- |
20 | 2 files changed, 8 insertions(+), 4 deletions(-) | 23 | tests/qemu-iotests/307.out | 2 +- |
24 | tests/qemu-iotests/tests/qsd-jobs.out | 2 +- | ||
25 | 4 files changed, 25 insertions(+), 10 deletions(-) | ||
21 | 26 | ||
22 | diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028 | 27 | diff --git a/block.c b/block.c |
23 | index XXXXXXX..XXXXXXX 100755 | ||
24 | --- a/tests/qemu-iotests/028 | ||
25 | +++ b/tests/qemu-iotests/028 | ||
26 | @@ -XXX,XX +XXX,XX @@ fi | ||
27 | # Silence output since it contains the disk image path and QEMU's readline | ||
28 | # character echoing makes it very hard to filter the output. Plus, there | ||
29 | # is no telling how many times the command will repeat before succeeding. | ||
30 | -_send_qemu_cmd $h "drive_backup disk ${TEST_IMG}.copy" "(qemu)" >/dev/null | ||
31 | -_send_qemu_cmd $h "" "Formatting" | _filter_img_create | ||
32 | -qemu_cmd_repeat=20 _send_qemu_cmd $h "info block-jobs" "No active jobs" >/dev/null | ||
33 | +# (Note that creating the image results in a "Formatting..." message over | ||
34 | +# stdout, which is the same channel the monitor uses. We cannot reliably | ||
35 | +# wait for it because the monitor output may interact with it in such a | ||
36 | +# way that _timed_wait_for cannot read it. However, once the block job is | ||
37 | +# done, we know that the "Formatting..." message must have appeared | ||
38 | +# already, so the output is still deterministic.) | ||
39 | +silent=y _send_qemu_cmd $h "drive_backup disk ${TEST_IMG}.copy" "(qemu)" | ||
40 | +silent=y qemu_cmd_repeat=20 _send_qemu_cmd $h "info block-jobs" "No active jobs" | ||
41 | _send_qemu_cmd $h "info block-jobs" "No active jobs" | ||
42 | _send_qemu_cmd $h 'quit' "" | ||
43 | |||
44 | diff --git a/tests/qemu-iotests/028.out b/tests/qemu-iotests/028.out | ||
45 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
46 | --- a/tests/qemu-iotests/028.out | 29 | --- a/block.c |
47 | +++ b/tests/qemu-iotests/028.out | 30 | +++ b/block.c |
48 | @@ -XXX,XX +XXX,XX @@ No errors were found on the image. | 31 | @@ -XXX,XX +XXX,XX @@ static char *bdrv_child_user_desc(BdrvChild *c) |
49 | 32 | return c->klass->get_parent_desc(c); | |
50 | block-backup | 33 | } |
51 | 34 | ||
52 | -Formatting 'TEST_DIR/t.IMGFMT.copy', fmt=IMGFMT size=4294968832 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT | 35 | +/* |
53 | (qemu) info block-jobs | 36 | + * Check that @a allows everything that @b needs. @a and @b must reference same |
54 | No active jobs | 37 | + * child node. |
55 | === IO: pattern 195 | 38 | + */ |
39 | static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) | ||
40 | { | ||
41 | - g_autofree char *user = NULL; | ||
42 | - g_autofree char *perm_names = NULL; | ||
43 | + const char *child_bs_name; | ||
44 | + g_autofree char *a_user = NULL; | ||
45 | + g_autofree char *b_user = NULL; | ||
46 | + g_autofree char *perms = NULL; | ||
47 | + | ||
48 | + assert(a->bs); | ||
49 | + assert(a->bs == b->bs); | ||
50 | |||
51 | if ((b->perm & a->shared_perm) == b->perm) { | ||
52 | return true; | ||
53 | } | ||
54 | |||
55 | - perm_names = bdrv_perm_names(b->perm & ~a->shared_perm); | ||
56 | - user = bdrv_child_user_desc(a); | ||
57 | - error_setg(errp, "Conflicts with use by %s as '%s', which does not " | ||
58 | - "allow '%s' on %s", | ||
59 | - user, a->name, perm_names, bdrv_get_node_name(b->bs)); | ||
60 | + child_bs_name = bdrv_get_node_name(b->bs); | ||
61 | + a_user = bdrv_child_user_desc(a); | ||
62 | + b_user = bdrv_child_user_desc(b); | ||
63 | + perms = bdrv_perm_names(b->perm & ~a->shared_perm); | ||
64 | + | ||
65 | + error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " | ||
66 | + "both required by %s (uses node '%s' as '%s' child) and " | ||
67 | + "unshared by %s (uses node '%s' as '%s' child).", | ||
68 | + child_bs_name, perms, | ||
69 | + b_user, child_bs_name, b->name, | ||
70 | + a_user, child_bs_name, a->name); | ||
71 | |||
72 | return false; | ||
73 | } | ||
74 | diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/tests/qemu-iotests/283.out | ||
77 | +++ b/tests/qemu-iotests/283.out | ||
78 | @@ -XXX,XX +XXX,XX @@ | ||
79 | {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}} | ||
80 | {"return": {}} | ||
81 | {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}} | ||
82 | -{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}} | ||
83 | +{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Permission conflict on node 'base': permissions 'write' are both required by node 'other' (uses node 'base' as 'image' child) and unshared by node 'source' (uses node 'base' as 'image' child)."}} | ||
84 | |||
85 | === backup-top should be gone after job-finalize === | ||
86 | |||
87 | diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/tests/qemu-iotests/307.out | ||
90 | +++ b/tests/qemu-iotests/307.out | ||
91 | @@ -XXX,XX +XXX,XX @@ exports available: 1 | ||
92 | |||
93 | === Add a writable export === | ||
94 | {"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}} | ||
95 | -{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}} | ||
96 | +{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt': permissions 'write' are both required by an unnamed block device (uses node 'fmt' as 'root' child) and unshared by block device 'sda' (uses node 'fmt' as 'root' child)."}} | ||
97 | {"execute": "device_del", "arguments": {"id": "sda"}} | ||
98 | {"return": {}} | ||
99 | {"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
100 | diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/tests/qemu-iotests/tests/qsd-jobs.out | ||
103 | +++ b/tests/qemu-iotests/tests/qsd-jobs.out | ||
104 | @@ -XXX,XX +XXX,XX @@ QMP_VERSION | ||
105 | {"return": {}} | ||
106 | {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} | ||
107 | {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} | ||
108 | -{"error": {"class": "GenericError", "desc": "Conflicts with use by stream job 'job0' as 'intermediate node', which does not allow 'write' on fmt_base"}} | ||
109 | +{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}} | ||
110 | {"return": {}} | ||
111 | {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}} | ||
112 | *** done | ||
56 | -- | 113 | -- |
57 | 2.20.1 | 114 | 2.30.2 |
58 | 115 | ||
59 | 116 | diff view generated by jsdifflib |
1 | From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru> | 1 | From: Sergio Lopez <slp@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | After recent updates block devices cannot be closed on qemu exit. | 3 | Allow block backends to poll their devices/users to check if they have |
4 | This happens due to the block request polling when replay is not finished. | 4 | been quiesced when entering a drained section. |
5 | Therefore now we stop execution recording before closing the block devices. | ||
6 | 5 | ||
7 | Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru> | 6 | This will be used in the next patch to wait for the NBD server to be |
7 | completely quiesced. | ||
8 | |||
9 | Suggested-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
12 | Signed-off-by: Sergio Lopez <slp@redhat.com> | ||
13 | Message-Id: <20210602060552.17433-2-slp@redhat.com> | ||
14 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | --- | 16 | --- |
10 | replay/replay.c | 2 ++ | 17 | include/sysemu/block-backend.h | 4 ++++ |
11 | vl.c | 1 + | 18 | block/block-backend.c | 7 ++++++- |
12 | 2 files changed, 3 insertions(+) | 19 | 2 files changed, 10 insertions(+), 1 deletion(-) |
13 | 20 | ||
14 | diff --git a/replay/replay.c b/replay/replay.c | 21 | diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h |
15 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/replay/replay.c | 23 | --- a/include/sysemu/block-backend.h |
17 | +++ b/replay/replay.c | 24 | +++ b/include/sysemu/block-backend.h |
18 | @@ -XXX,XX +XXX,XX @@ void replay_finish(void) | 25 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps { |
19 | g_free(replay_snapshot); | 26 | * Runs when the backend's last drain request ends. |
20 | replay_snapshot = NULL; | 27 | */ |
21 | 28 | void (*drained_end)(void *opaque); | |
22 | + replay_mode = REPLAY_MODE_NONE; | 29 | + /* |
30 | + * Is the device still busy? | ||
31 | + */ | ||
32 | + bool (*drained_poll)(void *opaque); | ||
33 | } BlockDevOps; | ||
34 | |||
35 | /* This struct is embedded in (the private) BlockBackend struct and contains | ||
36 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/block/block-backend.c | ||
39 | +++ b/block/block-backend.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child) | ||
41 | static bool blk_root_drained_poll(BdrvChild *child) | ||
42 | { | ||
43 | BlockBackend *blk = child->opaque; | ||
44 | + bool busy = false; | ||
45 | assert(blk->quiesce_counter); | ||
46 | - return !!blk->in_flight; | ||
23 | + | 47 | + |
24 | replay_finish_events(); | 48 | + if (blk->dev_ops && blk->dev_ops->drained_poll) { |
49 | + busy = blk->dev_ops->drained_poll(blk->dev_opaque); | ||
50 | + } | ||
51 | + return busy || !!blk->in_flight; | ||
25 | } | 52 | } |
26 | 53 | ||
27 | diff --git a/vl.c b/vl.c | 54 | static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) |
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/vl.c | ||
30 | +++ b/vl.c | ||
31 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
32 | |||
33 | /* No more vcpu or device emulation activity beyond this point */ | ||
34 | vm_shutdown(); | ||
35 | + replay_finish(); | ||
36 | |||
37 | job_cancel_sync_all(); | ||
38 | bdrv_close_all(); | ||
39 | -- | 55 | -- |
40 | 2.20.1 | 56 | 2.30.2 |
41 | 57 | ||
42 | 58 | diff view generated by jsdifflib |
1 | From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru> | 1 | From: Sergio Lopez <slp@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | In record/replay mode bdrv queue is controlled by replay mechanism. | 3 | Before switching between AioContexts we need to make sure that we're |
4 | It does not allow saving or loading the snapshots | 4 | fully quiesced ("nb_requests == 0" for every client) when entering the |
5 | when bdrv queue is not empty. Stopping the VM is not blocked by nonempty | 5 | drained section. |
6 | queue, but flushing the queue is still impossible there, | ||
7 | because it may cause deadlocks in replay mode. | ||
8 | This patch disables bdrv_drain_all and bdrv_flush_all in | ||
9 | record/replay mode. | ||
10 | 6 | ||
11 | Stopping the machine when the IO requests are not finished is needed | 7 | To do this, we set "quiescing = true" for every client on |
12 | for the debugging. E.g., breakpoint may be set at the specified step, | 8 | ".drained_begin" to prevent new coroutines from being created, and |
13 | and forcing the IO requests to finish may break the determinism | 9 | check if "nb_requests == 0" on ".drained_poll". Finally, once we're |
14 | of the execution. | 10 | exiting the drained section, on ".drained_end" we set "quiescing = |
11 | false" and call "nbd_client_receive_next_request()" to resume the | ||
12 | processing of new requests. | ||
15 | 13 | ||
16 | Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru> | 14 | With these changes, "blk_aio_attach()" and "blk_aio_detach()" can be |
17 | Acked-by: Kevin Wolf <kwolf@redhat.com> | 15 | reverted to be as simple as they were before f148ae7d36. |
16 | |||
17 | RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1960137 | ||
18 | Suggested-by: Kevin Wolf <kwolf@redhat.com> | ||
19 | Signed-off-by: Sergio Lopez <slp@redhat.com> | ||
20 | Message-Id: <20210602060552.17433-3-slp@redhat.com> | ||
21 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
18 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 22 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
19 | --- | 23 | --- |
20 | block/io.c | 28 ++++++++++++++++++++++++++++ | 24 | nbd/server.c | 82 ++++++++++++++++++++++++++++++++++++++-------------- |
21 | cpus.c | 2 -- | 25 | 1 file changed, 61 insertions(+), 21 deletions(-) |
22 | 2 files changed, 28 insertions(+), 2 deletions(-) | ||
23 | 26 | ||
24 | diff --git a/block/io.c b/block/io.c | 27 | diff --git a/nbd/server.c b/nbd/server.c |
25 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/block/io.c | 29 | --- a/nbd/server.c |
27 | +++ b/block/io.c | 30 | +++ b/nbd/server.c |
28 | @@ -XXX,XX +XXX,XX @@ | 31 | @@ -XXX,XX +XXX,XX @@ static void nbd_request_put(NBDRequestData *req) |
29 | #include "qapi/error.h" | 32 | g_free(req); |
30 | #include "qemu/error-report.h" | 33 | |
31 | #include "qemu/main-loop.h" | 34 | client->nb_requests--; |
32 | +#include "sysemu/replay.h" | 35 | + |
33 | 36 | + if (client->quiescing && client->nb_requests == 0) { | |
34 | #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ | 37 | + aio_wait_kick(); |
35 | |||
36 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
37 | return; | ||
38 | } | ||
39 | |||
40 | + /* | ||
41 | + * bdrv queue is managed by record/replay, | ||
42 | + * waiting for finishing the I/O requests may | ||
43 | + * be infinite | ||
44 | + */ | ||
45 | + if (replay_events_enabled()) { | ||
46 | + return; | ||
47 | + } | 38 | + } |
48 | + | 39 | + |
49 | /* AIO_WAIT_WHILE() with a NULL context can only be called from the main | 40 | nbd_client_receive_next_request(client); |
50 | * loop AioContext, so make sure we're in the main context. */ | 41 | |
51 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | 42 | nbd_client_put(client); |
52 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | 43 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque) |
53 | BlockDriverState *bs = NULL; | 44 | QTAILQ_FOREACH(client, &exp->clients, next) { |
54 | int drained_end_counter = 0; | 45 | qio_channel_attach_aio_context(client->ioc, ctx); |
55 | 46 | ||
56 | + /* | 47 | + assert(client->nb_requests == 0); |
57 | + * bdrv queue is managed by record/replay, | 48 | assert(client->recv_coroutine == NULL); |
58 | + * waiting for finishing the I/O requests may | 49 | assert(client->send_coroutine == NULL); |
59 | + * be endless | 50 | - |
60 | + */ | 51 | - if (client->quiescing) { |
61 | + if (replay_events_enabled()) { | 52 | - client->quiescing = false; |
62 | + return; | 53 | - nbd_client_receive_next_request(client); |
54 | - } | ||
55 | } | ||
56 | } | ||
57 | |||
58 | -static void nbd_aio_detach_bh(void *opaque) | ||
59 | +static void blk_aio_detach(void *opaque) | ||
60 | { | ||
61 | NBDExport *exp = opaque; | ||
62 | NBDClient *client; | ||
63 | |||
64 | + trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); | ||
65 | + | ||
66 | QTAILQ_FOREACH(client, &exp->clients, next) { | ||
67 | qio_channel_detach_aio_context(client->ioc); | ||
63 | + } | 68 | + } |
64 | + | 69 | + |
65 | while ((bs = bdrv_next_all_states(bs))) { | 70 | + exp->common.ctx = NULL; |
66 | AioContext *aio_context = bdrv_get_aio_context(bs); | 71 | +} |
67 | 72 | + | |
68 | @@ -XXX,XX +XXX,XX @@ int bdrv_flush_all(void) | 73 | +static void nbd_drained_begin(void *opaque) |
69 | BlockDriverState *bs = NULL; | 74 | +{ |
70 | int result = 0; | 75 | + NBDExport *exp = opaque; |
76 | + NBDClient *client; | ||
77 | + | ||
78 | + QTAILQ_FOREACH(client, &exp->clients, next) { | ||
79 | client->quiescing = true; | ||
80 | + } | ||
81 | +} | ||
82 | |||
83 | - if (client->recv_coroutine) { | ||
84 | - if (client->read_yielding) { | ||
85 | - qemu_aio_coroutine_enter(exp->common.ctx, | ||
86 | - client->recv_coroutine); | ||
87 | - } else { | ||
88 | - AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL); | ||
89 | - } | ||
90 | - } | ||
91 | +static void nbd_drained_end(void *opaque) | ||
92 | +{ | ||
93 | + NBDExport *exp = opaque; | ||
94 | + NBDClient *client; | ||
95 | |||
96 | - if (client->send_coroutine) { | ||
97 | - AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL); | ||
98 | - } | ||
99 | + QTAILQ_FOREACH(client, &exp->clients, next) { | ||
100 | + client->quiescing = false; | ||
101 | + nbd_client_receive_next_request(client); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | -static void blk_aio_detach(void *opaque) | ||
106 | +static bool nbd_drained_poll(void *opaque) | ||
107 | { | ||
108 | NBDExport *exp = opaque; | ||
109 | + NBDClient *client; | ||
110 | |||
111 | - trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); | ||
112 | + QTAILQ_FOREACH(client, &exp->clients, next) { | ||
113 | + if (client->nb_requests != 0) { | ||
114 | + /* | ||
115 | + * If there's a coroutine waiting for a request on nbd_read_eof() | ||
116 | + * enter it here so we don't depend on the client to wake it up. | ||
117 | + */ | ||
118 | + if (client->recv_coroutine != NULL && client->read_yielding) { | ||
119 | + qemu_aio_coroutine_enter(exp->common.ctx, | ||
120 | + client->recv_coroutine); | ||
121 | + } | ||
122 | |||
123 | - aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp); | ||
124 | + return true; | ||
125 | + } | ||
126 | + } | ||
127 | |||
128 | - exp->common.ctx = NULL; | ||
129 | + return false; | ||
130 | } | ||
131 | |||
132 | static void nbd_eject_notifier(Notifier *n, void *data) | ||
133 | @@ -XXX,XX +XXX,XX @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk) | ||
134 | blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier); | ||
135 | } | ||
136 | |||
137 | +static const BlockDevOps nbd_block_ops = { | ||
138 | + .drained_begin = nbd_drained_begin, | ||
139 | + .drained_end = nbd_drained_end, | ||
140 | + .drained_poll = nbd_drained_poll, | ||
141 | +}; | ||
142 | + | ||
143 | static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, | ||
144 | Error **errp) | ||
145 | { | ||
146 | @@ -XXX,XX +XXX,XX @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, | ||
147 | |||
148 | exp->allocation_depth = arg->allocation_depth; | ||
71 | 149 | ||
72 | + /* | 150 | + /* |
73 | + * bdrv queue is managed by record/replay, | 151 | + * We need to inhibit request queuing in the block layer to ensure we can |
74 | + * creating new flush request for stopping | 152 | + * be properly quiesced when entering a drained section, as our coroutines |
75 | + * the VM may break the determinism | 153 | + * servicing pending requests might enter blk_pread(). |
76 | + */ | 154 | + */ |
77 | + if (replay_events_enabled()) { | 155 | + blk_set_disable_request_queuing(blk, true); |
78 | + return result; | ||
79 | + } | ||
80 | + | 156 | + |
81 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | 157 | blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); |
82 | AioContext *aio_context = bdrv_get_aio_context(bs); | 158 | |
83 | int ret; | 159 | + blk_set_dev_ops(blk, &nbd_block_ops, exp); |
84 | diff --git a/cpus.c b/cpus.c | 160 | + |
85 | index XXXXXXX..XXXXXXX 100644 | 161 | QTAILQ_INSERT_TAIL(&exports, exp, next); |
86 | --- a/cpus.c | 162 | |
87 | +++ b/cpus.c | 163 | return 0; |
88 | @@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state, bool send_stop) | 164 | @@ -XXX,XX +XXX,XX @@ static void nbd_export_delete(BlockExport *blk_exp) |
165 | } | ||
166 | blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached, | ||
167 | blk_aio_detach, exp); | ||
168 | + blk_set_disable_request_queuing(exp->common.blk, false); | ||
89 | } | 169 | } |
90 | 170 | ||
91 | bdrv_drain_all(); | 171 | for (i = 0; i < exp->nr_export_bitmaps; i++) { |
92 | - replay_disable_events(); | ||
93 | ret = bdrv_flush_all(); | ||
94 | |||
95 | return ret; | ||
96 | @@ -XXX,XX +XXX,XX @@ int vm_prepare_start(void) | ||
97 | /* We are sending this now, but the CPUs will be resumed shortly later */ | ||
98 | qapi_event_send_resume(); | ||
99 | |||
100 | - replay_enable_events(); | ||
101 | cpu_enable_ticks(); | ||
102 | runstate_set(RUN_STATE_RUNNING); | ||
103 | vm_state_notify(1, RUN_STATE_RUNNING); | ||
104 | -- | 172 | -- |
105 | 2.20.1 | 173 | 2.30.2 |
106 | 174 | ||
107 | 175 | diff view generated by jsdifflib |
1 | From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru> | 1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | This patch updates the description of the command lines for using | 3 | Don't report successful progress on failure, when call_state->ret is |
4 | record/replay with attached block devices. | 4 | set. |
5 | 5 | ||
6 | Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru> | 6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
7 | Message-Id: <20210528141628.44287-2-vsementsov@virtuozzo.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | --- | 10 | --- |
9 | docs/replay.txt | 12 +++++++++--- | 11 | block/block-copy.c | 8 +++++--- |
10 | 1 file changed, 9 insertions(+), 3 deletions(-) | 12 | 1 file changed, 5 insertions(+), 3 deletions(-) |
11 | 13 | ||
12 | diff --git a/docs/replay.txt b/docs/replay.txt | 14 | diff --git a/block/block-copy.c b/block/block-copy.c |
13 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/docs/replay.txt | 16 | --- a/block/block-copy.c |
15 | +++ b/docs/replay.txt | 17 | +++ b/block/block-copy.c |
16 | @@ -XXX,XX +XXX,XX @@ Usage of the record/replay: | 18 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int block_copy_task_entry(AioTask *task) |
17 | * First, record the execution with the following command line: | 19 | |
18 | qemu-system-i386 \ | 20 | ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, |
19 | -icount shift=7,rr=record,rrfile=replay.bin \ | 21 | &error_is_read); |
20 | - -drive file=disk.qcow2,if=none,id=img-direct \ | 22 | - if (ret < 0 && !t->call_state->ret) { |
21 | + -drive file=disk.qcow2,if=none,snapshot,id=img-direct \ | 23 | - t->call_state->ret = ret; |
22 | -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \ | 24 | - t->call_state->error_is_read = error_is_read; |
23 | -device ide-hd,drive=img-blkreplay \ | 25 | + if (ret < 0) { |
24 | -netdev user,id=net1 -device rtl8139,netdev=net1 \ | 26 | + if (!t->call_state->ret) { |
25 | @@ -XXX,XX +XXX,XX @@ Usage of the record/replay: | 27 | + t->call_state->ret = ret; |
26 | * After recording, you can replay it by using another command line: | 28 | + t->call_state->error_is_read = error_is_read; |
27 | qemu-system-i386 \ | 29 | + } |
28 | -icount shift=7,rr=replay,rrfile=replay.bin \ | 30 | } else { |
29 | - -drive file=disk.qcow2,if=none,id=img-direct \ | 31 | progress_work_done(t->s->progress, t->bytes); |
30 | + -drive file=disk.qcow2,if=none,snapshot,id=img-direct \ | 32 | } |
31 | -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \ | ||
32 | -device ide-hd,drive=img-blkreplay \ | ||
33 | -netdev user,id=net1 -device rtl8139,netdev=net1 \ | ||
34 | @@ -XXX,XX +XXX,XX @@ Block devices record/replay module intercepts calls of | ||
35 | bdrv coroutine functions at the top of block drivers stack. | ||
36 | To record and replay block operations the drive must be configured | ||
37 | as following: | ||
38 | - -drive file=disk.qcow2,if=none,id=img-direct | ||
39 | + -drive file=disk.qcow2,if=none,snapshot,id=img-direct | ||
40 | -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay | ||
41 | -device ide-hd,drive=img-blkreplay | ||
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ This snapshot is created at start of recording and restored at start | ||
44 | of replaying. It also can be loaded while replaying to roll back | ||
45 | the execution. | ||
46 | |||
47 | +'snapshot' flag of the disk image must be removed to save the snapshots | ||
48 | +in the overlay (or original image) instead of using the temporary overlay. | ||
49 | + -drive file=disk.ovl,if=none,id=img-direct | ||
50 | + -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay | ||
51 | + -device ide-hd,drive=img-blkreplay | ||
52 | + | ||
53 | Use QEMU monitor to create additional snapshots. 'savevm <name>' command | ||
54 | created the snapshot and 'loadvm <name>' restores it. To prevent corruption | ||
55 | of the original disk image, use overlay files linked to the original images. | ||
56 | -- | 33 | -- |
57 | 2.20.1 | 34 | 2.30.2 |
58 | 35 | ||
59 | 36 | diff view generated by jsdifflib |
1 | From: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru> | 1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | This patch enables making snapshots with blkreplay used in | 3 | Currently we update s->use_copy_range and s->copy_size in |
4 | block devices. | 4 | block_copy_do_copy(). |
5 | This function is required to make bdrv_snapshot_goto without | ||
6 | calling .bdrv_open which is not implemented. | ||
7 | 5 | ||
8 | Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru> | 6 | It's not very good: |
9 | Acked-by: Kevin Wolf <kwolf@redhat.com> | 7 | |
8 | 1. block_copy_do_copy() is intended to be a simple function, that wraps | ||
9 | bdrv_co_<io> functions for need of block copy. That's why we don't pass | ||
10 | BlockCopyTask into it. So, block_copy_do_copy() is bad place for | ||
11 | manipulation with generic state of block-copy process | ||
12 | |||
13 | 2. We are going to make block-copy thread-safe. So, it's good to move | ||
14 | manipulation with state of block-copy to the places where we'll need | ||
15 | critical sections anyway, to not introduce extra synchronization | ||
16 | primitives in block_copy_do_copy(). | ||
17 | |||
18 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
19 | Message-Id: <20210528141628.44287-3-vsementsov@virtuozzo.com> | ||
20 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 21 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 22 | --- |
12 | block/blkreplay.c | 8 ++++++++ | 23 | block/block-copy.c | 72 +++++++++++++++++++++++++++++++--------------- |
13 | 1 file changed, 8 insertions(+) | 24 | 1 file changed, 49 insertions(+), 23 deletions(-) |
14 | 25 | ||
15 | diff --git a/block/blkreplay.c b/block/blkreplay.c | 26 | diff --git a/block/block-copy.c b/block/block-copy.c |
16 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block/blkreplay.c | 28 | --- a/block/block-copy.c |
18 | +++ b/block/blkreplay.c | 29 | +++ b/block/block-copy.c |
19 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs) | 30 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyTask { |
31 | int64_t offset; | ||
32 | int64_t bytes; | ||
33 | bool zeroes; | ||
34 | + bool copy_range; | ||
35 | QLIST_ENTRY(BlockCopyTask) list; | ||
36 | CoQueue wait_queue; /* coroutines blocked on this task */ | ||
37 | } BlockCopyTask; | ||
38 | @@ -XXX,XX +XXX,XX @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s, | ||
39 | .call_state = call_state, | ||
40 | .offset = offset, | ||
41 | .bytes = bytes, | ||
42 | + .copy_range = s->use_copy_range, | ||
43 | }; | ||
44 | qemu_co_queue_init(&task->wait_queue); | ||
45 | QLIST_INSERT_HEAD(&s->tasks, task, list); | ||
46 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool, | ||
47 | * | ||
48 | * No sync here: nor bitmap neighter intersecting requests handling, only copy. | ||
49 | * | ||
50 | + * @copy_range is an in-out argument: if *copy_range is false, copy_range is not | ||
51 | + * done. If *copy_range is true, copy_range is attempted. If the copy_range | ||
52 | + * attempt fails, the function falls back to the usual read+write and | ||
53 | + * *copy_range is set to false. *copy_range and zeroes must not be true | ||
54 | + * simultaneously. | ||
55 | + * | ||
56 | * Returns 0 on success. | ||
57 | */ | ||
58 | static int coroutine_fn block_copy_do_copy(BlockCopyState *s, | ||
59 | int64_t offset, int64_t bytes, | ||
60 | - bool zeroes, bool *error_is_read) | ||
61 | + bool zeroes, bool *copy_range, | ||
62 | + bool *error_is_read) | ||
63 | { | ||
64 | int ret; | ||
65 | int64_t nbytes = MIN(offset + bytes, s->len) - offset; | ||
66 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, | ||
67 | assert(offset + bytes <= s->len || | ||
68 | offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size)); | ||
69 | assert(nbytes < INT_MAX); | ||
70 | + assert(!(*copy_range && zeroes)); | ||
71 | |||
72 | if (zeroes) { | ||
73 | ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags & | ||
74 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, | ||
75 | return ret; | ||
76 | } | ||
77 | |||
78 | - if (s->use_copy_range) { | ||
79 | + if (*copy_range) { | ||
80 | ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes, | ||
81 | 0, s->write_flags); | ||
82 | if (ret < 0) { | ||
83 | trace_block_copy_copy_range_fail(s, offset, ret); | ||
84 | - s->use_copy_range = false; | ||
85 | - s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); | ||
86 | + *copy_range = false; | ||
87 | /* Fallback to read+write with allocated buffer */ | ||
88 | } else { | ||
89 | - if (s->use_copy_range) { | ||
90 | - /* | ||
91 | - * Successful copy-range. Now increase copy_size. copy_range | ||
92 | - * does not respect max_transfer (it's a TODO), so we factor | ||
93 | - * that in here. | ||
94 | - * | ||
95 | - * Note: we double-check s->use_copy_range for the case when | ||
96 | - * parallel block-copy request unsets it during previous | ||
97 | - * bdrv_co_copy_range call. | ||
98 | - */ | ||
99 | - s->copy_size = | ||
100 | - MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), | ||
101 | - QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, | ||
102 | - s->target), | ||
103 | - s->cluster_size)); | ||
104 | - } | ||
105 | - goto out; | ||
106 | + return 0; | ||
107 | } | ||
108 | } | ||
109 | |||
110 | @@ -XXX,XX +XXX,XX @@ out: | ||
20 | return ret; | 111 | return ret; |
21 | } | 112 | } |
22 | 113 | ||
23 | +static int blkreplay_snapshot_goto(BlockDriverState *bs, | 114 | +static void block_copy_handle_copy_range_result(BlockCopyState *s, |
24 | + const char *snapshot_id) | 115 | + bool is_success) |
25 | +{ | 116 | +{ |
26 | + return bdrv_snapshot_goto(bs->file->bs, snapshot_id, NULL); | 117 | + if (!s->use_copy_range) { |
118 | + /* already disabled */ | ||
119 | + return; | ||
120 | + } | ||
121 | + | ||
122 | + if (is_success) { | ||
123 | + /* | ||
124 | + * Successful copy-range. Now increase copy_size. copy_range | ||
125 | + * does not respect max_transfer (it's a TODO), so we factor | ||
126 | + * that in here. | ||
127 | + */ | ||
128 | + s->copy_size = | ||
129 | + MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), | ||
130 | + QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, | ||
131 | + s->target), | ||
132 | + s->cluster_size)); | ||
133 | + } else { | ||
134 | + /* Copy-range failed, disable it. */ | ||
135 | + s->use_copy_range = false; | ||
136 | + s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); | ||
137 | + } | ||
27 | +} | 138 | +} |
28 | + | 139 | + |
29 | static BlockDriver bdrv_blkreplay = { | 140 | static coroutine_fn int block_copy_task_entry(AioTask *task) |
30 | .format_name = "blkreplay", | 141 | { |
31 | .instance_size = 0, | 142 | BlockCopyTask *t = container_of(task, BlockCopyTask, task); |
32 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_blkreplay = { | 143 | bool error_is_read = false; |
33 | .bdrv_co_pwrite_zeroes = blkreplay_co_pwrite_zeroes, | 144 | + bool copy_range = t->copy_range; |
34 | .bdrv_co_pdiscard = blkreplay_co_pdiscard, | 145 | int ret; |
35 | .bdrv_co_flush = blkreplay_co_flush, | 146 | |
36 | + | 147 | ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, |
37 | + .bdrv_snapshot_goto = blkreplay_snapshot_goto, | 148 | - &error_is_read); |
38 | }; | 149 | + ©_range, &error_is_read); |
39 | 150 | + if (t->copy_range) { | |
40 | static void bdrv_blkreplay_init(void) | 151 | + block_copy_handle_copy_range_result(t->s, copy_range); |
152 | + } | ||
153 | if (ret < 0) { | ||
154 | if (!t->call_state->ret) { | ||
155 | t->call_state->ret = ret; | ||
156 | @@ -XXX,XX +XXX,XX @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) | ||
157 | g_free(task); | ||
158 | continue; | ||
159 | } | ||
160 | - task->zeroes = ret & BDRV_BLOCK_ZERO; | ||
161 | + if (ret & BDRV_BLOCK_ZERO) { | ||
162 | + task->zeroes = true; | ||
163 | + task->copy_range = false; | ||
164 | + } | ||
165 | |||
166 | if (s->speed) { | ||
167 | if (!call_state->ignore_ratelimit) { | ||
41 | -- | 168 | -- |
42 | 2.20.1 | 169 | 2.30.2 |
43 | 170 | ||
44 | 171 | diff view generated by jsdifflib |
1 | From: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This patch disables setting '-snapshot' option on by default | 3 | Document that security reports must use 'null-co,read-zeroes=on' |
4 | in record/replay mode. This is needed for creating vmstates in record | 4 | because otherwise the memory is left uninitialized (which is an |
5 | and replay modes. | 5 | on-purpose performance feature). |
6 | 6 | ||
7 | Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru> | 7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
8 | Acked-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
9 | Message-Id: <20210601162548.2076631-1-philmd@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 11 | --- |
11 | vl.c | 10 ++++++++-- | 12 | docs/devel/secure-coding-practices.rst | 9 +++++++++ |
12 | 1 file changed, 8 insertions(+), 2 deletions(-) | 13 | 1 file changed, 9 insertions(+) |
13 | 14 | ||
14 | diff --git a/vl.c b/vl.c | 15 | diff --git a/docs/devel/secure-coding-practices.rst b/docs/devel/secure-coding-practices.rst |
15 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/vl.c | 17 | --- a/docs/devel/secure-coding-practices.rst |
17 | +++ b/vl.c | 18 | +++ b/docs/devel/secure-coding-practices.rst |
18 | @@ -XXX,XX +XXX,XX @@ static void configure_blockdev(BlockdevOptionsQueue *bdo_queue, | 19 | @@ -XXX,XX +XXX,XX @@ structures and only process the local copy. This prevents |
19 | qapi_free_BlockdevOptions(bdo->bdo); | 20 | time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to |
20 | g_free(bdo); | 21 | crash when a vCPU thread modifies guest RAM while device emulation is |
21 | } | 22 | processing it. |
22 | - if (snapshot || replay_mode != REPLAY_MODE_NONE) { | 23 | + |
23 | + if (snapshot) { | 24 | +Use of null-co block drivers |
24 | qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, | 25 | +---------------------------- |
25 | NULL, NULL); | 26 | + |
26 | } | 27 | +The ``null-co`` block driver is designed for performance: its read accesses are |
27 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | 28 | +not initialized by default. In case this driver has to be used for security |
28 | drive_add(IF_PFLASH, -1, optarg, PFLASH_OPTS); | 29 | +research, it must be used with the ``read-zeroes=on`` option which fills read |
29 | break; | 30 | +buffers with zeroes. Security issues reported with the default |
30 | case QEMU_OPTION_snapshot: | 31 | +(``read-zeroes=off``) will be discarded. |
31 | - snapshot = 1; | ||
32 | + { | ||
33 | + Error *blocker = NULL; | ||
34 | + snapshot = 1; | ||
35 | + error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, | ||
36 | + "-snapshot"); | ||
37 | + replay_add_blocker(blocker); | ||
38 | + } | ||
39 | break; | ||
40 | case QEMU_OPTION_numa: | ||
41 | opts = qemu_opts_parse_noisily(qemu_find_opts("numa"), | ||
42 | -- | 32 | -- |
43 | 2.20.1 | 33 | 2.30.2 |
44 | 34 | ||
45 | 35 | diff view generated by jsdifflib |