1 | The following changes since commit 9e06029aea3b2eca1d5261352e695edc1e7d7b8b: | 1 | The following changes since commit 16aaacb307ed607b9780c12702c44f0fe52edc7e: |
---|---|---|---|
2 | 2 | ||
3 | Update version for v4.1.0 release (2019-08-15 13:03:37 +0100) | 3 | Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20200430' into staging (2020-04-30 14:00:36 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream | 7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream |
8 | 8 | ||
9 | for you to fetch changes up to a6b257a08e3d72219f03e461a52152672fec0612: | 9 | for you to fetch changes up to eaae29ef89d498d0eac553c77b554f310a47f809: |
10 | 10 | ||
11 | file-posix: Handle undetectable alignment (2019-08-16 11:29:11 +0200) | 11 | qemu-storage-daemon: Fix non-string --object properties (2020-04-30 17:51:07 +0200) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Block layer patches: | 14 | Block layer patches: |
15 | 15 | ||
16 | - file-posix: Fix O_DIRECT alignment detection | 16 | - Fix resize (extending) of short overlays |
17 | - Fixes for concurrent block jobs | 17 | - nvme: introduce PMR support from NVMe 1.4 spec |
18 | - block-backend: Queue requests while drained (fix IDE vs. job crashes) | 18 | - qemu-storage-daemon: Fix non-string --object properties |
19 | - qemu-img convert: Deprecate using -n and -o together | ||
20 | - iotests: Migration tests with filter nodes | ||
21 | - iotests: More media change tests | ||
22 | 19 | ||
23 | ---------------------------------------------------------------- | 20 | ---------------------------------------------------------------- |
24 | Kevin Wolf (10): | 21 | Alberto Garcia (1): |
25 | iotests/118: Test media change for scsi-cd | 22 | qcow2: Add incompatibility note between backing files and raw external data files |
26 | iotests/118: Create test classes dynamically | ||
27 | iotests/118: Add -blockdev based tests | ||
28 | iotests: Move migration helpers to iotests.py | ||
29 | iotests: Test migration with all kinds of filter nodes | ||
30 | block: Simplify bdrv_filter_default_perms() | ||
31 | block: Remove blk_pread_unthrottled() | ||
32 | mirror: Keep mirror_top_bs drained after dropping permissions | ||
33 | block-backend: Queue requests while drained | ||
34 | qemu-img convert: Deprecate using -n and -o together | ||
35 | 23 | ||
36 | Max Reitz (5): | 24 | Andrzej Jakowski (1): |
37 | block: Keep subtree drained in drop_intermediate | 25 | nvme: introduce PMR support from NVMe 1.4 spec |
38 | block: Reduce (un)drains when replacing a child | ||
39 | tests: Test polling in bdrv_drop_intermediate() | ||
40 | tests: Test mid-drain bdrv_replace_child_noperm() | ||
41 | iotests: Add test for concurrent stream/commit | ||
42 | 26 | ||
43 | Nir Soffer (1): | 27 | Kevin Wolf (12): |
44 | file-posix: Handle undetectable alignment | 28 | block: Add flags to BlockDriver.bdrv_co_truncate() |
29 | block: Add flags to bdrv(_co)_truncate() | ||
30 | block-backend: Add flags to blk_truncate() | ||
31 | qcow2: Support BDRV_REQ_ZERO_WRITE for truncate | ||
32 | raw-format: Support BDRV_REQ_ZERO_WRITE for truncate | ||
33 | file-posix: Support BDRV_REQ_ZERO_WRITE for truncate | ||
34 | block: truncate: Don't make backing file data visible | ||
35 | iotests: Filter testfiles out in filter_img_info() | ||
36 | iotests: Test committing to short backing file | ||
37 | qcow2: Forward ZERO_WRITE flag for full preallocation | ||
38 | qom: Factor out user_creatable_add_dict() | ||
39 | qemu-storage-daemon: Fix non-string --object properties | ||
45 | 40 | ||
46 | include/sysemu/block-backend.h | 3 +- | 41 | Paolo Bonzini (1): |
47 | block.c | 63 +++--- | 42 | qemu-iotests: allow qcow2 external discarded clusters to contain stale data |
48 | block/backup.c | 1 + | ||
49 | block/block-backend.c | 69 ++++-- | ||
50 | block/commit.c | 2 + | ||
51 | block/file-posix.c | 36 +++- | ||
52 | block/mirror.c | 7 +- | ||
53 | blockjob.c | 3 + | ||
54 | hw/block/hd-geometry.c | 7 +- | ||
55 | qemu-img.c | 5 + | ||
56 | tests/test-bdrv-drain.c | 476 +++++++++++++++++++++++++++++++++++++++++ | ||
57 | qemu-deprecated.texi | 7 + | ||
58 | tests/qemu-iotests/118 | 84 ++++---- | ||
59 | tests/qemu-iotests/118.out | 4 +- | ||
60 | tests/qemu-iotests/234 | 30 +-- | ||
61 | tests/qemu-iotests/258 | 163 ++++++++++++++ | ||
62 | tests/qemu-iotests/258.out | 33 +++ | ||
63 | tests/qemu-iotests/262 | 82 +++++++ | ||
64 | tests/qemu-iotests/262.out | 17 ++ | ||
65 | tests/qemu-iotests/group | 2 + | ||
66 | tests/qemu-iotests/iotests.py | 16 ++ | ||
67 | 21 files changed, 983 insertions(+), 127 deletions(-) | ||
68 | create mode 100755 tests/qemu-iotests/258 | ||
69 | create mode 100644 tests/qemu-iotests/258.out | ||
70 | create mode 100755 tests/qemu-iotests/262 | ||
71 | create mode 100644 tests/qemu-iotests/262.out | ||
72 | 43 | ||
44 | docs/interop/qcow2.txt | 3 + | ||
45 | hw/block/nvme.h | 2 + | ||
46 | include/block/block.h | 5 +- | ||
47 | include/block/block_int.h | 10 +- | ||
48 | include/block/nvme.h | 172 ++++++++++++++++++++++++++ | ||
49 | include/qom/object_interfaces.h | 16 +++ | ||
50 | include/sysemu/block-backend.h | 2 +- | ||
51 | block.c | 3 +- | ||
52 | block/block-backend.c | 4 +- | ||
53 | block/commit.c | 4 +- | ||
54 | block/crypto.c | 7 +- | ||
55 | block/file-posix.c | 6 +- | ||
56 | block/file-win32.c | 2 +- | ||
57 | block/gluster.c | 1 + | ||
58 | block/io.c | 43 ++++++- | ||
59 | block/iscsi.c | 2 +- | ||
60 | block/mirror.c | 2 +- | ||
61 | block/nfs.c | 3 +- | ||
62 | block/parallels.c | 6 +- | ||
63 | block/qcow.c | 4 +- | ||
64 | block/qcow2-cluster.c | 2 +- | ||
65 | block/qcow2-refcount.c | 2 +- | ||
66 | block/qcow2.c | 73 +++++++++-- | ||
67 | block/qed.c | 3 +- | ||
68 | block/raw-format.c | 6 +- | ||
69 | block/rbd.c | 1 + | ||
70 | block/sheepdog.c | 4 +- | ||
71 | block/ssh.c | 2 +- | ||
72 | block/vdi.c | 2 +- | ||
73 | block/vhdx-log.c | 2 +- | ||
74 | block/vhdx.c | 6 +- | ||
75 | block/vmdk.c | 8 +- | ||
76 | block/vpc.c | 2 +- | ||
77 | blockdev.c | 2 +- | ||
78 | hw/block/nvme.c | 109 ++++++++++++++++ | ||
79 | qemu-img.c | 2 +- | ||
80 | qemu-io-cmds.c | 2 +- | ||
81 | qemu-storage-daemon.c | 4 +- | ||
82 | qom/object_interfaces.c | 31 +++++ | ||
83 | qom/qom-qmp-cmds.c | 24 +--- | ||
84 | tests/test-block-iothread.c | 9 +- | ||
85 | tests/qemu-iotests/iotests.py | 5 +- | ||
86 | hw/block/Makefile.objs | 2 +- | ||
87 | hw/block/trace-events | 4 + | ||
88 | tests/qemu-iotests/244 | 10 +- | ||
89 | tests/qemu-iotests/244.out | 9 +- | ||
90 | tests/qemu-iotests/274 | 155 +++++++++++++++++++++++ | ||
91 | tests/qemu-iotests/274.out | 268 ++++++++++++++++++++++++++++++++++++++++ | ||
92 | tests/qemu-iotests/group | 1 + | ||
93 | 49 files changed, 951 insertions(+), 96 deletions(-) | ||
94 | create mode 100755 tests/qemu-iotests/274 | ||
95 | create mode 100644 tests/qemu-iotests/274.out | ||
96 | |||
97 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The test covered only floppy and ide-cd. Add scsi-cd as well. | ||
2 | 1 | ||
3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
4 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
5 | --- | ||
6 | tests/qemu-iotests/118 | 20 ++++++++++++++++++++ | ||
7 | tests/qemu-iotests/118.out | 4 ++-- | ||
8 | 2 files changed, 22 insertions(+), 2 deletions(-) | ||
9 | |||
10 | diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118 | ||
11 | index XXXXXXX..XXXXXXX 100755 | ||
12 | --- a/tests/qemu-iotests/118 | ||
13 | +++ b/tests/qemu-iotests/118 | ||
14 | @@ -XXX,XX +XXX,XX @@ def interface_to_device_name(interface): | ||
15 | return 'ide-cd' | ||
16 | elif interface == 'floppy': | ||
17 | return 'floppy' | ||
18 | + elif interface == 'scsi': | ||
19 | + return 'scsi-cd' | ||
20 | else: | ||
21 | return None | ||
22 | |||
23 | @@ -XXX,XX +XXX,XX @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass): | ||
24 | qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') | ||
25 | self.vm = iotests.VM() | ||
26 | self.vm.add_drive(old_img, 'media=%s' % media, 'none') | ||
27 | + if interface == 'scsi': | ||
28 | + self.vm.add_device('virtio-scsi-pci') | ||
29 | self.vm.add_device('%s,drive=drive0,id=%s' % | ||
30 | (interface_to_device_name(interface), | ||
31 | self.device_name)) | ||
32 | @@ -XXX,XX +XXX,XX @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass): | ||
33 | def setUp(self, media, interface): | ||
34 | qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') | ||
35 | self.vm = iotests.VM().add_drive(None, 'media=%s' % media, 'none') | ||
36 | + if interface == 'scsi': | ||
37 | + self.vm.add_device('virtio-scsi-pci') | ||
38 | self.vm.add_device('%s,drive=drive0,id=%s' % | ||
39 | (interface_to_device_name(interface), | ||
40 | self.device_name)) | ||
41 | @@ -XXX,XX +XXX,XX @@ class TestCDInitiallyEmpty(TestInitiallyEmpty): | ||
42 | def setUp(self): | ||
43 | self.TestInitiallyEmpty.setUp(self, 'cdrom', 'ide') | ||
44 | |||
45 | +class TestSCSICDInitiallyFilled(TestInitiallyFilled): | ||
46 | + TestInitiallyFilled = TestInitiallyFilled | ||
47 | + has_real_tray = True | ||
48 | + | ||
49 | + def setUp(self): | ||
50 | + self.TestInitiallyFilled.setUp(self, 'cdrom', 'scsi') | ||
51 | + | ||
52 | +class TestSCSICDInitiallyEmpty(TestInitiallyEmpty): | ||
53 | + TestInitiallyEmpty = TestInitiallyEmpty | ||
54 | + has_real_tray = True | ||
55 | + | ||
56 | + def setUp(self): | ||
57 | + self.TestInitiallyEmpty.setUp(self, 'cdrom', 'scsi') | ||
58 | + | ||
59 | class TestFloppyInitiallyFilled(TestInitiallyFilled): | ||
60 | TestInitiallyFilled = TestInitiallyFilled | ||
61 | has_real_tray = False | ||
62 | diff --git a/tests/qemu-iotests/118.out b/tests/qemu-iotests/118.out | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/tests/qemu-iotests/118.out | ||
65 | +++ b/tests/qemu-iotests/118.out | ||
66 | @@ -XXX,XX +XXX,XX @@ | ||
67 | -............................................................... | ||
68 | +......................................................................................... | ||
69 | ---------------------------------------------------------------------- | ||
70 | -Ran 63 tests | ||
71 | +Ran 89 tests | ||
72 | |||
73 | OK | ||
74 | -- | ||
75 | 2.20.1 | ||
76 | |||
77 | diff view generated by jsdifflib |
1 | mirror_top_bs is currently implicitly drained through its connection to | 1 | From: Alberto Garcia <berto@igalia.com> |
---|---|---|---|
2 | the source or the target node. However, the drain section for target_bs | ||
3 | ends early after moving mirror_top_bs from src to target_bs, so that | ||
4 | requests can already be restarted while mirror_top_bs is still present | ||
5 | in the chain, but has dropped all permissions and therefore runs into an | ||
6 | assertion failure like this: | ||
7 | 2 | ||
8 | qemu-system-x86_64: block/io.c:1634: bdrv_co_write_req_prepare: | 3 | Backing files and raw external data files are mutually exclusive. |
9 | Assertion `child->perm & BLK_PERM_WRITE' failed. | 4 | The documentation of the raw external data bit (in autoclear_features) |
5 | already indicates that, but we should also mention it on the other | ||
6 | side. | ||
10 | 7 | ||
11 | Keep mirror_top_bs drained until all graph changes have completed. | 8 | Suggested-by: Eric Blake <eblake@redhat.com> |
9 | Signed-off-by: Alberto Garcia <berto@igalia.com> | ||
10 | Message-Id: <20200410121816.8334-1-berto@igalia.com> | ||
11 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | --- | ||
14 | docs/interop/qcow2.txt | 3 +++ | ||
15 | 1 file changed, 3 insertions(+) | ||
12 | 16 | ||
13 | Cc: qemu-stable@nongnu.org | 17 | diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt |
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
15 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
16 | --- | ||
17 | block/mirror.c | 6 +++++- | ||
18 | 1 file changed, 5 insertions(+), 1 deletion(-) | ||
19 | |||
20 | diff --git a/block/mirror.c b/block/mirror.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/block/mirror.c | 19 | --- a/docs/interop/qcow2.txt |
23 | +++ b/block/mirror.c | 20 | +++ b/docs/interop/qcow2.txt |
24 | @@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job) | 21 | @@ -XXX,XX +XXX,XX @@ The first cluster of a qcow2 image contains the file header: |
25 | s->target = NULL; | 22 | is stored (NB: The string is not null terminated). 0 if the |
26 | 23 | image doesn't have a backing file. | |
27 | /* We don't access the source any more. Dropping any WRITE/RESIZE is | 24 | |
28 | - * required before it could become a backing file of target_bs. */ | 25 | + Note: backing files are incompatible with raw external data |
29 | + * required before it could become a backing file of target_bs. Not having | 26 | + files (auto-clear feature bit 1). |
30 | + * these permissions any more means that we can't allow any new requests on | 27 | + |
31 | + * mirror_top_bs from now on, so keep it drained. */ | 28 | 16 - 19: backing_file_size |
32 | + bdrv_drained_begin(mirror_top_bs); | 29 | Length of the backing file name in bytes. Must not be |
33 | bs_opaque->stop = true; | 30 | longer than 1023 bytes. Undefined if the image doesn't have |
34 | bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, | ||
35 | &error_abort); | ||
36 | @@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job) | ||
37 | bs_opaque->job = NULL; | ||
38 | |||
39 | bdrv_drained_end(src); | ||
40 | + bdrv_drained_end(mirror_top_bs); | ||
41 | s->in_drain = false; | ||
42 | bdrv_unref(mirror_top_bs); | ||
43 | bdrv_unref(src); | ||
44 | -- | 31 | -- |
45 | 2.20.1 | 32 | 2.25.3 |
46 | 33 | ||
47 | 34 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | bdrv_drop_intermediate() calls BdrvChildRole.update_filename(). That | 3 | Test 244 checks the expected behavior of qcow2 external data files |
4 | may poll, thus changing the graph, which potentially breaks the | 4 | with respect to zero and discarded clusters. Filesystems however |
5 | QLIST_FOREACH_SAFE() loop. | 5 | are free to ignore discard requests, and this seems to be the |
6 | case for overlayfs. Relax the tests to skip checks on the | ||
7 | external data file for discarded areas, which implies not using | ||
8 | qemu-img compare in the data_file_raw=on case. | ||
6 | 9 | ||
7 | Just keep the whole subtree drained. This is probably the right thing | 10 | This fixes docker tests on RHEL8. |
8 | to do anyway (dropping nodes while the subtree is not drained seems | ||
9 | wrong). | ||
10 | 11 | ||
11 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 12 | Cc: Kevin Wolf <kwolf@redhat.com> |
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | 13 | Cc: qemu-block@nongnu.org |
14 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
15 | Message-Id: <20200409191006.24429-1-pbonzini@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 17 | --- |
15 | block.c | 2 ++ | 18 | tests/qemu-iotests/244 | 10 ++++++++-- |
16 | 1 file changed, 2 insertions(+) | 19 | tests/qemu-iotests/244.out | 9 ++++++--- |
20 | 2 files changed, 14 insertions(+), 5 deletions(-) | ||
17 | 21 | ||
18 | diff --git a/block.c b/block.c | 22 | diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244 |
23 | index XXXXXXX..XXXXXXX 100755 | ||
24 | --- a/tests/qemu-iotests/244 | ||
25 | +++ b/tests/qemu-iotests/244 | ||
26 | @@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \ | ||
27 | echo | ||
28 | $QEMU_IO -c 'read -P 0 0 1M' \ | ||
29 | -c 'read -P 0x11 1M 1M' \ | ||
30 | - -c 'read -P 0 2M 2M' \ | ||
31 | -c 'read -P 0x11 4M 1M' \ | ||
32 | -c 'read -P 0 5M 1M' \ | ||
33 | -f raw "$TEST_IMG.data" | | ||
34 | @@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \ | ||
35 | -f $IMGFMT "$TEST_IMG" | | ||
36 | _filter_qemu_io | ||
37 | |||
38 | +# Discarded clusters are only marked as such in the qcow2 metadata, but | ||
39 | +# they can contain stale data in the external data file. Instead, zero | ||
40 | +# clusters must be zeroed in the external data file too. | ||
41 | echo | ||
42 | -$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.data" | ||
43 | +$QEMU_IO -c 'read -P 0 0 1M' \ | ||
44 | + -c 'read -P 0x11 1M 1M' \ | ||
45 | + -c 'read -P 0 3M 3M' \ | ||
46 | + -f raw "$TEST_IMG".data | | ||
47 | + _filter_qemu_io | ||
48 | |||
49 | echo -n "qcow2 file size after I/O: " | ||
50 | du -b $TEST_IMG | cut -f1 | ||
51 | diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out | ||
19 | index XXXXXXX..XXXXXXX 100644 | 52 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block.c | 53 | --- a/tests/qemu-iotests/244.out |
21 | +++ b/block.c | 54 | +++ b/tests/qemu-iotests/244.out |
22 | @@ -XXX,XX +XXX,XX @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, | 55 | @@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0 |
23 | int ret = -EIO; | 56 | 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
24 | 57 | read 1048576/1048576 bytes at offset 1048576 | |
25 | bdrv_ref(top); | 58 | 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
26 | + bdrv_subtree_drained_begin(top); | 59 | -read 2097152/2097152 bytes at offset 2097152 |
27 | 60 | -2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | |
28 | if (!top->drv || !base->drv) { | 61 | read 1048576/1048576 bytes at offset 4194304 |
29 | goto exit; | 62 | 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
30 | @@ -XXX,XX +XXX,XX @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, | 63 | read 1048576/1048576 bytes at offset 5242880 |
31 | 64 | @@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 1048576 | |
32 | ret = 0; | 65 | read 4194304/4194304 bytes at offset 2097152 |
33 | exit: | 66 | 4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
34 | + bdrv_subtree_drained_end(top); | 67 | |
35 | bdrv_unref(top); | 68 | -Images are identical. |
36 | return ret; | 69 | +read 1048576/1048576 bytes at offset 0 |
37 | } | 70 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
71 | +read 1048576/1048576 bytes at offset 1048576 | ||
72 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
73 | +read 3145728/3145728 bytes at offset 3145728 | ||
74 | +3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
75 | qcow2 file size after I/O: 327680 | ||
76 | |||
77 | === bdrv_co_block_status test for file and offset=0 === | ||
38 | -- | 78 | -- |
39 | 2.20.1 | 79 | 2.25.3 |
40 | 80 | ||
41 | 81 | diff view generated by jsdifflib |
1 | bdrv_create options specified with -o have no effect when skipping image | 1 | This adds a new BdrvRequestFlags parameter to the .bdrv_co_truncate() |
---|---|---|---|
2 | creation with -n, so this doesn't make sense. Warn against the misuse | 2 | driver callbacks, and a supported_truncate_flags field in |
3 | and deprecate the combination so we can make it a hard error later. | 3 | BlockDriverState that allows drivers to advertise support for request |
4 | flags in the context of truncate. | ||
5 | |||
6 | For now, we always pass 0 and no drivers declare support for any flag. | ||
4 | 7 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
10 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
6 | Reviewed-by: Max Reitz <mreitz@redhat.com> | 11 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
7 | Reviewed-by: John Snow <jsnow@redhat.com> | 12 | Message-Id: <20200424125448.63318-2-kwolf@redhat.com> |
8 | Reviewed-by: Eric Blake <eblake@redhat.com> | 13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | --- | 14 | --- |
10 | qemu-img.c | 5 +++++ | 15 | include/block/block_int.h | 10 +++++++++- |
11 | qemu-deprecated.texi | 7 +++++++ | 16 | block/crypto.c | 3 ++- |
12 | 2 files changed, 12 insertions(+) | 17 | block/file-posix.c | 2 +- |
13 | 18 | block/file-win32.c | 2 +- | |
14 | diff --git a/qemu-img.c b/qemu-img.c | 19 | block/gluster.c | 1 + |
15 | index XXXXXXX..XXXXXXX 100644 | 20 | block/io.c | 8 +++++++- |
16 | --- a/qemu-img.c | 21 | block/iscsi.c | 2 +- |
17 | +++ b/qemu-img.c | 22 | block/nfs.c | 3 ++- |
18 | @@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv) | 23 | block/qcow2.c | 2 +- |
19 | goto fail_getopt; | 24 | block/qed.c | 1 + |
25 | block/raw-format.c | 2 +- | ||
26 | block/rbd.c | 1 + | ||
27 | block/sheepdog.c | 4 ++-- | ||
28 | block/ssh.c | 2 +- | ||
29 | tests/test-block-iothread.c | 3 ++- | ||
30 | 15 files changed, 33 insertions(+), 13 deletions(-) | ||
31 | |||
32 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/include/block/block_int.h | ||
35 | +++ b/include/block/block_int.h | ||
36 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
37 | */ | ||
38 | int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, | ||
39 | bool exact, PreallocMode prealloc, | ||
40 | - Error **errp); | ||
41 | + BdrvRequestFlags flags, Error **errp); | ||
42 | |||
43 | int64_t (*bdrv_getlength)(BlockDriverState *bs); | ||
44 | bool has_variable_length; | ||
45 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
46 | /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, | ||
47 | * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ | ||
48 | unsigned int supported_zero_flags; | ||
49 | + /* | ||
50 | + * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). | ||
51 | + * | ||
52 | + * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure | ||
53 | + * that any added space reads as all zeros. If this can't be guaranteed, | ||
54 | + * the operation must fail. | ||
55 | + */ | ||
56 | + unsigned int supported_truncate_flags; | ||
57 | |||
58 | /* the following member gives a name to every node on the bs graph. */ | ||
59 | char node_name[32]; | ||
60 | diff --git a/block/crypto.c b/block/crypto.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/block/crypto.c | ||
63 | +++ b/block/crypto.c | ||
64 | @@ -XXX,XX +XXX,XX @@ static int block_crypto_co_create_generic(BlockDriverState *bs, | ||
65 | |||
66 | static int coroutine_fn | ||
67 | block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, | ||
68 | - PreallocMode prealloc, Error **errp) | ||
69 | + PreallocMode prealloc, BdrvRequestFlags flags, | ||
70 | + Error **errp) | ||
71 | { | ||
72 | BlockCrypto *crypto = bs->opaque; | ||
73 | uint64_t payload_offset = | ||
74 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/block/file-posix.c | ||
77 | +++ b/block/file-posix.c | ||
78 | @@ -XXX,XX +XXX,XX @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, | ||
79 | |||
80 | static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
81 | bool exact, PreallocMode prealloc, | ||
82 | - Error **errp) | ||
83 | + BdrvRequestFlags flags, Error **errp) | ||
84 | { | ||
85 | BDRVRawState *s = bs->opaque; | ||
86 | struct stat st; | ||
87 | diff --git a/block/file-win32.c b/block/file-win32.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/block/file-win32.c | ||
90 | +++ b/block/file-win32.c | ||
91 | @@ -XXX,XX +XXX,XX @@ static void raw_close(BlockDriverState *bs) | ||
92 | |||
93 | static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
94 | bool exact, PreallocMode prealloc, | ||
95 | - Error **errp) | ||
96 | + BdrvRequestFlags flags, Error **errp) | ||
97 | { | ||
98 | BDRVRawState *s = bs->opaque; | ||
99 | LONG low, high; | ||
100 | diff --git a/block/gluster.c b/block/gluster.c | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/block/gluster.c | ||
103 | +++ b/block/gluster.c | ||
104 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, | ||
105 | int64_t offset, | ||
106 | bool exact, | ||
107 | PreallocMode prealloc, | ||
108 | + BdrvRequestFlags flags, | ||
109 | Error **errp) | ||
110 | { | ||
111 | BDRVGlusterState *s = bs->opaque; | ||
112 | diff --git a/block/io.c b/block/io.c | ||
113 | index XXXXXXX..XXXXXXX 100644 | ||
114 | --- a/block/io.c | ||
115 | +++ b/block/io.c | ||
116 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
117 | BlockDriverState *bs = child->bs; | ||
118 | BlockDriver *drv = bs->drv; | ||
119 | BdrvTrackedRequest req; | ||
120 | + BdrvRequestFlags flags = 0; | ||
121 | int64_t old_size, new_bytes; | ||
122 | int ret; | ||
123 | |||
124 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
20 | } | 125 | } |
21 | 126 | ||
22 | + if (skip_create && options) { | 127 | if (drv->bdrv_co_truncate) { |
23 | + warn_report("-o has no effect when skipping image creation"); | 128 | - ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp); |
24 | + warn_report("This will become an error in future QEMU versions."); | 129 | + if (flags & ~bs->supported_truncate_flags) { |
25 | + } | 130 | + error_setg(errp, "Block driver does not support requested flags"); |
26 | + | 131 | + ret = -ENOTSUP; |
27 | s.src_num = argc - optind - 1; | 132 | + goto out; |
28 | out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL; | 133 | + } |
29 | 134 | + ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); | |
30 | diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi | 135 | } else if (bs->file && drv->is_filter) { |
31 | index XXXXXXX..XXXXXXX 100644 | 136 | ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); |
32 | --- a/qemu-deprecated.texi | 137 | } else { |
33 | +++ b/qemu-deprecated.texi | 138 | diff --git a/block/iscsi.c b/block/iscsi.c |
34 | @@ -XXX,XX +XXX,XX @@ to just export the entire image and then mount only /dev/nbd0p1 than | 139 | index XXXXXXX..XXXXXXX 100644 |
35 | it is to reinvoke @command{qemu-nbd -c /dev/nbd0} limited to just a | 140 | --- a/block/iscsi.c |
36 | subset of the image. | 141 | +++ b/block/iscsi.c |
37 | 142 | @@ -XXX,XX +XXX,XX @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) | |
38 | +@subsection qemu-img convert -n -o (since 4.2.0) | 143 | |
39 | + | 144 | static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, |
40 | +All options specified in @option{-o} are image creation options, so | 145 | bool exact, PreallocMode prealloc, |
41 | +they have no effect when used with @option{-n} to skip image creation. | 146 | - Error **errp) |
42 | +Silently ignored options can be confusing, so this combination of | 147 | + BdrvRequestFlags flags, Error **errp) |
43 | +options will be made an error in future versions. | 148 | { |
44 | + | 149 | IscsiLun *iscsilun = bs->opaque; |
45 | @section Build system | 150 | int64_t cur_length; |
46 | 151 | diff --git a/block/nfs.c b/block/nfs.c | |
47 | @subsection Python 2 support (since 4.1.0) | 152 | index XXXXXXX..XXXXXXX 100644 |
153 | --- a/block/nfs.c | ||
154 | +++ b/block/nfs.c | ||
155 | @@ -XXX,XX +XXX,XX @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) | ||
156 | |||
157 | static int coroutine_fn | ||
158 | nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, | ||
159 | - PreallocMode prealloc, Error **errp) | ||
160 | + PreallocMode prealloc, BdrvRequestFlags flags, | ||
161 | + Error **errp) | ||
162 | { | ||
163 | NFSClient *client = bs->opaque; | ||
164 | int ret; | ||
165 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/block/qcow2.c | ||
168 | +++ b/block/qcow2.c | ||
169 | @@ -XXX,XX +XXX,XX @@ fail: | ||
170 | |||
171 | static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
172 | bool exact, PreallocMode prealloc, | ||
173 | - Error **errp) | ||
174 | + BdrvRequestFlags flags, Error **errp) | ||
175 | { | ||
176 | BDRVQcow2State *s = bs->opaque; | ||
177 | uint64_t old_length; | ||
178 | diff --git a/block/qed.c b/block/qed.c | ||
179 | index XXXXXXX..XXXXXXX 100644 | ||
180 | --- a/block/qed.c | ||
181 | +++ b/block/qed.c | ||
182 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, | ||
183 | int64_t offset, | ||
184 | bool exact, | ||
185 | PreallocMode prealloc, | ||
186 | + BdrvRequestFlags flags, | ||
187 | Error **errp) | ||
188 | { | ||
189 | BDRVQEDState *s = bs->opaque; | ||
190 | diff --git a/block/raw-format.c b/block/raw-format.c | ||
191 | index XXXXXXX..XXXXXXX 100644 | ||
192 | --- a/block/raw-format.c | ||
193 | +++ b/block/raw-format.c | ||
194 | @@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) | ||
195 | |||
196 | static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
197 | bool exact, PreallocMode prealloc, | ||
198 | - Error **errp) | ||
199 | + BdrvRequestFlags flags, Error **errp) | ||
200 | { | ||
201 | BDRVRawState *s = bs->opaque; | ||
202 | |||
203 | diff --git a/block/rbd.c b/block/rbd.c | ||
204 | index XXXXXXX..XXXXXXX 100644 | ||
205 | --- a/block/rbd.c | ||
206 | +++ b/block/rbd.c | ||
207 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, | ||
208 | int64_t offset, | ||
209 | bool exact, | ||
210 | PreallocMode prealloc, | ||
211 | + BdrvRequestFlags flags, | ||
212 | Error **errp) | ||
213 | { | ||
214 | int r; | ||
215 | diff --git a/block/sheepdog.c b/block/sheepdog.c | ||
216 | index XXXXXXX..XXXXXXX 100644 | ||
217 | --- a/block/sheepdog.c | ||
218 | +++ b/block/sheepdog.c | ||
219 | @@ -XXX,XX +XXX,XX @@ static int64_t sd_getlength(BlockDriverState *bs) | ||
220 | |||
221 | static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, | ||
222 | bool exact, PreallocMode prealloc, | ||
223 | - Error **errp) | ||
224 | + BdrvRequestFlags flags, Error **errp) | ||
225 | { | ||
226 | BDRVSheepdogState *s = bs->opaque; | ||
227 | int ret, fd; | ||
228 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, | ||
229 | |||
230 | assert(!flags); | ||
231 | if (offset > s->inode.vdi_size) { | ||
232 | - ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL); | ||
233 | + ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL); | ||
234 | if (ret < 0) { | ||
235 | return ret; | ||
236 | } | ||
237 | diff --git a/block/ssh.c b/block/ssh.c | ||
238 | index XXXXXXX..XXXXXXX 100644 | ||
239 | --- a/block/ssh.c | ||
240 | +++ b/block/ssh.c | ||
241 | @@ -XXX,XX +XXX,XX @@ static int64_t ssh_getlength(BlockDriverState *bs) | ||
242 | |||
243 | static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, | ||
244 | bool exact, PreallocMode prealloc, | ||
245 | - Error **errp) | ||
246 | + BdrvRequestFlags flags, Error **errp) | ||
247 | { | ||
248 | BDRVSSHState *s = bs->opaque; | ||
249 | |||
250 | diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c | ||
251 | index XXXXXXX..XXXXXXX 100644 | ||
252 | --- a/tests/test-block-iothread.c | ||
253 | +++ b/tests/test-block-iothread.c | ||
254 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs, | ||
255 | |||
256 | static int coroutine_fn | ||
257 | bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, | ||
258 | - PreallocMode prealloc, Error **errp) | ||
259 | + PreallocMode prealloc, BdrvRequestFlags flags, | ||
260 | + Error **errp) | ||
261 | { | ||
262 | return 0; | ||
263 | } | ||
48 | -- | 264 | -- |
49 | 2.20.1 | 265 | 2.25.3 |
50 | 266 | ||
51 | 267 | diff view generated by jsdifflib |
1 | The functionality offered by blk_pread_unthrottled() goes back to commit | 1 | Now that block drivers can support flags for .bdrv_co_truncate, expose |
---|---|---|---|
2 | 498e386c584. Then, we couldn't perform I/O throttling with synchronous | 2 | the parameter in the node level interfaces bdrv_co_truncate() and |
3 | requests because timers wouldn't be executed in polling loops. So the | 3 | bdrv_truncate(). |
4 | commit automatically disabled I/O throttling as soon as a synchronous | ||
5 | request was issued. | ||
6 | |||
7 | However, for geometry detection during disk initialisation, we always | ||
8 | used (and still use) synchronous requests even if guest requests use AIO | ||
9 | later. Geometry detection was not supposed to disable I/O throttling, so | ||
10 | bdrv_pread_unthrottled() was introduced which disabled throttling only | ||
11 | temporarily. | ||
12 | |||
13 | All of this isn't necessary any more because we do run timers in polling | ||
14 | loops and even synchronous requests are now using coroutine | ||
15 | infrastructure internally. For this reason, commit 90c78624f already | ||
16 | removed the automatic disabling of I/O throttling. | ||
17 | |||
18 | It's time to get rid of the workaround for the removed code, and its | ||
19 | abuse of blk_root_drained_begin()/end(), as well. | ||
20 | 4 | ||
21 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
7 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
22 | Reviewed-by: Max Reitz <mreitz@redhat.com> | 8 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
9 | Message-Id: <20200424125448.63318-3-kwolf@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
23 | --- | 11 | --- |
24 | include/sysemu/block-backend.h | 2 -- | 12 | include/block/block.h | 5 +++-- |
25 | block/block-backend.c | 16 ---------------- | 13 | block/block-backend.c | 2 +- |
26 | hw/block/hd-geometry.c | 7 +------ | 14 | block/crypto.c | 2 +- |
27 | 3 files changed, 1 insertion(+), 24 deletions(-) | 15 | block/io.c | 12 +++++++----- |
16 | block/parallels.c | 6 +++--- | ||
17 | block/qcow.c | 4 ++-- | ||
18 | block/qcow2-refcount.c | 2 +- | ||
19 | block/qcow2.c | 15 +++++++++------ | ||
20 | block/raw-format.c | 2 +- | ||
21 | block/vhdx-log.c | 2 +- | ||
22 | block/vhdx.c | 2 +- | ||
23 | block/vmdk.c | 2 +- | ||
24 | tests/test-block-iothread.c | 6 +++--- | ||
25 | 13 files changed, 34 insertions(+), 28 deletions(-) | ||
28 | 26 | ||
29 | diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h | 27 | diff --git a/include/block/block.h b/include/block/block.h |
30 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/include/sysemu/block-backend.h | 29 | --- a/include/block/block.h |
32 | +++ b/include/sysemu/block-backend.h | 30 | +++ b/include/block/block.h |
33 | @@ -XXX,XX +XXX,XX @@ char *blk_get_attached_dev_id(BlockBackend *blk); | 31 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, |
34 | BlockBackend *blk_by_dev(void *dev); | 32 | void bdrv_refresh_filename(BlockDriverState *bs); |
35 | BlockBackend *blk_by_qdev_id(const char *id, Error **errp); | 33 | |
36 | void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); | 34 | int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, |
37 | -int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, | 35 | - PreallocMode prealloc, Error **errp); |
38 | - int bytes); | 36 | + PreallocMode prealloc, BdrvRequestFlags flags, |
39 | int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, | 37 | + Error **errp); |
40 | unsigned int bytes, QEMUIOVector *qiov, | 38 | int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, |
41 | BdrvRequestFlags flags); | 39 | - PreallocMode prealloc, Error **errp); |
40 | + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); | ||
41 | |||
42 | int64_t bdrv_nb_sectors(BlockDriverState *bs); | ||
43 | int64_t bdrv_getlength(BlockDriverState *bs); | ||
42 | diff --git a/block/block-backend.c b/block/block-backend.c | 44 | diff --git a/block/block-backend.c b/block/block-backend.c |
43 | index XXXXXXX..XXXXXXX 100644 | 45 | index XXXXXXX..XXXXXXX 100644 |
44 | --- a/block/block-backend.c | 46 | --- a/block/block-backend.c |
45 | +++ b/block/block-backend.c | 47 | +++ b/block/block-backend.c |
46 | @@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, | 48 | @@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, |
47 | return rwco.ret; | 49 | return -ENOMEDIUM; |
48 | } | 50 | } |
49 | 51 | ||
50 | -int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, | 52 | - return bdrv_truncate(blk->root, offset, exact, prealloc, errp); |
51 | - int count) | 53 | + return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); |
52 | -{ | 54 | } |
53 | - int ret; | 55 | |
54 | - | 56 | int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, |
55 | - ret = blk_check_byte_request(blk, offset, count); | 57 | diff --git a/block/crypto.c b/block/crypto.c |
56 | - if (ret < 0) { | 58 | index XXXXXXX..XXXXXXX 100644 |
57 | - return ret; | 59 | --- a/block/crypto.c |
58 | - } | 60 | +++ b/block/crypto.c |
59 | - | 61 | @@ -XXX,XX +XXX,XX @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, |
60 | - blk_root_drained_begin(blk->root); | 62 | |
61 | - ret = blk_pread(blk, offset, buf, count); | 63 | offset += payload_offset; |
62 | - blk_root_drained_end(blk->root, NULL); | 64 | |
63 | - return ret; | 65 | - return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); |
64 | -} | 66 | + return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); |
65 | - | 67 | } |
66 | int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, | 68 | |
67 | int bytes, BdrvRequestFlags flags) | 69 | static void block_crypto_close(BlockDriverState *bs) |
70 | diff --git a/block/io.c b/block/io.c | ||
71 | index XXXXXXX..XXXXXXX 100644 | ||
72 | --- a/block/io.c | ||
73 | +++ b/block/io.c | ||
74 | @@ -XXX,XX +XXX,XX @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) | ||
75 | * 'offset' bytes in length. | ||
76 | */ | ||
77 | int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
78 | - PreallocMode prealloc, Error **errp) | ||
79 | + PreallocMode prealloc, BdrvRequestFlags flags, | ||
80 | + Error **errp) | ||
68 | { | 81 | { |
69 | diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c | 82 | BlockDriverState *bs = child->bs; |
70 | index XXXXXXX..XXXXXXX 100644 | 83 | BlockDriver *drv = bs->drv; |
71 | --- a/hw/block/hd-geometry.c | 84 | BdrvTrackedRequest req; |
72 | +++ b/hw/block/hd-geometry.c | 85 | - BdrvRequestFlags flags = 0; |
73 | @@ -XXX,XX +XXX,XX @@ static int guess_disk_lchs(BlockBackend *blk, | 86 | int64_t old_size, new_bytes; |
74 | 87 | int ret; | |
75 | blk_get_geometry(blk, &nb_sectors); | 88 | |
76 | 89 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, | |
77 | - /** | 90 | } |
78 | - * The function will be invoked during startup not only in sync I/O mode, | 91 | ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); |
79 | - * but also in async I/O mode. So the I/O throttling function has to | 92 | } else if (bs->file && drv->is_filter) { |
80 | - * be disabled temporarily here, not permanently. | 93 | - ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); |
81 | - */ | 94 | + ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); |
82 | - if (blk_pread_unthrottled(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) { | 95 | } else { |
83 | + if (blk_pread(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) { | 96 | error_setg(errp, "Image format driver does not support resize"); |
97 | ret = -ENOTSUP; | ||
98 | @@ -XXX,XX +XXX,XX @@ typedef struct TruncateCo { | ||
99 | int64_t offset; | ||
100 | bool exact; | ||
101 | PreallocMode prealloc; | ||
102 | + BdrvRequestFlags flags; | ||
103 | Error **errp; | ||
104 | int ret; | ||
105 | } TruncateCo; | ||
106 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque) | ||
107 | { | ||
108 | TruncateCo *tco = opaque; | ||
109 | tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact, | ||
110 | - tco->prealloc, tco->errp); | ||
111 | + tco->prealloc, tco->flags, tco->errp); | ||
112 | aio_wait_kick(); | ||
113 | } | ||
114 | |||
115 | int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
116 | - PreallocMode prealloc, Error **errp) | ||
117 | + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) | ||
118 | { | ||
119 | Coroutine *co; | ||
120 | TruncateCo tco = { | ||
121 | @@ -XXX,XX +XXX,XX @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
122 | .offset = offset, | ||
123 | .exact = exact, | ||
124 | .prealloc = prealloc, | ||
125 | + .flags = flags, | ||
126 | .errp = errp, | ||
127 | .ret = NOT_DONE, | ||
128 | }; | ||
129 | diff --git a/block/parallels.c b/block/parallels.c | ||
130 | index XXXXXXX..XXXXXXX 100644 | ||
131 | --- a/block/parallels.c | ||
132 | +++ b/block/parallels.c | ||
133 | @@ -XXX,XX +XXX,XX @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, | ||
134 | } else { | ||
135 | ret = bdrv_truncate(bs->file, | ||
136 | (s->data_end + space) << BDRV_SECTOR_BITS, | ||
137 | - false, PREALLOC_MODE_OFF, NULL); | ||
138 | + false, PREALLOC_MODE_OFF, 0, NULL); | ||
139 | } | ||
140 | if (ret < 0) { | ||
141 | return ret; | ||
142 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, | ||
143 | * That means we have to pass exact=true. | ||
144 | */ | ||
145 | ret = bdrv_truncate(bs->file, res->image_end_offset, true, | ||
146 | - PREALLOC_MODE_OFF, &local_err); | ||
147 | + PREALLOC_MODE_OFF, 0, &local_err); | ||
148 | if (ret < 0) { | ||
149 | error_report_err(local_err); | ||
150 | res->check_errors++; | ||
151 | @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) | ||
152 | |||
153 | /* errors are ignored, so we might as well pass exact=true */ | ||
154 | bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true, | ||
155 | - PREALLOC_MODE_OFF, NULL); | ||
156 | + PREALLOC_MODE_OFF, 0, NULL); | ||
157 | } | ||
158 | |||
159 | g_free(s->bat_dirty_bmap); | ||
160 | diff --git a/block/qcow.c b/block/qcow.c | ||
161 | index XXXXXXX..XXXXXXX 100644 | ||
162 | --- a/block/qcow.c | ||
163 | +++ b/block/qcow.c | ||
164 | @@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs, | ||
165 | return -E2BIG; | ||
166 | } | ||
167 | ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size, | ||
168 | - false, PREALLOC_MODE_OFF, NULL); | ||
169 | + false, PREALLOC_MODE_OFF, 0, NULL); | ||
170 | if (ret < 0) { | ||
171 | return ret; | ||
172 | } | ||
173 | @@ -XXX,XX +XXX,XX @@ static int qcow_make_empty(BlockDriverState *bs) | ||
174 | l1_length) < 0) | ||
84 | return -1; | 175 | return -1; |
85 | } | 176 | ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false, |
86 | /* test msdos magic */ | 177 | - PREALLOC_MODE_OFF, NULL); |
178 | + PREALLOC_MODE_OFF, 0, NULL); | ||
179 | if (ret < 0) | ||
180 | return ret; | ||
181 | |||
182 | diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/block/qcow2-refcount.c | ||
185 | +++ b/block/qcow2-refcount.c | ||
186 | @@ -XXX,XX +XXX,XX @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, | ||
187 | } | ||
188 | |||
189 | ret = bdrv_truncate(bs->file, offset + s->cluster_size, false, | ||
190 | - PREALLOC_MODE_OFF, &local_err); | ||
191 | + PREALLOC_MODE_OFF, 0, &local_err); | ||
192 | if (ret < 0) { | ||
193 | error_report_err(local_err); | ||
194 | goto resize_fail; | ||
195 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
196 | index XXXXXXX..XXXXXXX 100644 | ||
197 | --- a/block/qcow2.c | ||
198 | +++ b/block/qcow2.c | ||
199 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, | ||
200 | mode = PREALLOC_MODE_OFF; | ||
201 | } | ||
202 | ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false, | ||
203 | - mode, errp); | ||
204 | + mode, 0, errp); | ||
205 | if (ret < 0) { | ||
206 | return ret; | ||
207 | } | ||
208 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
209 | * always fulfilled, so there is no need to pass it on.) | ||
210 | */ | ||
211 | bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, | ||
212 | - false, PREALLOC_MODE_OFF, &local_err); | ||
213 | + false, PREALLOC_MODE_OFF, 0, &local_err); | ||
214 | if (local_err) { | ||
215 | warn_reportf_err(local_err, | ||
216 | "Failed to truncate the tail of the image: "); | ||
217 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
218 | * file should be resized to the exact target size, too, | ||
219 | * so we pass @exact here. | ||
220 | */ | ||
221 | - ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp); | ||
222 | + ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0, | ||
223 | + errp); | ||
224 | if (ret < 0) { | ||
225 | goto fail; | ||
226 | } | ||
227 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
228 | new_file_size = allocation_start + | ||
229 | nb_new_data_clusters * s->cluster_size; | ||
230 | /* Image file grows, so @exact does not matter */ | ||
231 | - ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp); | ||
232 | + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, | ||
233 | + errp); | ||
234 | if (ret < 0) { | ||
235 | error_prepend(errp, "Failed to resize underlying file: "); | ||
236 | qcow2_free_clusters(bs, allocation_start, | ||
237 | @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, | ||
238 | if (len < 0) { | ||
239 | return len; | ||
240 | } | ||
241 | - return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL); | ||
242 | + return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0, | ||
243 | + NULL); | ||
244 | } | ||
245 | |||
246 | if (offset_into_cluster(s, offset)) { | ||
247 | @@ -XXX,XX +XXX,XX @@ static int make_completely_empty(BlockDriverState *bs) | ||
248 | } | ||
249 | |||
250 | ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false, | ||
251 | - PREALLOC_MODE_OFF, &local_err); | ||
252 | + PREALLOC_MODE_OFF, 0, &local_err); | ||
253 | if (ret < 0) { | ||
254 | error_report_err(local_err); | ||
255 | goto fail; | ||
256 | diff --git a/block/raw-format.c b/block/raw-format.c | ||
257 | index XXXXXXX..XXXXXXX 100644 | ||
258 | --- a/block/raw-format.c | ||
259 | +++ b/block/raw-format.c | ||
260 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
261 | |||
262 | s->size = offset; | ||
263 | offset += s->offset; | ||
264 | - return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); | ||
265 | + return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); | ||
266 | } | ||
267 | |||
268 | static void raw_eject(BlockDriverState *bs, bool eject_flag) | ||
269 | diff --git a/block/vhdx-log.c b/block/vhdx-log.c | ||
270 | index XXXXXXX..XXXXXXX 100644 | ||
271 | --- a/block/vhdx-log.c | ||
272 | +++ b/block/vhdx-log.c | ||
273 | @@ -XXX,XX +XXX,XX @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, | ||
274 | goto exit; | ||
275 | } | ||
276 | ret = bdrv_truncate(bs->file, new_file_size, false, | ||
277 | - PREALLOC_MODE_OFF, NULL); | ||
278 | + PREALLOC_MODE_OFF, 0, NULL); | ||
279 | if (ret < 0) { | ||
280 | goto exit; | ||
281 | } | ||
282 | diff --git a/block/vhdx.c b/block/vhdx.c | ||
283 | index XXXXXXX..XXXXXXX 100644 | ||
284 | --- a/block/vhdx.c | ||
285 | +++ b/block/vhdx.c | ||
286 | @@ -XXX,XX +XXX,XX @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, | ||
287 | } | ||
288 | |||
289 | return bdrv_truncate(bs->file, *new_offset + s->block_size, false, | ||
290 | - PREALLOC_MODE_OFF, NULL); | ||
291 | + PREALLOC_MODE_OFF, 0, NULL); | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | diff --git a/block/vmdk.c b/block/vmdk.c | ||
296 | index XXXXXXX..XXXXXXX 100644 | ||
297 | --- a/block/vmdk.c | ||
298 | +++ b/block/vmdk.c | ||
299 | @@ -XXX,XX +XXX,XX @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
300 | } | ||
301 | length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); | ||
302 | ret = bdrv_truncate(s->extents[i].file, length, false, | ||
303 | - PREALLOC_MODE_OFF, NULL); | ||
304 | + PREALLOC_MODE_OFF, 0, NULL); | ||
305 | if (ret < 0) { | ||
306 | return ret; | ||
307 | } | ||
308 | diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c | ||
309 | index XXXXXXX..XXXXXXX 100644 | ||
310 | --- a/tests/test-block-iothread.c | ||
311 | +++ b/tests/test-block-iothread.c | ||
312 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c) | ||
313 | int ret; | ||
314 | |||
315 | /* Normal success path */ | ||
316 | - ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); | ||
317 | + ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); | ||
318 | g_assert_cmpint(ret, ==, 0); | ||
319 | |||
320 | /* Early error: Negative offset */ | ||
321 | - ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL); | ||
322 | + ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, 0, NULL); | ||
323 | g_assert_cmpint(ret, ==, -EINVAL); | ||
324 | |||
325 | /* Error: Read-only image */ | ||
326 | c->bs->read_only = true; | ||
327 | c->bs->open_flags &= ~BDRV_O_RDWR; | ||
328 | |||
329 | - ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); | ||
330 | + ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); | ||
331 | g_assert_cmpint(ret, ==, -EACCES); | ||
332 | |||
333 | c->bs->read_only = false; | ||
87 | -- | 334 | -- |
88 | 2.20.1 | 335 | 2.25.3 |
89 | 336 | ||
90 | 337 | diff view generated by jsdifflib |
1 | This fixes devices like IDE that can still start new requests from I/O | 1 | Now that the node level interface bdrv_truncate() supports passing request |
---|---|---|---|
2 | handlers in the CPU thread while the block backend is drained. | 2 | flags to the block driver, expose this on the BlockBackend level, too. |
3 | |||
4 | The basic assumption is that in a drain section, no new requests should | ||
5 | be allowed through a BlockBackend (blk_drained_begin/end don't exist, | ||
6 | we get drain sections only on the node level). However, there are two | ||
7 | special cases where requests should not be queued: | ||
8 | |||
9 | 1. Block jobs: We already make sure that block jobs are paused in a | ||
10 | drain section, so they won't start new requests. However, if the | ||
11 | drain_begin is called on the job's BlockBackend first, it can happen | ||
12 | that we deadlock because the job stays busy until it reaches a pause | ||
13 | point - which it can't if its requests aren't processed any more. | ||
14 | |||
15 | The proper solution here would be to make all requests through the | ||
16 | job's filter node instead of using a BlockBackend. For now, just | ||
17 | disabling request queuing on the job BlockBackend is simpler. | ||
18 | |||
19 | 2. In test cases where making requests through bdrv_* would be | ||
20 | cumbersome because we'd need a BdrvChild. As we already got the | ||
21 | functionality to disable request queuing from 1., use it in tests, | ||
22 | too, for convenience. | ||
23 | 3 | ||
24 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
6 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
25 | Reviewed-by: Max Reitz <mreitz@redhat.com> | 7 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
8 | Message-Id: <20200424125448.63318-4-kwolf@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
26 | --- | 10 | --- |
27 | include/sysemu/block-backend.h | 1 + | 11 | include/sysemu/block-backend.h | 2 +- |
28 | block/backup.c | 1 + | 12 | block.c | 3 ++- |
29 | block/block-backend.c | 53 ++++++++++++++++++++++++++++++++-- | 13 | block/block-backend.c | 4 ++-- |
30 | block/commit.c | 2 ++ | 14 | block/commit.c | 4 ++-- |
31 | block/mirror.c | 1 + | 15 | block/crypto.c | 2 +- |
32 | blockjob.c | 3 ++ | 16 | block/mirror.c | 2 +- |
33 | tests/test-bdrv-drain.c | 1 + | 17 | block/qcow2.c | 4 ++-- |
34 | 7 files changed, 59 insertions(+), 3 deletions(-) | 18 | block/qed.c | 2 +- |
19 | block/vdi.c | 2 +- | ||
20 | block/vhdx.c | 4 ++-- | ||
21 | block/vmdk.c | 6 +++--- | ||
22 | block/vpc.c | 2 +- | ||
23 | blockdev.c | 2 +- | ||
24 | qemu-img.c | 2 +- | ||
25 | qemu-io-cmds.c | 2 +- | ||
26 | 15 files changed, 22 insertions(+), 21 deletions(-) | ||
35 | 27 | ||
36 | diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h | 28 | diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h |
37 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/include/sysemu/block-backend.h | 30 | --- a/include/sysemu/block-backend.h |
39 | +++ b/include/sysemu/block-backend.h | 31 | +++ b/include/sysemu/block-backend.h |
40 | @@ -XXX,XX +XXX,XX @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); | 32 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, |
41 | 33 | int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, | |
42 | void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); | 34 | int bytes); |
43 | void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); | 35 | int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, |
44 | +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable); | 36 | - PreallocMode prealloc, Error **errp); |
45 | void blk_iostatus_enable(BlockBackend *blk); | 37 | + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); |
46 | bool blk_iostatus_is_enabled(const BlockBackend *blk); | 38 | int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); |
47 | BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); | 39 | int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, |
48 | diff --git a/block/backup.c b/block/backup.c | 40 | int64_t pos, int size); |
49 | index XXXXXXX..XXXXXXX 100644 | 41 | diff --git a/block.c b/block.c |
50 | --- a/block/backup.c | 42 | index XXXXXXX..XXXXXXX 100644 |
51 | +++ b/block/backup.c | 43 | --- a/block.c |
52 | @@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, | 44 | +++ b/block.c |
53 | if (ret < 0) { | 45 | @@ -XXX,XX +XXX,XX @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, |
54 | goto error; | 46 | int64_t size; |
55 | } | 47 | int ret; |
56 | + blk_set_disable_request_queuing(job->target, true); | 48 | |
57 | 49 | - ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); | |
58 | job->on_source_error = on_source_error; | 50 | + ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, |
59 | job->on_target_error = on_target_error; | 51 | + &local_err); |
52 | if (ret < 0 && ret != -ENOTSUP) { | ||
53 | error_propagate(errp, local_err); | ||
54 | return ret; | ||
60 | diff --git a/block/block-backend.c b/block/block-backend.c | 55 | diff --git a/block/block-backend.c b/block/block-backend.c |
61 | index XXXXXXX..XXXXXXX 100644 | 56 | index XXXXXXX..XXXXXXX 100644 |
62 | --- a/block/block-backend.c | 57 | --- a/block/block-backend.c |
63 | +++ b/block/block-backend.c | 58 | +++ b/block/block-backend.c |
64 | @@ -XXX,XX +XXX,XX @@ struct BlockBackend { | 59 | @@ -XXX,XX +XXX,XX @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, |
65 | QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; | ||
66 | |||
67 | int quiesce_counter; | ||
68 | + CoQueue queued_requests; | ||
69 | + bool disable_request_queuing; | ||
70 | + | ||
71 | VMChangeStateEntry *vmsh; | ||
72 | bool force_allow_inactivate; | ||
73 | |||
74 | @@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) | ||
75 | |||
76 | block_acct_init(&blk->stats); | ||
77 | |||
78 | + qemu_co_queue_init(&blk->queued_requests); | ||
79 | notifier_list_init(&blk->remove_bs_notifiers); | ||
80 | notifier_list_init(&blk->insert_bs_notifiers); | ||
81 | QLIST_INIT(&blk->aio_notifiers); | ||
82 | @@ -XXX,XX +XXX,XX @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) | ||
83 | blk->allow_aio_context_change = allow; | ||
84 | } | 60 | } |
85 | 61 | ||
86 | +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) | 62 | int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, |
87 | +{ | 63 | - PreallocMode prealloc, Error **errp) |
88 | + blk->disable_request_queuing = disable; | 64 | + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) |
89 | +} | ||
90 | + | ||
91 | static int blk_check_byte_request(BlockBackend *blk, int64_t offset, | ||
92 | size_t size) | ||
93 | { | 65 | { |
94 | @@ -XXX,XX +XXX,XX @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, | 66 | if (!blk_is_available(blk)) { |
95 | return 0; | 67 | error_setg(errp, "No medium inserted"); |
68 | return -ENOMEDIUM; | ||
69 | } | ||
70 | |||
71 | - return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); | ||
72 | + return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp); | ||
96 | } | 73 | } |
97 | 74 | ||
98 | +static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) | 75 | int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, |
99 | +{ | ||
100 | + if (blk->quiesce_counter && !blk->disable_request_queuing) { | ||
101 | + qemu_co_queue_wait(&blk->queued_requests, NULL); | ||
102 | + } | ||
103 | +} | ||
104 | + | ||
105 | int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, | ||
106 | unsigned int bytes, QEMUIOVector *qiov, | ||
107 | BdrvRequestFlags flags) | ||
108 | { | ||
109 | int ret; | ||
110 | - BlockDriverState *bs = blk_bs(blk); | ||
111 | + BlockDriverState *bs; | ||
112 | |||
113 | + blk_wait_while_drained(blk); | ||
114 | + | ||
115 | + /* Call blk_bs() only after waiting, the graph may have changed */ | ||
116 | + bs = blk_bs(blk); | ||
117 | trace_blk_co_preadv(blk, bs, offset, bytes, flags); | ||
118 | |||
119 | ret = blk_check_byte_request(blk, offset, bytes); | ||
120 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, | ||
121 | BdrvRequestFlags flags) | ||
122 | { | ||
123 | int ret; | ||
124 | - BlockDriverState *bs = blk_bs(blk); | ||
125 | + BlockDriverState *bs; | ||
126 | |||
127 | + blk_wait_while_drained(blk); | ||
128 | + | ||
129 | + /* Call blk_bs() only after waiting, the graph may have changed */ | ||
130 | + bs = blk_bs(blk); | ||
131 | trace_blk_co_pwritev(blk, bs, offset, bytes, flags); | ||
132 | |||
133 | ret = blk_check_byte_request(blk, offset, bytes); | ||
134 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_read_entry(void *opaque) | ||
135 | BlkRwCo *rwco = &acb->rwco; | ||
136 | QEMUIOVector *qiov = rwco->iobuf; | ||
137 | |||
138 | + if (rwco->blk->quiesce_counter) { | ||
139 | + blk_dec_in_flight(rwco->blk); | ||
140 | + blk_wait_while_drained(rwco->blk); | ||
141 | + blk_inc_in_flight(rwco->blk); | ||
142 | + } | ||
143 | + | ||
144 | assert(qiov->size == acb->bytes); | ||
145 | rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, | ||
146 | qiov, rwco->flags); | ||
147 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_write_entry(void *opaque) | ||
148 | BlkRwCo *rwco = &acb->rwco; | ||
149 | QEMUIOVector *qiov = rwco->iobuf; | ||
150 | |||
151 | + if (rwco->blk->quiesce_counter) { | ||
152 | + blk_dec_in_flight(rwco->blk); | ||
153 | + blk_wait_while_drained(rwco->blk); | ||
154 | + blk_inc_in_flight(rwco->blk); | ||
155 | + } | ||
156 | + | ||
157 | assert(!qiov || qiov->size == acb->bytes); | ||
158 | rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, | ||
159 | qiov, rwco->flags); | ||
160 | @@ -XXX,XX +XXX,XX @@ void blk_aio_cancel_async(BlockAIOCB *acb) | ||
161 | |||
162 | int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) | ||
163 | { | ||
164 | + blk_wait_while_drained(blk); | ||
165 | + | ||
166 | if (!blk_is_available(blk)) { | ||
167 | return -ENOMEDIUM; | ||
168 | } | ||
169 | @@ -XXX,XX +XXX,XX @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, | ||
170 | |||
171 | int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) | ||
172 | { | ||
173 | - int ret = blk_check_byte_request(blk, offset, bytes); | ||
174 | + int ret; | ||
175 | + | ||
176 | + blk_wait_while_drained(blk); | ||
177 | + | ||
178 | + ret = blk_check_byte_request(blk, offset, bytes); | ||
179 | if (ret < 0) { | ||
180 | return ret; | ||
181 | } | ||
182 | @@ -XXX,XX +XXX,XX @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) | ||
183 | |||
184 | int blk_co_flush(BlockBackend *blk) | ||
185 | { | ||
186 | + blk_wait_while_drained(blk); | ||
187 | + | ||
188 | if (!blk_is_available(blk)) { | ||
189 | return -ENOMEDIUM; | ||
190 | } | ||
191 | @@ -XXX,XX +XXX,XX @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) | ||
192 | if (blk->dev_ops && blk->dev_ops->drained_end) { | ||
193 | blk->dev_ops->drained_end(blk->dev_opaque); | ||
194 | } | ||
195 | + while (qemu_co_enter_next(&blk->queued_requests, NULL)) { | ||
196 | + /* Resume all queued requests */ | ||
197 | + } | ||
198 | } | ||
199 | } | ||
200 | |||
201 | diff --git a/block/commit.c b/block/commit.c | 76 | diff --git a/block/commit.c b/block/commit.c |
202 | index XXXXXXX..XXXXXXX 100644 | 77 | index XXXXXXX..XXXXXXX 100644 |
203 | --- a/block/commit.c | 78 | --- a/block/commit.c |
204 | +++ b/block/commit.c | 79 | +++ b/block/commit.c |
205 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, | 80 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn commit_run(Job *job, Error **errp) |
206 | if (ret < 0) { | 81 | } |
207 | goto fail; | 82 | |
208 | } | 83 | if (base_len < len) { |
209 | + blk_set_disable_request_queuing(s->base, true); | 84 | - ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL); |
210 | s->base_bs = base; | 85 | + ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL); |
211 | 86 | if (ret) { | |
212 | /* Required permissions are already taken with block_job_add_bdrv() */ | 87 | goto out; |
213 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, | 88 | } |
214 | if (ret < 0) { | 89 | @@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs) |
215 | goto fail; | 90 | * grow the backing file image if possible. If not possible, |
216 | } | 91 | * we must return an error */ |
217 | + blk_set_disable_request_queuing(s->top, true); | 92 | if (length > backing_length) { |
218 | 93 | - ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, | |
219 | s->backing_file_str = g_strdup(backing_file_str); | 94 | + ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0, |
220 | s->on_error = on_error; | 95 | &local_err); |
96 | if (ret < 0) { | ||
97 | error_report_err(local_err); | ||
98 | diff --git a/block/crypto.c b/block/crypto.c | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/block/crypto.c | ||
101 | +++ b/block/crypto.c | ||
102 | @@ -XXX,XX +XXX,XX @@ static ssize_t block_crypto_init_func(QCryptoBlock *block, | ||
103 | * which will be used by the crypto header | ||
104 | */ | ||
105 | return blk_truncate(data->blk, data->size + headerlen, false, | ||
106 | - data->prealloc, errp); | ||
107 | + data->prealloc, 0, errp); | ||
108 | } | ||
109 | |||
110 | |||
221 | diff --git a/block/mirror.c b/block/mirror.c | 111 | diff --git a/block/mirror.c b/block/mirror.c |
222 | index XXXXXXX..XXXXXXX 100644 | 112 | index XXXXXXX..XXXXXXX 100644 |
223 | --- a/block/mirror.c | 113 | --- a/block/mirror.c |
224 | +++ b/block/mirror.c | 114 | +++ b/block/mirror.c |
225 | @@ -XXX,XX +XXX,XX @@ static BlockJob *mirror_start_job( | 115 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_run(Job *job, Error **errp) |
226 | blk_set_force_allow_inactivate(s->target); | 116 | |
227 | } | 117 | if (s->bdev_length > base_length) { |
228 | blk_set_allow_aio_context_change(s->target, true); | 118 | ret = blk_truncate(s->target, s->bdev_length, false, |
229 | + blk_set_disable_request_queuing(s->target, true); | 119 | - PREALLOC_MODE_OFF, NULL); |
230 | 120 | + PREALLOC_MODE_OFF, 0, NULL); | |
231 | s->replaces = g_strdup(replaces); | 121 | if (ret < 0) { |
232 | s->on_source_error = on_source_error; | 122 | goto immediate_exit; |
233 | diff --git a/blockjob.c b/blockjob.c | 123 | } |
234 | index XXXXXXX..XXXXXXX 100644 | 124 | diff --git a/block/qcow2.c b/block/qcow2.c |
235 | --- a/blockjob.c | 125 | index XXXXXXX..XXXXXXX 100644 |
236 | +++ b/blockjob.c | 126 | --- a/block/qcow2.c |
237 | @@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, | 127 | +++ b/block/qcow2.c |
238 | 128 | @@ -XXX,XX +XXX,XX @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) | |
239 | bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); | 129 | |
240 | 130 | /* Okay, now that we have a valid image, let's give it the right size */ | |
241 | + /* Disable request queuing in the BlockBackend to avoid deadlocks on drain: | 131 | ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation, |
242 | + * The job reports that it's busy until it reaches a pause point. */ | 132 | - errp); |
243 | + blk_set_disable_request_queuing(blk, true); | 133 | + 0, errp); |
244 | blk_set_allow_aio_context_change(blk, true); | 134 | if (ret < 0) { |
245 | 135 | error_prepend(errp, "Could not resize image: "); | |
246 | /* Only set speed when necessary to avoid NotSupported error */ | 136 | goto out; |
247 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 137 | @@ -XXX,XX +XXX,XX @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, |
248 | index XXXXXXX..XXXXXXX 100644 | 138 | * Amending image options should ensure that the image has |
249 | --- a/tests/test-bdrv-drain.c | 139 | * exactly the given new values, so pass exact=true here. |
250 | +++ b/tests/test-bdrv-drain.c | 140 | */ |
251 | @@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) | 141 | - ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp); |
252 | &error_abort); | 142 | + ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp); |
253 | s = bs->opaque; | 143 | blk_unref(blk); |
254 | blk_insert_bs(blk, bs, &error_abort); | 144 | if (ret < 0) { |
255 | + blk_set_disable_request_queuing(blk, true); | 145 | return ret; |
256 | 146 | diff --git a/block/qed.c b/block/qed.c | |
257 | blk_set_aio_context(blk, ctx_a, &error_abort); | 147 | index XXXXXXX..XXXXXXX 100644 |
258 | aio_context_acquire(ctx_a); | 148 | --- a/block/qed.c |
149 | +++ b/block/qed.c | ||
150 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, | ||
151 | * The QED format associates file length with allocation status, | ||
152 | * so a new file (which is empty) must have a length of 0. | ||
153 | */ | ||
154 | - ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp); | ||
155 | + ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp); | ||
156 | if (ret < 0) { | ||
157 | goto out; | ||
158 | } | ||
159 | diff --git a/block/vdi.c b/block/vdi.c | ||
160 | index XXXXXXX..XXXXXXX 100644 | ||
161 | --- a/block/vdi.c | ||
162 | +++ b/block/vdi.c | ||
163 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, | ||
164 | |||
165 | if (image_type == VDI_TYPE_STATIC) { | ||
166 | ret = blk_truncate(blk, offset + blocks * block_size, false, | ||
167 | - PREALLOC_MODE_OFF, errp); | ||
168 | + PREALLOC_MODE_OFF, 0, errp); | ||
169 | if (ret < 0) { | ||
170 | error_prepend(errp, "Failed to statically allocate file"); | ||
171 | goto exit; | ||
172 | diff --git a/block/vhdx.c b/block/vhdx.c | ||
173 | index XXXXXXX..XXXXXXX 100644 | ||
174 | --- a/block/vhdx.c | ||
175 | +++ b/block/vhdx.c | ||
176 | @@ -XXX,XX +XXX,XX @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s, | ||
177 | /* All zeroes, so we can just extend the file - the end of the BAT | ||
178 | * is the furthest thing we have written yet */ | ||
179 | ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF, | ||
180 | - errp); | ||
181 | + 0, errp); | ||
182 | if (ret < 0) { | ||
183 | goto exit; | ||
184 | } | ||
185 | } else if (type == VHDX_TYPE_FIXED) { | ||
186 | ret = blk_truncate(blk, data_file_offset + image_size, false, | ||
187 | - PREALLOC_MODE_OFF, errp); | ||
188 | + PREALLOC_MODE_OFF, 0, errp); | ||
189 | if (ret < 0) { | ||
190 | goto exit; | ||
191 | } | ||
192 | diff --git a/block/vmdk.c b/block/vmdk.c | ||
193 | index XXXXXXX..XXXXXXX 100644 | ||
194 | --- a/block/vmdk.c | ||
195 | +++ b/block/vmdk.c | ||
196 | @@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk, | ||
197 | int gd_buf_size; | ||
198 | |||
199 | if (flat) { | ||
200 | - ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp); | ||
201 | + ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp); | ||
202 | goto exit; | ||
203 | } | ||
204 | magic = cpu_to_be32(VMDK4_MAGIC); | ||
205 | @@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk, | ||
206 | } | ||
207 | |||
208 | ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false, | ||
209 | - PREALLOC_MODE_OFF, errp); | ||
210 | + PREALLOC_MODE_OFF, 0, errp); | ||
211 | if (ret < 0) { | ||
212 | goto exit; | ||
213 | } | ||
214 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn vmdk_co_do_create(int64_t size, | ||
215 | /* bdrv_pwrite write padding zeros to align to sector, we don't need that | ||
216 | * for description file */ | ||
217 | if (desc_offset == 0) { | ||
218 | - ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp); | ||
219 | + ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp); | ||
220 | if (ret < 0) { | ||
221 | goto exit; | ||
222 | } | ||
223 | diff --git a/block/vpc.c b/block/vpc.c | ||
224 | index XXXXXXX..XXXXXXX 100644 | ||
225 | --- a/block/vpc.c | ||
226 | +++ b/block/vpc.c | ||
227 | @@ -XXX,XX +XXX,XX @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, | ||
228 | /* Add footer to total size */ | ||
229 | total_size += HEADER_SIZE; | ||
230 | |||
231 | - ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp); | ||
232 | + ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp); | ||
233 | if (ret < 0) { | ||
234 | return ret; | ||
235 | } | ||
236 | diff --git a/blockdev.c b/blockdev.c | ||
237 | index XXXXXXX..XXXXXXX 100644 | ||
238 | --- a/blockdev.c | ||
239 | +++ b/blockdev.c | ||
240 | @@ -XXX,XX +XXX,XX @@ void qmp_block_resize(bool has_device, const char *device, | ||
241 | } | ||
242 | |||
243 | bdrv_drained_begin(bs); | ||
244 | - ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp); | ||
245 | + ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); | ||
246 | bdrv_drained_end(bs); | ||
247 | |||
248 | out: | ||
249 | diff --git a/qemu-img.c b/qemu-img.c | ||
250 | index XXXXXXX..XXXXXXX 100644 | ||
251 | --- a/qemu-img.c | ||
252 | +++ b/qemu-img.c | ||
253 | @@ -XXX,XX +XXX,XX @@ static int img_resize(int argc, char **argv) | ||
254 | * resizing, so pass @exact=true. It is of no use to report | ||
255 | * success when the image has not actually been resized. | ||
256 | */ | ||
257 | - ret = blk_truncate(blk, total_size, true, prealloc, &err); | ||
258 | + ret = blk_truncate(blk, total_size, true, prealloc, 0, &err); | ||
259 | if (!ret) { | ||
260 | qprintf(quiet, "Image resized.\n"); | ||
261 | } else { | ||
262 | diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c | ||
263 | index XXXXXXX..XXXXXXX 100644 | ||
264 | --- a/qemu-io-cmds.c | ||
265 | +++ b/qemu-io-cmds.c | ||
266 | @@ -XXX,XX +XXX,XX @@ static int truncate_f(BlockBackend *blk, int argc, char **argv) | ||
267 | * exact=true. It is better to err on the "emit more errors" side | ||
268 | * than to be overly permissive. | ||
269 | */ | ||
270 | - ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err); | ||
271 | + ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err); | ||
272 | if (ret < 0) { | ||
273 | error_report_err(local_err); | ||
274 | return ret; | ||
259 | -- | 275 | -- |
260 | 2.20.1 | 276 | 2.25.3 |
261 | 277 | ||
262 | 278 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | If BDRV_REQ_ZERO_WRITE is set and we're extending the image, calling |
---|---|---|---|
2 | qcow2_cluster_zeroize() with flags=0 does the right thing: It doesn't | ||
3 | undo any previous preallocation, but just adds the zero flag to all | ||
4 | relevant L2 entries. If an external data file is in use, a write_zeroes | ||
5 | request to the data file is made instead. | ||
2 | 6 | ||
3 | We already have 030 for that in general, but this tests very specific | 7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
4 | cases of both jobs finishing concurrently. | 8 | Message-Id: <20200424125448.63318-5-kwolf@redhat.com> |
5 | 9 | Reviewed-by: Eric Blake <eblake@redhat.com> | |
6 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 10 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | --- | 12 | --- |
9 | tests/qemu-iotests/258 | 163 +++++++++++++++++++++++++++++++++++++ | 13 | block/qcow2-cluster.c | 2 +- |
10 | tests/qemu-iotests/258.out | 33 ++++++++ | 14 | block/qcow2.c | 34 ++++++++++++++++++++++++++++++++++ |
11 | tests/qemu-iotests/group | 1 + | 15 | 2 files changed, 35 insertions(+), 1 deletion(-) |
12 | 3 files changed, 197 insertions(+) | ||
13 | create mode 100755 tests/qemu-iotests/258 | ||
14 | create mode 100644 tests/qemu-iotests/258.out | ||
15 | 16 | ||
16 | diff --git a/tests/qemu-iotests/258 b/tests/qemu-iotests/258 | 17 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c |
17 | new file mode 100755 | 18 | index XXXXXXX..XXXXXXX 100644 |
18 | index XXXXXXX..XXXXXXX | 19 | --- a/block/qcow2-cluster.c |
19 | --- /dev/null | 20 | +++ b/block/qcow2-cluster.c |
20 | +++ b/tests/qemu-iotests/258 | 21 | @@ -XXX,XX +XXX,XX @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, |
21 | @@ -XXX,XX +XXX,XX @@ | 22 | /* Caller must pass aligned values, except at image end */ |
22 | +#!/usr/bin/env python | 23 | assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); |
23 | +# | 24 | assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || |
24 | +# Very specific tests for adjacent commit/stream block jobs | 25 | - end_offset == bs->total_sectors << BDRV_SECTOR_BITS); |
25 | +# | 26 | + end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); |
26 | +# Copyright (C) 2019 Red Hat, Inc. | 27 | |
27 | +# | 28 | /* The zero flag is only supported by version 3 and newer */ |
28 | +# This program is free software; you can redistribute it and/or modify | 29 | if (s->qcow_version < 3) { |
29 | +# it under the terms of the GNU General Public License as published by | 30 | diff --git a/block/qcow2.c b/block/qcow2.c |
30 | +# the Free Software Foundation; either version 2 of the License, or | 31 | index XXXXXXX..XXXXXXX 100644 |
31 | +# (at your option) any later version. | 32 | --- a/block/qcow2.c |
32 | +# | 33 | +++ b/block/qcow2.c |
33 | +# This program is distributed in the hope that it will be useful, | 34 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, |
34 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | 35 | |
35 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 36 | bs->supported_zero_flags = header.version >= 3 ? |
36 | +# GNU General Public License for more details. | 37 | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0; |
37 | +# | 38 | + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; |
38 | +# You should have received a copy of the GNU General Public License | 39 | |
39 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | 40 | /* Repair image if dirty */ |
40 | +# | 41 | if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && |
41 | +# Creator/Owner: Max Reitz <mreitz@redhat.com> | 42 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, |
43 | g_assert_not_reached(); | ||
44 | } | ||
45 | |||
46 | + if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) { | ||
47 | + uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size); | ||
42 | + | 48 | + |
43 | +import iotests | 49 | + /* |
44 | +from iotests import log, qemu_img, qemu_io_silent, \ | 50 | + * Use zero clusters as much as we can. qcow2_cluster_zeroize() |
45 | + filter_qmp_testfiles, filter_qmp_imgfmt | 51 | + * requires a cluster-aligned start. The end may be unaligned if it is |
52 | + * at the end of the image (which it is here). | ||
53 | + */ | ||
54 | + ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0); | ||
55 | + if (ret < 0) { | ||
56 | + error_setg_errno(errp, -ret, "Failed to zero out new clusters"); | ||
57 | + goto fail; | ||
58 | + } | ||
46 | + | 59 | + |
47 | +# Need backing file and change-backing-file support | 60 | + /* Write explicit zeros for the unaligned head */ |
48 | +iotests.verify_image_format(supported_fmts=['qcow2', 'qed']) | 61 | + if (zero_start > old_length) { |
49 | +iotests.verify_platform(['linux']) | 62 | + uint64_t len = zero_start - old_length; |
63 | + uint8_t *buf = qemu_blockalign0(bs, len); | ||
64 | + QEMUIOVector qiov; | ||
65 | + qemu_iovec_init_buf(&qiov, buf, len); | ||
50 | + | 66 | + |
67 | + qemu_co_mutex_unlock(&s->lock); | ||
68 | + ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0); | ||
69 | + qemu_co_mutex_lock(&s->lock); | ||
51 | + | 70 | + |
52 | +# Returns a node for blockdev-add | 71 | + qemu_vfree(buf); |
53 | +def node(node_name, path, backing=None, fmt=None, throttle=None): | 72 | + if (ret < 0) { |
54 | + if fmt is None: | 73 | + error_setg_errno(errp, -ret, "Failed to zero out the new area"); |
55 | + fmt = iotests.imgfmt | 74 | + goto fail; |
56 | + | 75 | + } |
57 | + res = { | ||
58 | + 'node-name': node_name, | ||
59 | + 'driver': fmt, | ||
60 | + 'file': { | ||
61 | + 'driver': 'file', | ||
62 | + 'filename': path | ||
63 | + } | 76 | + } |
64 | + } | 77 | + } |
65 | + | 78 | + |
66 | + if backing is not None: | 79 | if (prealloc != PREALLOC_MODE_OFF) { |
67 | + res['backing'] = backing | 80 | /* Flush metadata before actually changing the image size */ |
68 | + | 81 | ret = qcow2_write_caches(bs); |
69 | + if throttle: | ||
70 | + res['file'] = { | ||
71 | + 'driver': 'throttle', | ||
72 | + 'throttle-group': throttle, | ||
73 | + 'file': res['file'] | ||
74 | + } | ||
75 | + | ||
76 | + return res | ||
77 | + | ||
78 | +# Finds a node in the debug block graph | ||
79 | +def find_graph_node(graph, node_id): | ||
80 | + return next(node for node in graph['nodes'] if node['id'] == node_id) | ||
81 | + | ||
82 | + | ||
83 | +def test_concurrent_finish(write_to_stream_node): | ||
84 | + log('') | ||
85 | + log('=== Commit and stream finish concurrently (letting %s write) ===' % \ | ||
86 | + ('stream' if write_to_stream_node else 'commit')) | ||
87 | + log('') | ||
88 | + | ||
89 | + # All chosen in such a way that when the commit job wants to | ||
90 | + # finish, it polls and thus makes stream finish concurrently -- | ||
91 | + # and the other way around, depending on whether the commit job | ||
92 | + # is finalized before stream completes or not. | ||
93 | + | ||
94 | + with iotests.FilePath('node4.img') as node4_path, \ | ||
95 | + iotests.FilePath('node3.img') as node3_path, \ | ||
96 | + iotests.FilePath('node2.img') as node2_path, \ | ||
97 | + iotests.FilePath('node1.img') as node1_path, \ | ||
98 | + iotests.FilePath('node0.img') as node0_path, \ | ||
99 | + iotests.VM() as vm: | ||
100 | + | ||
101 | + # It is important to use raw for the base layer (so that | ||
102 | + # permissions are just handed through to the protocol layer) | ||
103 | + assert qemu_img('create', '-f', 'raw', node0_path, '64M') == 0 | ||
104 | + | ||
105 | + stream_throttle=None | ||
106 | + commit_throttle=None | ||
107 | + | ||
108 | + for path in [node1_path, node2_path, node3_path, node4_path]: | ||
109 | + assert qemu_img('create', '-f', iotests.imgfmt, path, '64M') == 0 | ||
110 | + | ||
111 | + if write_to_stream_node: | ||
112 | + # This is what (most of the time) makes commit finish | ||
113 | + # earlier and then pull in stream | ||
114 | + assert qemu_io_silent(node2_path, | ||
115 | + '-c', 'write %iK 64K' % (65536 - 192), | ||
116 | + '-c', 'write %iK 64K' % (65536 - 64)) == 0 | ||
117 | + | ||
118 | + stream_throttle='tg' | ||
119 | + else: | ||
120 | + # And this makes stream finish earlier | ||
121 | + assert qemu_io_silent(node1_path, | ||
122 | + '-c', 'write %iK 64K' % (65536 - 64)) == 0 | ||
123 | + | ||
124 | + commit_throttle='tg' | ||
125 | + | ||
126 | + vm.launch() | ||
127 | + | ||
128 | + vm.qmp_log('object-add', | ||
129 | + qom_type='throttle-group', | ||
130 | + id='tg', | ||
131 | + props={ | ||
132 | + 'x-iops-write': 1, | ||
133 | + 'x-iops-write-max': 1 | ||
134 | + }) | ||
135 | + | ||
136 | + vm.qmp_log('blockdev-add', | ||
137 | + filters=[filter_qmp_testfiles, filter_qmp_imgfmt], | ||
138 | + **node('node4', node4_path, throttle=stream_throttle, | ||
139 | + backing=node('node3', node3_path, | ||
140 | + backing=node('node2', node2_path, | ||
141 | + backing=node('node1', node1_path, | ||
142 | + backing=node('node0', node0_path, throttle=commit_throttle, | ||
143 | + fmt='raw')))))) | ||
144 | + | ||
145 | + vm.qmp_log('block-commit', | ||
146 | + job_id='commit', | ||
147 | + device='node4', | ||
148 | + filter_node_name='commit-filter', | ||
149 | + top_node='node1', | ||
150 | + base_node='node0', | ||
151 | + auto_finalize=False) | ||
152 | + | ||
153 | + vm.qmp_log('block-stream', | ||
154 | + job_id='stream', | ||
155 | + device='node3', | ||
156 | + base_node='commit-filter') | ||
157 | + | ||
158 | + if write_to_stream_node: | ||
159 | + vm.run_job('commit', auto_finalize=False, auto_dismiss=True) | ||
160 | + vm.run_job('stream', auto_finalize=True, auto_dismiss=True) | ||
161 | + else: | ||
162 | + # No, the jobs do not really finish concurrently here, | ||
163 | + # the stream job does complete strictly before commit. | ||
164 | + # But still, this is close enough for what we want to | ||
165 | + # test. | ||
166 | + vm.run_job('stream', auto_finalize=True, auto_dismiss=True) | ||
167 | + vm.run_job('commit', auto_finalize=False, auto_dismiss=True) | ||
168 | + | ||
169 | + # Assert that the backing node of node3 is node 0 now | ||
170 | + graph = vm.qmp('x-debug-query-block-graph')['return'] | ||
171 | + for edge in graph['edges']: | ||
172 | + if edge['name'] == 'backing' and \ | ||
173 | + find_graph_node(graph, edge['parent'])['name'] == 'node3': | ||
174 | + assert find_graph_node(graph, edge['child'])['name'] == 'node0' | ||
175 | + break | ||
176 | + | ||
177 | + | ||
178 | +def main(): | ||
179 | + log('Running tests:') | ||
180 | + test_concurrent_finish(True) | ||
181 | + test_concurrent_finish(False) | ||
182 | + | ||
183 | +if __name__ == '__main__': | ||
184 | + main() | ||
185 | diff --git a/tests/qemu-iotests/258.out b/tests/qemu-iotests/258.out | ||
186 | new file mode 100644 | ||
187 | index XXXXXXX..XXXXXXX | ||
188 | --- /dev/null | ||
189 | +++ b/tests/qemu-iotests/258.out | ||
190 | @@ -XXX,XX +XXX,XX @@ | ||
191 | +Running tests: | ||
192 | + | ||
193 | +=== Commit and stream finish concurrently (letting stream write) === | ||
194 | + | ||
195 | +{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}} | ||
196 | +{"return": {}} | ||
197 | +{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "throttle-group": "tg"}, "node-name": "node4"}} | ||
198 | +{"return": {}} | ||
199 | +{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}} | ||
200 | +{"return": {}} | ||
201 | +{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}} | ||
202 | +{"return": {}} | ||
203 | +{"execute": "job-finalize", "arguments": {"id": "commit"}} | ||
204 | +{"return": {}} | ||
205 | +{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
206 | +{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
207 | +{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
208 | + | ||
209 | +=== Commit and stream finish concurrently (letting commit write) === | ||
210 | + | ||
211 | +{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}} | ||
212 | +{"return": {}} | ||
213 | +{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "throttle-group": "tg"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "node-name": "node4"}} | ||
214 | +{"return": {}} | ||
215 | +{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}} | ||
216 | +{"return": {}} | ||
217 | +{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}} | ||
218 | +{"return": {}} | ||
219 | +{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
220 | +{"execute": "job-finalize", "arguments": {"id": "commit"}} | ||
221 | +{"return": {}} | ||
222 | +{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
223 | +{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
224 | diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group | ||
225 | index XXXXXXX..XXXXXXX 100644 | ||
226 | --- a/tests/qemu-iotests/group | ||
227 | +++ b/tests/qemu-iotests/group | ||
228 | @@ -XXX,XX +XXX,XX @@ | ||
229 | 254 rw backing quick | ||
230 | 255 rw quick | ||
231 | 256 rw quick | ||
232 | +258 rw quick | ||
233 | 262 rw quick migration | ||
234 | -- | 82 | -- |
235 | 2.20.1 | 83 | 2.25.3 |
236 | 84 | ||
237 | 85 | diff view generated by jsdifflib |
1 | The code path for -device drive=<node-name> or without a drive=... | 1 | The raw format driver can simply forward the flag and let its bs->file |
---|---|---|---|
2 | option for empty drives, which is supposed to be used with -blockdev | 2 | child take care of actually providing the zeros. |
3 | differs enough from the -drive based path with a user-owned | ||
4 | BlockBackend, so we want to test both paths at least for the basic tests | ||
5 | implemented by TestInitiallyFilled and TestInitiallyEmpty. | ||
6 | |||
7 | This would have caught the bug recently fixed for inserting read-only | ||
8 | nodes into a scsi-cd created without a drive=... option. | ||
9 | 3 | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | Reviewed-by: Max Reitz <mreitz@redhat.com> | 5 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
6 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Message-Id: <20200424125448.63318-6-kwolf@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | --- | 10 | --- |
13 | tests/qemu-iotests/118 | 43 ++++++++++++++++++++++++++------------ | 11 | block/raw-format.c | 4 +++- |
14 | tests/qemu-iotests/118.out | 4 ++-- | 12 | 1 file changed, 3 insertions(+), 1 deletion(-) |
15 | 2 files changed, 32 insertions(+), 15 deletions(-) | ||
16 | 13 | ||
17 | diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118 | 14 | diff --git a/block/raw-format.c b/block/raw-format.c |
18 | index XXXXXXX..XXXXXXX 100755 | ||
19 | --- a/tests/qemu-iotests/118 | ||
20 | +++ b/tests/qemu-iotests/118 | ||
21 | @@ -XXX,XX +XXX,XX @@ class ChangeBaseClass(iotests.QMPTestCase): | ||
22 | has_opened = False | ||
23 | has_closed = False | ||
24 | |||
25 | + device_name = 'qdev0' | ||
26 | + use_drive = False | ||
27 | + | ||
28 | def process_events(self): | ||
29 | for event in self.vm.get_qmp_events(wait=False): | ||
30 | if (event['event'] == 'DEVICE_TRAY_MOVED' and | ||
31 | - event['data']['device'] == 'drive0'): | ||
32 | + (event['data']['device'] == 'drive0' or | ||
33 | + event['data']['id'] == self.device_name)): | ||
34 | if event['data']['tray-open'] == False: | ||
35 | self.has_closed = True | ||
36 | else: | ||
37 | @@ -XXX,XX +XXX,XX @@ class ChangeBaseClass(iotests.QMPTestCase): | ||
38 | |||
39 | class GeneralChangeTestsBaseClass(ChangeBaseClass): | ||
40 | |||
41 | - device_name = 'qdev0' | ||
42 | - | ||
43 | def test_change(self): | ||
44 | + # 'change' requires a drive name, so skip the test for blockdev | ||
45 | + if not self.use_drive: | ||
46 | + return | ||
47 | + | ||
48 | result = self.vm.qmp('change', device='drive0', target=new_img, | ||
49 | arg=iotests.imgfmt) | ||
50 | self.assert_qmp(result, 'return', {}) | ||
51 | @@ -XXX,XX +XXX,XX @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass): | ||
52 | qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k') | ||
53 | qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') | ||
54 | self.vm = iotests.VM() | ||
55 | - self.vm.add_drive(old_img, 'media=%s' % self.media, 'none') | ||
56 | + if self.use_drive: | ||
57 | + self.vm.add_drive(old_img, 'media=%s' % self.media, 'none') | ||
58 | + else: | ||
59 | + self.vm.add_blockdev([ 'node-name=drive0', | ||
60 | + 'driver=%s' % iotests.imgfmt, | ||
61 | + 'file.driver=file', | ||
62 | + 'file.filename=%s' % old_img ]) | ||
63 | if self.interface == 'scsi': | ||
64 | self.vm.add_device('virtio-scsi-pci') | ||
65 | self.vm.add_device('%s,drive=drive0,id=%s' % | ||
66 | @@ -XXX,XX +XXX,XX @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass): | ||
67 | |||
68 | def setUp(self): | ||
69 | qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') | ||
70 | - self.vm = iotests.VM().add_drive(None, 'media=%s' % self.media, 'none') | ||
71 | + self.vm = iotests.VM() | ||
72 | + if self.use_drive: | ||
73 | + self.vm.add_drive(None, 'media=%s' % self.media, 'none') | ||
74 | if self.interface == 'scsi': | ||
75 | self.vm.add_device('virtio-scsi-pci') | ||
76 | - self.vm.add_device('%s,drive=drive0,id=%s' % | ||
77 | + self.vm.add_device('%s,%sid=%s' % | ||
78 | (interface_to_device_name(self.interface), | ||
79 | + 'drive=drive0,' if self.use_drive else '', | ||
80 | self.device_name)) | ||
81 | self.vm.launch() | ||
82 | |||
83 | @@ -XXX,XX +XXX,XX @@ def create_basic_test_classes(): | ||
84 | ('disk', 'floppy', False) ]: | ||
85 | |||
86 | for case in [ TestInitiallyFilled, TestInitiallyEmpty ]: | ||
87 | - | ||
88 | - attr = { 'media': media, | ||
89 | - 'interface': interface, | ||
90 | - 'has_real_tray': has_real_tray } | ||
91 | - | ||
92 | - name = '%s_%s_%s' % (case.__name__, media, interface) | ||
93 | - globals()[name] = type(name, (case, ), attr) | ||
94 | + for use_drive in [ True, False ]: | ||
95 | + attr = { 'media': media, | ||
96 | + 'interface': interface, | ||
97 | + 'has_real_tray': has_real_tray, | ||
98 | + 'use_drive': use_drive } | ||
99 | + | ||
100 | + name = '%s_%s_%s_%s' % (case.__name__, media, interface, | ||
101 | + 'drive' if use_drive else 'blockdev') | ||
102 | + globals()[name] = type(name, (case, ), attr) | ||
103 | |||
104 | create_basic_test_classes() | ||
105 | |||
106 | diff --git a/tests/qemu-iotests/118.out b/tests/qemu-iotests/118.out | ||
107 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
108 | --- a/tests/qemu-iotests/118.out | 16 | --- a/block/raw-format.c |
109 | +++ b/tests/qemu-iotests/118.out | 17 | +++ b/block/raw-format.c |
110 | @@ -XXX,XX +XXX,XX @@ | 18 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, |
111 | -......................................................................................... | 19 | |
112 | +....................................................................................................................................................................... | 20 | s->size = offset; |
113 | ---------------------------------------------------------------------- | 21 | offset += s->offset; |
114 | -Ran 89 tests | 22 | - return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); |
115 | +Ran 167 tests | 23 | + return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); |
116 | 24 | } | |
117 | OK | 25 | |
26 | static void raw_eject(BlockDriverState *bs, bool eject_flag) | ||
27 | @@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, | ||
28 | bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | | ||
29 | ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & | ||
30 | bs->file->bs->supported_zero_flags); | ||
31 | + bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags & | ||
32 | + BDRV_REQ_ZERO_WRITE; | ||
33 | |||
34 | if (bs->probed && !bdrv_is_read_only(bs)) { | ||
35 | bdrv_refresh_filename(bs->file->bs); | ||
118 | -- | 36 | -- |
119 | 2.20.1 | 37 | 2.25.3 |
120 | 38 | ||
121 | 39 | diff view generated by jsdifflib |
1 | From: Nir Soffer <nirsof@gmail.com> | 1 | For regular files, we always get BDRV_REQ_ZERO_WRITE behaviour from the |
---|---|---|---|
2 | OS, so we can advertise the flag and just ignore it. | ||
2 | 3 | ||
3 | In some cases buf_align or request_alignment cannot be detected: | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
4 | 5 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | |
5 | 1. With Gluster, buf_align cannot be detected since the actual I/O is | 6 | Reviewed-by: Alberto Garcia <berto@igalia.com> |
6 | done on Gluster server, and qemu buffer alignment does not matter. | 7 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
7 | Since we don't have alignment requirement, buf_align=1 is the best | 8 | Message-Id: <20200424125448.63318-7-kwolf@redhat.com> |
8 | value. | ||
9 | |||
10 | 2. With local XFS filesystem, buf_align cannot be detected if reading | ||
11 | from unallocated area. In this case we must align the buffer, but we don't | ||
12 | know what is the correct size. Using the wrong alignment results in | ||
13 | I/O error. | ||
14 | |||
15 | 3. With Gluster backed by XFS, request_alignment cannot be detected if | ||
16 | reading from unallocated area. In this case we need to use the | ||
17 | correct alignment, and failing to do so results in I/O errors. | ||
18 | |||
19 | 4. With NFS, the server does not use direct I/O, so neither buf_align nor request_alignment can | ||
20 | be detected. In this case we don't need any alignment so we can use | ||
21 | buf_align=1 and request_alignment=1. | ||
22 | |||
23 | These cases seem to work when storage sector size is 512 bytes, because | ||
24 | the current code starts checking align=512. If the check succeeds | ||
25 | because alignment cannot be detected we use 512. But this does not work | ||
26 | for storage with 4k sector size. | ||
27 | |||
28 | To determine if we can detect the alignment, we probe first with | ||
29 | align=1. If probing succeeds, maybe there is no alignment requirement | ||
30 | (cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we | ||
31 | don't have any way to tell, we treat this as undetectable alignment. If | ||
32 | probing with align=1 fails with EINVAL, but probing with one of the | ||
33 | expected alignments succeeds, we know that we found a working alignment. | ||
34 | |||
35 | Practically the alignment requirements are the same for buffer | ||
36 | alignment, buffer length, and offset in file. So in case we cannot | ||
37 | detect buf_align, we can use request alignment. If we cannot detect | ||
38 | request alignment, we can fall back to a safe value. To use this logic, | ||
39 | we probe first request alignment instead of buf_align. | ||
40 | |||
41 | Here is a table showing the behaviour with current code (the value in | ||
42 | parenthesis is the optimal value). | ||
43 | |||
44 | Case Sector buf_align (opt) request_alignment (opt) result | ||
45 | ====================================================================== | ||
46 | 1 512 512 (1) 512 (512) OK | ||
47 | 1 4096 512 (1) 4096 (4096) FAIL | ||
48 | ---------------------------------------------------------------------- | ||
49 | 2 512 512 (512) 512 (512) OK | ||
50 | 2 4096 512 (4096) 4096 (4096) FAIL | ||
51 | ---------------------------------------------------------------------- | ||
52 | 3 512 512 (1) 512 (512) OK | ||
53 | 3 4096 512 (1) 512 (4096) FAIL | ||
54 | ---------------------------------------------------------------------- | ||
55 | 4 512 512 (1) 512 (1) OK | ||
56 | 4 4096 512 (1) 512 (1) OK | ||
57 | |||
58 | Same cases with this change: | ||
59 | |||
60 | Case Sector buf_align (opt) request_alignment (opt) result | ||
61 | ====================================================================== | ||
62 | 1 512 512 (1) 512 (512) OK | ||
63 | 1 4096 4096 (1) 4096 (4096) OK | ||
64 | ---------------------------------------------------------------------- | ||
65 | 2 512 512 (512) 512 (512) OK | ||
66 | 2 4096 4096 (4096) 4096 (4096) OK | ||
67 | ---------------------------------------------------------------------- | ||
68 | 3 512 4096 (1) 4096 (512) OK | ||
69 | 3 4096 4096 (1) 4096 (4096) OK | ||
70 | ---------------------------------------------------------------------- | ||
71 | 4 512 4096 (1) 4096 (1) OK | ||
72 | 4 4096 4096 (1) 4096 (1) OK | ||
73 | |||
74 | I tested that provisioning VMs and copying disks on local XFS and | ||
75 | Gluster with 4k bytes sector size work now, resolving bugs [1],[2]. | ||
76 | I tested also on XFS, NFS, Gluster with 512 bytes sector size. | ||
77 | |||
78 | [1] https://bugzilla.redhat.com/1737256 | ||
79 | [2] https://bugzilla.redhat.com/1738657 | ||
80 | |||
81 | Signed-off-by: Nir Soffer <nsoffer@redhat.com> | ||
82 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
83 | --- | 10 | --- |
84 | block/file-posix.c | 36 +++++++++++++++++++++++++----------- | 11 | block/file-posix.c | 4 ++++ |
85 | 1 file changed, 25 insertions(+), 11 deletions(-) | 12 | 1 file changed, 4 insertions(+) |
86 | 13 | ||
87 | diff --git a/block/file-posix.c b/block/file-posix.c | 14 | diff --git a/block/file-posix.c b/block/file-posix.c |
88 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
89 | --- a/block/file-posix.c | 16 | --- a/block/file-posix.c |
90 | +++ b/block/file-posix.c | 17 | +++ b/block/file-posix.c |
91 | @@ -XXX,XX +XXX,XX @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) | 18 | @@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options, |
92 | BDRVRawState *s = bs->opaque; | ||
93 | char *buf; | ||
94 | size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); | ||
95 | + size_t alignments[] = {1, 512, 1024, 2048, 4096}; | ||
96 | |||
97 | /* For SCSI generic devices the alignment is not really used. | ||
98 | With buffered I/O, we don't have any restrictions. */ | ||
99 | @@ -XXX,XX +XXX,XX @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) | ||
100 | } | ||
101 | #endif | 19 | #endif |
102 | 20 | ||
103 | - /* If we could not get the sizes so far, we can only guess them */ | 21 | bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; |
104 | - if (!s->buf_align) { | 22 | + if (S_ISREG(st.st_mode)) { |
105 | + /* | 23 | + /* When extending regular files, we get zeros from the OS */ |
106 | + * If we could not get the sizes so far, we can only guess them. First try | 24 | + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; |
107 | + * to detect request alignment, since it is more likely to succeed. Then | 25 | + } |
108 | + * try to detect buf_align, which cannot be detected in some cases (e.g. | 26 | ret = 0; |
109 | + * Gluster). If buf_align cannot be detected, we fallback to the value of | 27 | fail: |
110 | + * request_alignment. | 28 | if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { |
111 | + */ | ||
112 | + | ||
113 | + if (!bs->bl.request_alignment) { | ||
114 | + int i; | ||
115 | size_t align; | ||
116 | - buf = qemu_memalign(max_align, 2 * max_align); | ||
117 | - for (align = 512; align <= max_align; align <<= 1) { | ||
118 | - if (raw_is_io_aligned(fd, buf + align, max_align)) { | ||
119 | - s->buf_align = align; | ||
120 | + buf = qemu_memalign(max_align, max_align); | ||
121 | + for (i = 0; i < ARRAY_SIZE(alignments); i++) { | ||
122 | + align = alignments[i]; | ||
123 | + if (raw_is_io_aligned(fd, buf, align)) { | ||
124 | + /* Fallback to safe value. */ | ||
125 | + bs->bl.request_alignment = (align != 1) ? align : max_align; | ||
126 | break; | ||
127 | } | ||
128 | } | ||
129 | qemu_vfree(buf); | ||
130 | } | ||
131 | |||
132 | - if (!bs->bl.request_alignment) { | ||
133 | + if (!s->buf_align) { | ||
134 | + int i; | ||
135 | size_t align; | ||
136 | - buf = qemu_memalign(s->buf_align, max_align); | ||
137 | - for (align = 512; align <= max_align; align <<= 1) { | ||
138 | - if (raw_is_io_aligned(fd, buf, align)) { | ||
139 | - bs->bl.request_alignment = align; | ||
140 | + buf = qemu_memalign(max_align, 2 * max_align); | ||
141 | + for (i = 0; i < ARRAY_SIZE(alignments); i++) { | ||
142 | + align = alignments[i]; | ||
143 | + if (raw_is_io_aligned(fd, buf + align, max_align)) { | ||
144 | + /* Fallback to request_aligment. */ | ||
145 | + s->buf_align = (align != 1) ? align : bs->bl.request_alignment; | ||
146 | break; | ||
147 | } | ||
148 | } | ||
149 | -- | 29 | -- |
150 | 2.20.1 | 30 | 2.25.3 |
151 | 31 | ||
152 | 32 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | When extending the size of an image that has a backing file larger than |
---|---|---|---|
2 | its old size, make sure that the backing file data doesn't become | ||
3 | visible in the guest, but the added area is properly zeroed out. | ||
2 | 4 | ||
3 | Add a test for what happens when you call bdrv_replace_child_noperm() | 5 | Consider the following scenario where the overlay is shorter than its |
4 | for various drain situations ({old,new} child {drained,not drained}). | 6 | backing file: |
5 | 7 | ||
6 | Most importantly, if both the old and the new child are drained, the | 8 | base.qcow2: AAAAAAAA |
7 | parent must not be undrained at any point. | 9 | overlay.qcow2: BBBB |
8 | 10 | ||
9 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 11 | When resizing (extending) overlay.qcow2, the new blocks should not stay |
12 | unallocated and make the additional As from base.qcow2 visible like | ||
13 | before this patch, but zeros should be read. | ||
14 | |||
15 | A similar case happens with the various variants of a commit job when an | ||
16 | intermediate file is short (- for unallocated): | ||
17 | |||
18 | base.qcow2: A-A-AAAA | ||
19 | mid.qcow2: BB-B | ||
20 | top.qcow2: C--C--C- | ||
21 | |||
22 | After committing top.qcow2 to mid.qcow2, the following happens: | ||
23 | |||
24 | mid.qcow2: CB-C00C0 (correct result) | ||
25 | mid.qcow2: CB-C--C- (before this fix) | ||
26 | |||
27 | Without the fix, blocks that previously read as zeros on top.qcow2 | ||
28 | suddenly turn into A. | ||
29 | |||
30 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
31 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
32 | Message-Id: <20200424125448.63318-8-kwolf@redhat.com> | ||
33 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 34 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 35 | --- |
12 | tests/test-bdrv-drain.c | 308 ++++++++++++++++++++++++++++++++++++++++ | 36 | block/io.c | 25 +++++++++++++++++++++++++ |
13 | 1 file changed, 308 insertions(+) | 37 | 1 file changed, 25 insertions(+) |
14 | 38 | ||
15 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 39 | diff --git a/block/io.c b/block/io.c |
16 | index XXXXXXX..XXXXXXX 100644 | 40 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tests/test-bdrv-drain.c | 41 | --- a/block/io.c |
18 | +++ b/tests/test-bdrv-drain.c | 42 | +++ b/block/io.c |
19 | @@ -XXX,XX +XXX,XX @@ static void test_drop_intermediate_poll(void) | 43 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, |
20 | bdrv_unref(chain[2]); | 44 | goto out; |
21 | } | 45 | } |
22 | 46 | ||
47 | + /* | ||
48 | + * If the image has a backing file that is large enough that it would | ||
49 | + * provide data for the new area, we cannot leave it unallocated because | ||
50 | + * then the backing file content would become visible. Instead, zero-fill | ||
51 | + * the new area. | ||
52 | + * | ||
53 | + * Note that if the image has a backing file, but was opened without the | ||
54 | + * backing file, taking care of keeping things consistent with that backing | ||
55 | + * file is the user's responsibility. | ||
56 | + */ | ||
57 | + if (new_bytes && bs->backing) { | ||
58 | + int64_t backing_len; | ||
23 | + | 59 | + |
24 | +typedef struct BDRVReplaceTestState { | 60 | + backing_len = bdrv_getlength(backing_bs(bs)); |
25 | + bool was_drained; | 61 | + if (backing_len < 0) { |
26 | + bool was_undrained; | 62 | + ret = backing_len; |
27 | + bool has_read; | 63 | + error_setg_errno(errp, -ret, "Could not get backing file size"); |
28 | + | 64 | + goto out; |
29 | + int drain_count; | ||
30 | + | ||
31 | + bool yield_before_read; | ||
32 | + Coroutine *io_co; | ||
33 | + Coroutine *drain_co; | ||
34 | +} BDRVReplaceTestState; | ||
35 | + | ||
36 | +static void bdrv_replace_test_close(BlockDriverState *bs) | ||
37 | +{ | ||
38 | +} | ||
39 | + | ||
40 | +/** | ||
41 | + * If @bs has a backing file: | ||
42 | + * Yield if .yield_before_read is true (and wait for drain_begin to | ||
43 | + * wake us up). | ||
44 | + * Forward the read to bs->backing. Set .has_read to true. | ||
45 | + * If drain_begin has woken us, wake it in turn. | ||
46 | + * | ||
47 | + * Otherwise: | ||
48 | + * Set .has_read to true and return success. | ||
49 | + */ | ||
50 | +static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, | ||
51 | + uint64_t offset, | ||
52 | + uint64_t bytes, | ||
53 | + QEMUIOVector *qiov, | ||
54 | + int flags) | ||
55 | +{ | ||
56 | + BDRVReplaceTestState *s = bs->opaque; | ||
57 | + | ||
58 | + if (bs->backing) { | ||
59 | + int ret; | ||
60 | + | ||
61 | + g_assert(!s->drain_count); | ||
62 | + | ||
63 | + s->io_co = qemu_coroutine_self(); | ||
64 | + if (s->yield_before_read) { | ||
65 | + s->yield_before_read = false; | ||
66 | + qemu_coroutine_yield(); | ||
67 | + } | ||
68 | + s->io_co = NULL; | ||
69 | + | ||
70 | + ret = bdrv_preadv(bs->backing, offset, qiov); | ||
71 | + s->has_read = true; | ||
72 | + | ||
73 | + /* Wake up drain_co if it runs */ | ||
74 | + if (s->drain_co) { | ||
75 | + aio_co_wake(s->drain_co); | ||
76 | + } | 65 | + } |
77 | + | 66 | + |
78 | + return ret; | 67 | + if (backing_len > old_size) { |
68 | + flags |= BDRV_REQ_ZERO_WRITE; | ||
69 | + } | ||
79 | + } | 70 | + } |
80 | + | 71 | + |
81 | + s->has_read = true; | 72 | if (drv->bdrv_co_truncate) { |
82 | + return 0; | 73 | if (flags & ~bs->supported_truncate_flags) { |
83 | +} | 74 | error_setg(errp, "Block driver does not support requested flags"); |
84 | + | ||
85 | +/** | ||
86 | + * If .drain_count is 0, wake up .io_co if there is one; and set | ||
87 | + * .was_drained. | ||
88 | + * Increment .drain_count. | ||
89 | + */ | ||
90 | +static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) | ||
91 | +{ | ||
92 | + BDRVReplaceTestState *s = bs->opaque; | ||
93 | + | ||
94 | + if (!s->drain_count) { | ||
95 | + /* Keep waking io_co up until it is done */ | ||
96 | + s->drain_co = qemu_coroutine_self(); | ||
97 | + while (s->io_co) { | ||
98 | + aio_co_wake(s->io_co); | ||
99 | + s->io_co = NULL; | ||
100 | + qemu_coroutine_yield(); | ||
101 | + } | ||
102 | + s->drain_co = NULL; | ||
103 | + | ||
104 | + s->was_drained = true; | ||
105 | + } | ||
106 | + s->drain_count++; | ||
107 | +} | ||
108 | + | ||
109 | +/** | ||
110 | + * Reduce .drain_count, set .was_undrained once it reaches 0. | ||
111 | + * If .drain_count reaches 0 and the node has a backing file, issue a | ||
112 | + * read request. | ||
113 | + */ | ||
114 | +static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) | ||
115 | +{ | ||
116 | + BDRVReplaceTestState *s = bs->opaque; | ||
117 | + | ||
118 | + g_assert(s->drain_count > 0); | ||
119 | + if (!--s->drain_count) { | ||
120 | + int ret; | ||
121 | + | ||
122 | + s->was_undrained = true; | ||
123 | + | ||
124 | + if (bs->backing) { | ||
125 | + char data; | ||
126 | + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); | ||
127 | + | ||
128 | + /* Queue a read request post-drain */ | ||
129 | + ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); | ||
130 | + g_assert(ret >= 0); | ||
131 | + } | ||
132 | + } | ||
133 | +} | ||
134 | + | ||
135 | +static BlockDriver bdrv_replace_test = { | ||
136 | + .format_name = "replace_test", | ||
137 | + .instance_size = sizeof(BDRVReplaceTestState), | ||
138 | + | ||
139 | + .bdrv_close = bdrv_replace_test_close, | ||
140 | + .bdrv_co_preadv = bdrv_replace_test_co_preadv, | ||
141 | + | ||
142 | + .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, | ||
143 | + .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, | ||
144 | + | ||
145 | + .bdrv_child_perm = bdrv_format_default_perms, | ||
146 | +}; | ||
147 | + | ||
148 | +static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque) | ||
149 | +{ | ||
150 | + int ret; | ||
151 | + char data; | ||
152 | + | ||
153 | + ret = blk_co_pread(opaque, 0, 1, &data, 0); | ||
154 | + g_assert(ret >= 0); | ||
155 | +} | ||
156 | + | ||
157 | +/** | ||
158 | + * We test two things: | ||
159 | + * (1) bdrv_replace_child_noperm() must not undrain the parent if both | ||
160 | + * children are drained. | ||
161 | + * (2) bdrv_replace_child_noperm() must never flush I/O requests to a | ||
162 | + * drained child. If the old child is drained, it must flush I/O | ||
163 | + * requests after the new one has been attached. If the new child | ||
164 | + * is drained, it must flush I/O requests before the old one is | ||
165 | + * detached. | ||
166 | + * | ||
167 | + * To do so, we create one parent node and two child nodes; then | ||
168 | + * attach one of the children (old_child_bs) to the parent, then | ||
169 | + * drain both old_child_bs and new_child_bs according to | ||
170 | + * old_drain_count and new_drain_count, respectively, and finally | ||
171 | + * we invoke bdrv_replace_node() to replace old_child_bs by | ||
172 | + * new_child_bs. | ||
173 | + * | ||
174 | + * The test block driver we use here (bdrv_replace_test) has a read | ||
175 | + * function that: | ||
176 | + * - For the parent node, can optionally yield, and then forwards the | ||
177 | + * read to bdrv_preadv(), | ||
178 | + * - For the child node, just returns immediately. | ||
179 | + * | ||
180 | + * If the read yields, the drain_begin function will wake it up. | ||
181 | + * | ||
182 | + * The drain_end function issues a read on the parent once it is fully | ||
183 | + * undrained (which simulates requests starting to come in again). | ||
184 | + */ | ||
185 | +static void do_test_replace_child_mid_drain(int old_drain_count, | ||
186 | + int new_drain_count) | ||
187 | +{ | ||
188 | + BlockBackend *parent_blk; | ||
189 | + BlockDriverState *parent_bs; | ||
190 | + BlockDriverState *old_child_bs, *new_child_bs; | ||
191 | + BDRVReplaceTestState *parent_s; | ||
192 | + BDRVReplaceTestState *old_child_s, *new_child_s; | ||
193 | + Coroutine *io_co; | ||
194 | + int i; | ||
195 | + | ||
196 | + parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0, | ||
197 | + &error_abort); | ||
198 | + parent_s = parent_bs->opaque; | ||
199 | + | ||
200 | + parent_blk = blk_new(qemu_get_aio_context(), | ||
201 | + BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); | ||
202 | + blk_insert_bs(parent_blk, parent_bs, &error_abort); | ||
203 | + | ||
204 | + old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0, | ||
205 | + &error_abort); | ||
206 | + new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0, | ||
207 | + &error_abort); | ||
208 | + old_child_s = old_child_bs->opaque; | ||
209 | + new_child_s = new_child_bs->opaque; | ||
210 | + | ||
211 | + /* So that we can read something */ | ||
212 | + parent_bs->total_sectors = 1; | ||
213 | + old_child_bs->total_sectors = 1; | ||
214 | + new_child_bs->total_sectors = 1; | ||
215 | + | ||
216 | + bdrv_ref(old_child_bs); | ||
217 | + parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child", | ||
218 | + &child_backing, &error_abort); | ||
219 | + | ||
220 | + for (i = 0; i < old_drain_count; i++) { | ||
221 | + bdrv_drained_begin(old_child_bs); | ||
222 | + } | ||
223 | + for (i = 0; i < new_drain_count; i++) { | ||
224 | + bdrv_drained_begin(new_child_bs); | ||
225 | + } | ||
226 | + | ||
227 | + if (!old_drain_count) { | ||
228 | + /* | ||
229 | + * Start a read operation that will yield, so it will not | ||
230 | + * complete before the node is drained. | ||
231 | + */ | ||
232 | + parent_s->yield_before_read = true; | ||
233 | + io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co, | ||
234 | + parent_blk); | ||
235 | + qemu_coroutine_enter(io_co); | ||
236 | + } | ||
237 | + | ||
238 | + /* If we have started a read operation, it should have yielded */ | ||
239 | + g_assert(!parent_s->has_read); | ||
240 | + | ||
241 | + /* Reset drained status so we can see what bdrv_replace_node() does */ | ||
242 | + parent_s->was_drained = false; | ||
243 | + parent_s->was_undrained = false; | ||
244 | + | ||
245 | + g_assert(parent_bs->quiesce_counter == old_drain_count); | ||
246 | + bdrv_replace_node(old_child_bs, new_child_bs, &error_abort); | ||
247 | + g_assert(parent_bs->quiesce_counter == new_drain_count); | ||
248 | + | ||
249 | + if (!old_drain_count && !new_drain_count) { | ||
250 | + /* | ||
251 | + * From undrained to undrained drains and undrains the parent, | ||
252 | + * because bdrv_replace_node() contains a drained section for | ||
253 | + * @old_child_bs. | ||
254 | + */ | ||
255 | + g_assert(parent_s->was_drained && parent_s->was_undrained); | ||
256 | + } else if (!old_drain_count && new_drain_count) { | ||
257 | + /* | ||
258 | + * From undrained to drained should drain the parent and keep | ||
259 | + * it that way. | ||
260 | + */ | ||
261 | + g_assert(parent_s->was_drained && !parent_s->was_undrained); | ||
262 | + } else if (old_drain_count && !new_drain_count) { | ||
263 | + /* | ||
264 | + * From drained to undrained should undrain the parent and | ||
265 | + * keep it that way. | ||
266 | + */ | ||
267 | + g_assert(!parent_s->was_drained && parent_s->was_undrained); | ||
268 | + } else /* if (old_drain_count && new_drain_count) */ { | ||
269 | + /* | ||
270 | + * From drained to drained must not undrain the parent at any | ||
271 | + * point | ||
272 | + */ | ||
273 | + g_assert(!parent_s->was_drained && !parent_s->was_undrained); | ||
274 | + } | ||
275 | + | ||
276 | + if (!old_drain_count || !new_drain_count) { | ||
277 | + /* | ||
278 | + * If !old_drain_count, we have started a read request before | ||
279 | + * bdrv_replace_node(). If !new_drain_count, the parent must | ||
280 | + * have been undrained at some point, and | ||
281 | + * bdrv_replace_test_co_drain_end() starts a read request | ||
282 | + * then. | ||
283 | + */ | ||
284 | + g_assert(parent_s->has_read); | ||
285 | + } else { | ||
286 | + /* | ||
287 | + * If the parent was never undrained, there is no way to start | ||
288 | + * a read request. | ||
289 | + */ | ||
290 | + g_assert(!parent_s->has_read); | ||
291 | + } | ||
292 | + | ||
293 | + /* A drained child must have not received any request */ | ||
294 | + g_assert(!(old_drain_count && old_child_s->has_read)); | ||
295 | + g_assert(!(new_drain_count && new_child_s->has_read)); | ||
296 | + | ||
297 | + for (i = 0; i < new_drain_count; i++) { | ||
298 | + bdrv_drained_end(new_child_bs); | ||
299 | + } | ||
300 | + for (i = 0; i < old_drain_count; i++) { | ||
301 | + bdrv_drained_end(old_child_bs); | ||
302 | + } | ||
303 | + | ||
304 | + /* | ||
305 | + * By now, bdrv_replace_test_co_drain_end() must have been called | ||
306 | + * at some point while the new child was attached to the parent. | ||
307 | + */ | ||
308 | + g_assert(parent_s->has_read); | ||
309 | + g_assert(new_child_s->has_read); | ||
310 | + | ||
311 | + blk_unref(parent_blk); | ||
312 | + bdrv_unref(parent_bs); | ||
313 | + bdrv_unref(old_child_bs); | ||
314 | + bdrv_unref(new_child_bs); | ||
315 | +} | ||
316 | + | ||
317 | +static void test_replace_child_mid_drain(void) | ||
318 | +{ | ||
319 | + int old_drain_count, new_drain_count; | ||
320 | + | ||
321 | + for (old_drain_count = 0; old_drain_count < 2; old_drain_count++) { | ||
322 | + for (new_drain_count = 0; new_drain_count < 2; new_drain_count++) { | ||
323 | + do_test_replace_child_mid_drain(old_drain_count, new_drain_count); | ||
324 | + } | ||
325 | + } | ||
326 | +} | ||
327 | + | ||
328 | int main(int argc, char **argv) | ||
329 | { | ||
330 | int ret; | ||
331 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
332 | g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll", | ||
333 | test_drop_intermediate_poll); | ||
334 | |||
335 | + g_test_add_func("/bdrv-drain/replace_child/mid-drain", | ||
336 | + test_replace_child_mid_drain); | ||
337 | + | ||
338 | ret = g_test_run(); | ||
339 | qemu_event_destroy(&done_event); | ||
340 | return ret; | ||
341 | -- | 75 | -- |
342 | 2.20.1 | 76 | 2.25.3 |
343 | 77 | ||
344 | 78 | diff view generated by jsdifflib |
1 | 234 implements functions that are useful for doing migration between two | 1 | We want to keep TEST_IMG for the full path of the main test image, but |
---|---|---|---|
2 | VMs. Move them to iotests.py so that other test cases can use them, too. | 2 | filter_testfiles() must be called for other test images before replacing |
3 | other things like the image format because the test directory path could | ||
4 | contain the format as a substring. | ||
5 | |||
6 | Insert a filter_testfiles() call between both. | ||
3 | 7 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | Reviewed-by: Max Reitz <mreitz@redhat.com> | 9 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
10 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
11 | Message-Id: <20200424125448.63318-9-kwolf@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
6 | --- | 13 | --- |
7 | tests/qemu-iotests/234 | 30 +++++++----------------------- | 14 | tests/qemu-iotests/iotests.py | 5 +++-- |
8 | tests/qemu-iotests/iotests.py | 16 ++++++++++++++++ | 15 | 1 file changed, 3 insertions(+), 2 deletions(-) |
9 | 2 files changed, 23 insertions(+), 23 deletions(-) | ||
10 | 16 | ||
11 | diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234 | ||
12 | index XXXXXXX..XXXXXXX 100755 | ||
13 | --- a/tests/qemu-iotests/234 | ||
14 | +++ b/tests/qemu-iotests/234 | ||
15 | @@ -XXX,XX +XXX,XX @@ import os | ||
16 | iotests.verify_image_format(supported_fmts=['qcow2']) | ||
17 | iotests.verify_platform(['linux']) | ||
18 | |||
19 | -def enable_migration_events(vm, name): | ||
20 | - iotests.log('Enabling migration QMP events on %s...' % name) | ||
21 | - iotests.log(vm.qmp('migrate-set-capabilities', capabilities=[ | ||
22 | - { | ||
23 | - 'capability': 'events', | ||
24 | - 'state': True | ||
25 | - } | ||
26 | - ])) | ||
27 | - | ||
28 | -def wait_migration(vm): | ||
29 | - while True: | ||
30 | - event = vm.event_wait('MIGRATION') | ||
31 | - iotests.log(event, filters=[iotests.filter_qmp_event]) | ||
32 | - if event['data']['status'] == 'completed': | ||
33 | - break | ||
34 | - | ||
35 | with iotests.FilePath('img') as img_path, \ | ||
36 | iotests.FilePath('backing') as backing_path, \ | ||
37 | iotests.FilePath('mig_fifo_a') as fifo_a, \ | ||
38 | @@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \ | ||
39 | .add_blockdev('%s,file=drive0-backing-file,node-name=drive0-backing' % (iotests.imgfmt)) | ||
40 | .launch()) | ||
41 | |||
42 | - enable_migration_events(vm_a, 'A') | ||
43 | + vm_a.enable_migration_events('A') | ||
44 | |||
45 | iotests.log('Launching destination VM...') | ||
46 | (vm_b.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path)) | ||
47 | @@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \ | ||
48 | .add_incoming("exec: cat '%s'" % (fifo_a)) | ||
49 | .launch()) | ||
50 | |||
51 | - enable_migration_events(vm_b, 'B') | ||
52 | + vm_b.enable_migration_events('B') | ||
53 | |||
54 | # Add a child node that was created after the parent node. The reverse case | ||
55 | # is covered by the -blockdev options above. | ||
56 | @@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \ | ||
57 | iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a))) | ||
58 | with iotests.Timeout(3, 'Migration does not complete'): | ||
59 | # Wait for the source first (which includes setup=setup) | ||
60 | - wait_migration(vm_a) | ||
61 | + vm_a.wait_migration() | ||
62 | # Wait for the destination second (which does not) | ||
63 | - wait_migration(vm_b) | ||
64 | + vm_b.wait_migration() | ||
65 | |||
66 | iotests.log(vm_a.qmp('query-migrate')['return']['status']) | ||
67 | iotests.log(vm_b.qmp('query-migrate')['return']['status']) | ||
68 | @@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \ | ||
69 | .add_incoming("exec: cat '%s'" % (fifo_b)) | ||
70 | .launch()) | ||
71 | |||
72 | - enable_migration_events(vm_a, 'A') | ||
73 | + vm_a.enable_migration_events('A') | ||
74 | |||
75 | iotests.log(vm_a.qmp('blockdev-snapshot', node='drive0-backing', | ||
76 | overlay='drive0')) | ||
77 | @@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \ | ||
78 | iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b))) | ||
79 | with iotests.Timeout(3, 'Migration does not complete'): | ||
80 | # Wait for the source first (which includes setup=setup) | ||
81 | - wait_migration(vm_b) | ||
82 | + vm_b.wait_migration() | ||
83 | # Wait for the destination second (which does not) | ||
84 | - wait_migration(vm_a) | ||
85 | + vm_a.wait_migration() | ||
86 | |||
87 | iotests.log(vm_a.qmp('query-migrate')['return']['status']) | ||
88 | iotests.log(vm_b.qmp('query-migrate')['return']['status']) | ||
89 | diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py | 17 | diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py |
90 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
91 | --- a/tests/qemu-iotests/iotests.py | 19 | --- a/tests/qemu-iotests/iotests.py |
92 | +++ b/tests/qemu-iotests/iotests.py | 20 | +++ b/tests/qemu-iotests/iotests.py |
93 | @@ -XXX,XX +XXX,XX @@ class VM(qtest.QEMUQtestMachine): | 21 | @@ -XXX,XX +XXX,XX @@ def filter_img_info(output, filename): |
94 | elif status == 'null': | 22 | for line in output.split('\n'): |
95 | return error | 23 | if 'disk size' in line or 'actual-size' in line: |
96 | 24 | continue | |
97 | + def enable_migration_events(self, name): | 25 | - line = line.replace(filename, 'TEST_IMG') \ |
98 | + log('Enabling migration QMP events on %s...' % name) | 26 | - .replace(imgfmt, 'IMGFMT') |
99 | + log(self.qmp('migrate-set-capabilities', capabilities=[ | 27 | + line = line.replace(filename, 'TEST_IMG') |
100 | + { | 28 | + line = filter_testfiles(line) |
101 | + 'capability': 'events', | 29 | + line = line.replace(imgfmt, 'IMGFMT') |
102 | + 'state': True | 30 | line = re.sub('iters: [0-9]+', 'iters: XXX', line) |
103 | + } | 31 | line = re.sub('uuid: [-a-f0-9]+', 'uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', line) |
104 | + ])) | 32 | line = re.sub('cid: [0-9]+', 'cid: XXXXXXXXXX', line) |
105 | + | ||
106 | + def wait_migration(self): | ||
107 | + while True: | ||
108 | + event = self.event_wait('MIGRATION') | ||
109 | + log(event, filters=[filter_qmp_event]) | ||
110 | + if event['data']['status'] == 'completed': | ||
111 | + break | ||
112 | + | ||
113 | def node_info(self, node_name): | ||
114 | nodes = self.qmp('query-named-block-nodes') | ||
115 | for x in nodes['return']: | ||
116 | -- | 33 | -- |
117 | 2.20.1 | 34 | 2.25.3 |
118 | 35 | ||
119 | 36 | diff view generated by jsdifflib |
1 | This test case is motivated by commit 2b23f28639 ('block/copy-on-read: | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
---|---|---|---|
2 | Fix permissions for inactive node'). Instead of just testing | 2 | Message-Id: <20200424125448.63318-10-kwolf@redhat.com> |
3 | copy-on-read on migration, let's stack all sorts of filter nodes on top | 3 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
4 | of each other and check whether the resulting VM can still migrate | 4 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
5 | successfully. For good measure, put everything into an iothread, because | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | why not? | 6 | --- |
7 | tests/qemu-iotests/274 | 155 +++++++++++++++++++++ | ||
8 | tests/qemu-iotests/274.out | 268 +++++++++++++++++++++++++++++++++++++ | ||
9 | tests/qemu-iotests/group | 1 + | ||
10 | 3 files changed, 424 insertions(+) | ||
11 | create mode 100755 tests/qemu-iotests/274 | ||
12 | create mode 100644 tests/qemu-iotests/274.out | ||
7 | 13 | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274 |
9 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
10 | --- | ||
11 | tests/qemu-iotests/262 | 82 ++++++++++++++++++++++++++++++++++++++ | ||
12 | tests/qemu-iotests/262.out | 17 ++++++++ | ||
13 | tests/qemu-iotests/group | 1 + | ||
14 | 3 files changed, 100 insertions(+) | ||
15 | create mode 100755 tests/qemu-iotests/262 | ||
16 | create mode 100644 tests/qemu-iotests/262.out | ||
17 | |||
18 | diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262 | ||
19 | new file mode 100755 | 15 | new file mode 100755 |
20 | index XXXXXXX..XXXXXXX | 16 | index XXXXXXX..XXXXXXX |
21 | --- /dev/null | 17 | --- /dev/null |
22 | +++ b/tests/qemu-iotests/262 | 18 | +++ b/tests/qemu-iotests/274 |
23 | @@ -XXX,XX +XXX,XX @@ | 19 | @@ -XXX,XX +XXX,XX @@ |
24 | +#!/usr/bin/env python | 20 | +#!/usr/bin/env python3 |
25 | +# | 21 | +# |
26 | +# Copyright (C) 2019 Red Hat, Inc. | 22 | +# Copyright (C) 2019 Red Hat, Inc. |
27 | +# | 23 | +# |
28 | +# This program is free software; you can redistribute it and/or modify | 24 | +# This program is free software; you can redistribute it and/or modify |
29 | +# it under the terms of the GNU General Public License as published by | 25 | +# it under the terms of the GNU General Public License as published by |
... | ... | ||
38 | +# You should have received a copy of the GNU General Public License | 34 | +# You should have received a copy of the GNU General Public License |
39 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | 35 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. |
40 | +# | 36 | +# |
41 | +# Creator/Owner: Kevin Wolf <kwolf@redhat.com> | 37 | +# Creator/Owner: Kevin Wolf <kwolf@redhat.com> |
42 | +# | 38 | +# |
43 | +# Test migration with filter drivers present. Keep everything in an | 39 | +# Some tests for short backing files and short overlays |
44 | +# iothread just for fun. | ||
45 | + | 40 | + |
46 | +import iotests | 41 | +import iotests |
47 | +import os | ||
48 | + | 42 | + |
49 | +iotests.verify_image_format(supported_fmts=['qcow2']) | 43 | +iotests.verify_image_format(supported_fmts=['qcow2']) |
50 | +iotests.verify_platform(['linux']) | 44 | +iotests.verify_platform(['linux']) |
51 | + | 45 | + |
52 | +with iotests.FilePath('img') as img_path, \ | 46 | +size_short = 1 * 1024 * 1024 |
53 | + iotests.FilePath('mig_fifo') as fifo, \ | 47 | +size_long = 2 * 1024 * 1024 |
54 | + iotests.VM(path_suffix='a') as vm_a, \ | 48 | +size_diff = size_long - size_short |
55 | + iotests.VM(path_suffix='b') as vm_b: | 49 | + |
56 | + | 50 | +def create_chain() -> None: |
57 | + def add_opts(vm): | 51 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, |
58 | + vm.add_object('iothread,id=iothread0') | 52 | + str(size_long)) |
59 | + vm.add_object('throttle-group,id=tg0,x-bps-total=65536') | 53 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, mid, |
60 | + vm.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path)) | 54 | + str(size_short)) |
61 | + vm.add_blockdev('%s,file=drive0-file,node-name=drive0-fmt' % (iotests.imgfmt)) | 55 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', mid, top, |
62 | + vm.add_blockdev('copy-on-read,file=drive0-fmt,node-name=drive0-cor') | 56 | + str(size_long)) |
63 | + vm.add_blockdev('throttle,file=drive0-cor,node-name=drive0-throttle,throttle-group=tg0') | 57 | + |
64 | + vm.add_blockdev('blkdebug,image=drive0-throttle,node-name=drive0-dbg') | 58 | + iotests.qemu_io_log('-c', 'write -P 1 0 %d' % size_long, base) |
65 | + vm.add_blockdev('null-co,node-name=null,read-zeroes=on') | 59 | + |
66 | + vm.add_blockdev('blkverify,test=drive0-dbg,raw=null,node-name=drive0-verify') | 60 | +def create_vm() -> iotests.VM: |
67 | + | 61 | + vm = iotests.VM() |
68 | + if iotests.supports_quorum(): | 62 | + vm.add_blockdev('file,filename=%s,node-name=base-file' % base) |
69 | + vm.add_blockdev('quorum,children.0=drive0-verify,vote-threshold=1,node-name=drive0-quorum') | 63 | + vm.add_blockdev('%s,file=base-file,node-name=base' % iotests.imgfmt) |
70 | + root = "drive0-quorum" | 64 | + vm.add_blockdev('file,filename=%s,node-name=mid-file' % mid) |
71 | + else: | 65 | + vm.add_blockdev('%s,file=mid-file,node-name=mid,backing=base' |
72 | + root = "drive0-verify" | 66 | + % iotests.imgfmt) |
73 | + | 67 | + vm.add_drive(top, 'backing=mid,node-name=top') |
74 | + vm.add_device('virtio-blk,drive=%s,iothread=iothread0' % root) | 68 | + return vm |
75 | + | 69 | + |
76 | + iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, img_path, '64M') | 70 | +with iotests.FilePath('base') as base, \ |
77 | + | 71 | + iotests.FilePath('mid') as mid, \ |
78 | + os.mkfifo(fifo) | 72 | + iotests.FilePath('top') as top: |
79 | + | 73 | + |
80 | + iotests.log('Launching source VM...') | 74 | + iotests.log('== Commit tests ==') |
81 | + add_opts(vm_a) | 75 | + |
82 | + vm_a.launch() | 76 | + create_chain() |
83 | + | 77 | + |
84 | + vm_a.enable_migration_events('A') | 78 | + iotests.log('=== Check visible data ===') |
85 | + | 79 | + |
86 | + iotests.log('Launching destination VM...') | 80 | + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, top) |
87 | + add_opts(vm_b) | 81 | + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), top) |
88 | + vm_b.add_incoming("exec: cat '%s'" % (fifo)) | 82 | + |
89 | + vm_b.launch() | 83 | + iotests.log('=== Checking allocation status ===') |
90 | + | 84 | + |
91 | + vm_b.enable_migration_events('B') | 85 | + iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, |
92 | + | 86 | + '-c', 'alloc %d %d' % (size_short, size_diff), |
93 | + iotests.log('Starting migration to B...') | 87 | + base) |
94 | + iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo))) | 88 | + |
95 | + with iotests.Timeout(3, 'Migration does not complete'): | 89 | + iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, |
96 | + # Wait for the source first (which includes setup=setup) | 90 | + '-c', 'alloc %d %d' % (size_short, size_diff), |
97 | + vm_a.wait_migration() | 91 | + mid) |
98 | + # Wait for the destination second (which does not) | 92 | + |
99 | + vm_b.wait_migration() | 93 | + iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, |
100 | + | 94 | + '-c', 'alloc %d %d' % (size_short, size_diff), |
101 | + iotests.log(vm_a.qmp('query-migrate')['return']['status']) | 95 | + top) |
102 | + iotests.log(vm_b.qmp('query-migrate')['return']['status']) | 96 | + |
103 | + | 97 | + iotests.log('=== Checking map ===') |
104 | + iotests.log(vm_a.qmp('query-status')) | 98 | + |
105 | + iotests.log(vm_b.qmp('query-status')) | 99 | + iotests.qemu_img_log('map', '--output=json', base) |
106 | diff --git a/tests/qemu-iotests/262.out b/tests/qemu-iotests/262.out | 100 | + iotests.qemu_img_log('map', '--output=human', base) |
101 | + iotests.qemu_img_log('map', '--output=json', mid) | ||
102 | + iotests.qemu_img_log('map', '--output=human', mid) | ||
103 | + iotests.qemu_img_log('map', '--output=json', top) | ||
104 | + iotests.qemu_img_log('map', '--output=human', top) | ||
105 | + | ||
106 | + iotests.log('=== Testing qemu-img commit (top -> mid) ===') | ||
107 | + | ||
108 | + iotests.qemu_img_log('commit', top) | ||
109 | + iotests.img_info_log(mid) | ||
110 | + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) | ||
111 | + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) | ||
112 | + | ||
113 | + iotests.log('=== Testing HMP commit (top -> mid) ===') | ||
114 | + | ||
115 | + create_chain() | ||
116 | + with create_vm() as vm: | ||
117 | + vm.launch() | ||
118 | + vm.qmp_log('human-monitor-command', command_line='commit drive0') | ||
119 | + | ||
120 | + iotests.img_info_log(mid) | ||
121 | + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) | ||
122 | + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) | ||
123 | + | ||
124 | + iotests.log('=== Testing QMP active commit (top -> mid) ===') | ||
125 | + | ||
126 | + create_chain() | ||
127 | + with create_vm() as vm: | ||
128 | + vm.launch() | ||
129 | + vm.qmp_log('block-commit', device='top', base_node='mid', | ||
130 | + job_id='job0', auto_dismiss=False) | ||
131 | + vm.run_job('job0', wait=5) | ||
132 | + | ||
133 | + iotests.img_info_log(mid) | ||
134 | + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) | ||
135 | + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) | ||
136 | + | ||
137 | + | ||
138 | + iotests.log('== Resize tests ==') | ||
139 | + | ||
140 | + # Use different sizes for different allocation modes: | ||
141 | + # | ||
142 | + # We want to have at least one test where 32 bit truncation in the size of | ||
143 | + # the overlapping area becomes visible. This is covered by the | ||
144 | + # prealloc='off' case (1G to 6G is an overlap of 5G). | ||
145 | + # | ||
146 | + # However, we can only do this for modes that don't preallocate data | ||
147 | + # because otherwise we might run out of space on the test host. | ||
148 | + # | ||
149 | + # We also want to test some unaligned combinations. | ||
150 | + for (prealloc, base_size, top_size_old, top_size_new, off) in [ | ||
151 | + ('off', '6G', '1G', '8G', '5G'), | ||
152 | + ('metadata', '32G', '30G', '33G', '31G'), | ||
153 | + ('falloc', '10M', '5M', '15M', '9M'), | ||
154 | + ('full', '16M', '8M', '12M', '11M'), | ||
155 | + ('off', '384k', '253k', '512k', '253k'), | ||
156 | + ('off', '400k', '256k', '512k', '336k'), | ||
157 | + ('off', '512k', '256k', '500k', '436k')]: | ||
158 | + | ||
159 | + iotests.log('=== preallocation=%s ===' % prealloc) | ||
160 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, base_size) | ||
161 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, top, | ||
162 | + top_size_old) | ||
163 | + iotests.qemu_io_log('-c', 'write -P 1 %s 64k' % off, base) | ||
164 | + | ||
165 | + # After this, top_size_old to base_size should be allocated/zeroed. | ||
166 | + # | ||
167 | + # In theory, leaving base_size to top_size_new unallocated would be | ||
168 | + # correct, but in practice, if we zero out anything, we zero out | ||
169 | + # everything up to top_size_new. | ||
170 | + iotests.qemu_img_log('resize', '-f', iotests.imgfmt, | ||
171 | + '--preallocation', prealloc, top, top_size_new) | ||
172 | + iotests.qemu_io_log('-c', 'read -P 0 %s 64k' % off, top) | ||
173 | + iotests.qemu_io_log('-c', 'map', top) | ||
174 | + iotests.qemu_img_log('map', '--output=json', top) | ||
175 | diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out | ||
107 | new file mode 100644 | 176 | new file mode 100644 |
108 | index XXXXXXX..XXXXXXX | 177 | index XXXXXXX..XXXXXXX |
109 | --- /dev/null | 178 | --- /dev/null |
110 | +++ b/tests/qemu-iotests/262.out | 179 | +++ b/tests/qemu-iotests/274.out |
111 | @@ -XXX,XX +XXX,XX @@ | 180 | @@ -XXX,XX +XXX,XX @@ |
112 | +Launching source VM... | 181 | +== Commit tests == |
113 | +Enabling migration QMP events on A... | 182 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
183 | + | ||
184 | +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
185 | + | ||
186 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
187 | + | ||
188 | +wrote 2097152/2097152 bytes at offset 0 | ||
189 | +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
190 | + | ||
191 | +=== Check visible data === | ||
192 | +read 1048576/1048576 bytes at offset 0 | ||
193 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
194 | + | ||
195 | +read 1048576/1048576 bytes at offset 1048576 | ||
196 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
197 | + | ||
198 | +=== Checking allocation status === | ||
199 | +1048576/1048576 bytes allocated at offset 0 bytes | ||
200 | +1048576/1048576 bytes allocated at offset 1 MiB | ||
201 | + | ||
202 | +0/1048576 bytes allocated at offset 0 bytes | ||
203 | +0/0 bytes allocated at offset 1 MiB | ||
204 | + | ||
205 | +0/1048576 bytes allocated at offset 0 bytes | ||
206 | +0/1048576 bytes allocated at offset 1 MiB | ||
207 | + | ||
208 | +=== Checking map === | ||
209 | +[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680}] | ||
210 | + | ||
211 | +Offset Length Mapped to File | ||
212 | +0 0x200000 0x50000 TEST_DIR/PID-base | ||
213 | + | ||
214 | +[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680}] | ||
215 | + | ||
216 | +Offset Length Mapped to File | ||
217 | +0 0x100000 0x50000 TEST_DIR/PID-base | ||
218 | + | ||
219 | +[{ "start": 0, "length": 1048576, "depth": 2, "zero": false, "data": true, "offset": 327680}, | ||
220 | +{ "start": 1048576, "length": 1048576, "depth": 0, "zero": true, "data": false}] | ||
221 | + | ||
222 | +Offset Length Mapped to File | ||
223 | +0 0x100000 0x50000 TEST_DIR/PID-base | ||
224 | + | ||
225 | +=== Testing qemu-img commit (top -> mid) === | ||
226 | +Image committed. | ||
227 | + | ||
228 | +image: TEST_IMG | ||
229 | +file format: IMGFMT | ||
230 | +virtual size: 2 MiB (2097152 bytes) | ||
231 | +cluster_size: 65536 | ||
232 | +backing file: TEST_DIR/PID-base | ||
233 | +Format specific information: | ||
234 | + compat: 1.1 | ||
235 | + lazy refcounts: false | ||
236 | + refcount bits: 16 | ||
237 | + corrupt: false | ||
238 | + | ||
239 | +read 1048576/1048576 bytes at offset 0 | ||
240 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
241 | + | ||
242 | +read 1048576/1048576 bytes at offset 1048576 | ||
243 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
244 | + | ||
245 | +=== Testing HMP commit (top -> mid) === | ||
246 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
247 | + | ||
248 | +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
249 | + | ||
250 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
251 | + | ||
252 | +wrote 2097152/2097152 bytes at offset 0 | ||
253 | +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
254 | + | ||
255 | +{"execute": "human-monitor-command", "arguments": {"command-line": "commit drive0"}} | ||
256 | +{"return": ""} | ||
257 | +image: TEST_IMG | ||
258 | +file format: IMGFMT | ||
259 | +virtual size: 2 MiB (2097152 bytes) | ||
260 | +cluster_size: 65536 | ||
261 | +backing file: TEST_DIR/PID-base | ||
262 | +Format specific information: | ||
263 | + compat: 1.1 | ||
264 | + lazy refcounts: false | ||
265 | + refcount bits: 16 | ||
266 | + corrupt: false | ||
267 | + | ||
268 | +read 1048576/1048576 bytes at offset 0 | ||
269 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
270 | + | ||
271 | +read 1048576/1048576 bytes at offset 1048576 | ||
272 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
273 | + | ||
274 | +=== Testing QMP active commit (top -> mid) === | ||
275 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
276 | + | ||
277 | +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
278 | + | ||
279 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
280 | + | ||
281 | +wrote 2097152/2097152 bytes at offset 0 | ||
282 | +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
283 | + | ||
284 | +{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "mid", "device": "top", "job-id": "job0"}} | ||
114 | +{"return": {}} | 285 | +{"return": {}} |
115 | +Launching destination VM... | 286 | +{"execute": "job-complete", "arguments": {"id": "job0"}} |
116 | +Enabling migration QMP events on B... | ||
117 | +{"return": {}} | 287 | +{"return": {}} |
118 | +Starting migration to B... | 288 | +{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} |
289 | +{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
290 | +{"execute": "job-dismiss", "arguments": {"id": "job0"}} | ||
119 | +{"return": {}} | 291 | +{"return": {}} |
120 | +{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | 292 | +image: TEST_IMG |
121 | +{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | 293 | +file format: IMGFMT |
122 | +{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | 294 | +virtual size: 2 MiB (2097152 bytes) |
123 | +{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | 295 | +cluster_size: 65536 |
124 | +{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | 296 | +backing file: TEST_DIR/PID-base |
125 | +completed | 297 | +Format specific information: |
126 | +completed | 298 | + compat: 1.1 |
127 | +{"return": {"running": false, "singlestep": false, "status": "postmigrate"}} | 299 | + lazy refcounts: false |
128 | +{"return": {"running": true, "singlestep": false, "status": "running"}} | 300 | + refcount bits: 16 |
301 | + corrupt: false | ||
302 | + | ||
303 | +read 1048576/1048576 bytes at offset 0 | ||
304 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
305 | + | ||
306 | +read 1048576/1048576 bytes at offset 1048576 | ||
307 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
308 | + | ||
309 | +== Resize tests == | ||
310 | +=== preallocation=off === | ||
311 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=6442450944 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
312 | + | ||
313 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=1073741824 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
314 | + | ||
315 | +wrote 65536/65536 bytes at offset 5368709120 | ||
316 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
317 | + | ||
318 | +Image resized. | ||
319 | + | ||
320 | +read 65536/65536 bytes at offset 5368709120 | ||
321 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
322 | + | ||
323 | +1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0) | ||
324 | +7 GiB (0x1c0000000) bytes allocated at offset 1 GiB (0x40000000) | ||
325 | + | ||
326 | +[{ "start": 0, "length": 1073741824, "depth": 1, "zero": true, "data": false}, | ||
327 | +{ "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}] | ||
328 | + | ||
329 | +=== preallocation=metadata === | ||
330 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=34359738368 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
331 | + | ||
332 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=32212254720 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
333 | + | ||
334 | +wrote 65536/65536 bytes at offset 33285996544 | ||
335 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
336 | + | ||
337 | +Image resized. | ||
338 | + | ||
339 | +read 65536/65536 bytes at offset 33285996544 | ||
340 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
341 | + | ||
342 | +30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0) | ||
343 | +3 GiB (0xc0000000) bytes allocated at offset 30 GiB (0x780000000) | ||
344 | + | ||
345 | +[{ "start": 0, "length": 32212254720, "depth": 1, "zero": true, "data": false}, | ||
346 | +{ "start": 32212254720, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 327680}, | ||
347 | +{ "start": 32749125632, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 537264128}, | ||
348 | +{ "start": 33285996544, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1074200576}, | ||
349 | +{ "start": 33822867456, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1611137024}, | ||
350 | +{ "start": 34359738368, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2148139008}, | ||
351 | +{ "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}] | ||
352 | + | ||
353 | +=== preallocation=falloc === | ||
354 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=10485760 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
355 | + | ||
356 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=5242880 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
357 | + | ||
358 | +wrote 65536/65536 bytes at offset 9437184 | ||
359 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
360 | + | ||
361 | +Image resized. | ||
362 | + | ||
363 | +read 65536/65536 bytes at offset 9437184 | ||
364 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
365 | + | ||
366 | +5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0) | ||
367 | +10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) | ||
368 | + | ||
369 | +[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, | ||
370 | +{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] | ||
371 | + | ||
372 | +=== preallocation=full === | ||
373 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
374 | + | ||
375 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=8388608 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
376 | + | ||
377 | +wrote 65536/65536 bytes at offset 11534336 | ||
378 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
379 | + | ||
380 | +Image resized. | ||
381 | + | ||
382 | +read 65536/65536 bytes at offset 11534336 | ||
383 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
384 | + | ||
385 | +8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0) | ||
386 | +4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) | ||
387 | + | ||
388 | +[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, | ||
389 | +{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] | ||
390 | + | ||
391 | +=== preallocation=off === | ||
392 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
393 | + | ||
394 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=259072 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
395 | + | ||
396 | +wrote 65536/65536 bytes at offset 259072 | ||
397 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
398 | + | ||
399 | +Image resized. | ||
400 | + | ||
401 | +read 65536/65536 bytes at offset 259072 | ||
402 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
403 | + | ||
404 | +192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0) | ||
405 | +320 KiB (0x50000) bytes allocated at offset 192 KiB (0x30000) | ||
406 | + | ||
407 | +[{ "start": 0, "length": 196608, "depth": 1, "zero": true, "data": false}, | ||
408 | +{ "start": 196608, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": 327680}, | ||
409 | +{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] | ||
410 | + | ||
411 | +=== preallocation=off === | ||
412 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=409600 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
413 | + | ||
414 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
415 | + | ||
416 | +wrote 65536/65536 bytes at offset 344064 | ||
417 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
418 | + | ||
419 | +Image resized. | ||
420 | + | ||
421 | +read 65536/65536 bytes at offset 344064 | ||
422 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
423 | + | ||
424 | +256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) | ||
425 | +256 KiB (0x40000) bytes allocated at offset 256 KiB (0x40000) | ||
426 | + | ||
427 | +[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, | ||
428 | +{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] | ||
429 | + | ||
430 | +=== preallocation=off === | ||
431 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=524288 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
432 | + | ||
433 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
434 | + | ||
435 | +wrote 65536/65536 bytes at offset 446464 | ||
436 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
437 | + | ||
438 | +Image resized. | ||
439 | + | ||
440 | +read 65536/65536 bytes at offset 446464 | ||
441 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
442 | + | ||
443 | +256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) | ||
444 | +244 KiB (0x3d000) bytes allocated at offset 256 KiB (0x40000) | ||
445 | + | ||
446 | +[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, | ||
447 | +{ "start": 262144, "length": 249856, "depth": 0, "zero": true, "data": false}] | ||
448 | + | ||
129 | diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group | 449 | diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group |
130 | index XXXXXXX..XXXXXXX 100644 | 450 | index XXXXXXX..XXXXXXX 100644 |
131 | --- a/tests/qemu-iotests/group | 451 | --- a/tests/qemu-iotests/group |
132 | +++ b/tests/qemu-iotests/group | 452 | +++ b/tests/qemu-iotests/group |
133 | @@ -XXX,XX +XXX,XX @@ | 453 | @@ -XXX,XX +XXX,XX @@ |
134 | 254 rw backing quick | 454 | 270 rw backing quick |
135 | 255 rw quick | 455 | 272 rw |
136 | 256 rw quick | 456 | 273 backing quick |
137 | +262 rw quick migration | 457 | +274 rw backing |
458 | 277 rw quick | ||
459 | 279 rw backing quick | ||
460 | 280 rw migration quick | ||
138 | -- | 461 | -- |
139 | 2.20.1 | 462 | 2.25.3 |
140 | 463 | ||
141 | 464 | diff view generated by jsdifflib |
1 | We're getting a ridiculous number of child classes of | 1 | The BDRV_REQ_ZERO_WRITE is currently implemented in a way that first the |
---|---|---|---|
2 | TestInitiallyFilled and TestInitiallyEmpty that differ only in a few | 2 | image is possibly preallocated and then the zero flag is added to all |
3 | attributes that we want to test in all combinations. | 3 | clusters. This means that a copy-on-write operation may be needed when |
4 | writing to these clusters, despite having used preallocation, negating | ||
5 | one of the major benefits of preallocation. | ||
4 | 6 | ||
5 | Instead of explicitly writing down every combination, let's use a loop | 7 | Instead, try to forward the BDRV_REQ_ZERO_WRITE to the protocol driver, |
6 | and create those classes dynamically. | 8 | and if the protocol driver can ensure that the new area reads as zeros, |
9 | we can skip setting the zero flag in the qcow2 layer. | ||
10 | |||
11 | Unfortunately, the same approach doesn't work for metadata | ||
12 | preallocation, so we'll still set the zero flag there. | ||
7 | 13 | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | Reviewed-by: Max Reitz <mreitz@redhat.com> | 15 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
16 | Message-Id: <20200424142701.67053-1-kwolf@redhat.com> | ||
17 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
18 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | --- | 19 | --- |
11 | tests/qemu-iotests/118 | 69 +++++++++++++----------------------------- | 20 | block/qcow2.c | 22 +++++++++++++++++++--- |
12 | 1 file changed, 21 insertions(+), 48 deletions(-) | 21 | tests/qemu-iotests/274.out | 4 ++-- |
22 | 2 files changed, 21 insertions(+), 5 deletions(-) | ||
13 | 23 | ||
14 | diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118 | 24 | diff --git a/block/qcow2.c b/block/qcow2.c |
15 | index XXXXXXX..XXXXXXX 100755 | 25 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/tests/qemu-iotests/118 | 26 | --- a/block/qcow2.c |
17 | +++ b/tests/qemu-iotests/118 | 27 | +++ b/block/qcow2.c |
18 | @@ -XXX,XX +XXX,XX @@ class GeneralChangeTestsBaseClass(ChangeBaseClass): | 28 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, |
19 | class TestInitiallyFilled(GeneralChangeTestsBaseClass): | 29 | /* Allocate the data area */ |
20 | was_empty = False | 30 | new_file_size = allocation_start + |
21 | 31 | nb_new_data_clusters * s->cluster_size; | |
22 | - def setUp(self, media, interface): | 32 | - /* Image file grows, so @exact does not matter */ |
23 | + def setUp(self): | 33 | - ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, |
24 | qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k') | 34 | - errp); |
25 | qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') | 35 | + /* |
26 | self.vm = iotests.VM() | 36 | + * Image file grows, so @exact does not matter. |
27 | - self.vm.add_drive(old_img, 'media=%s' % media, 'none') | 37 | + * |
28 | - if interface == 'scsi': | 38 | + * If we need to zero out the new area, try first whether the protocol |
29 | + self.vm.add_drive(old_img, 'media=%s' % self.media, 'none') | 39 | + * driver can already take care of this. |
30 | + if self.interface == 'scsi': | 40 | + */ |
31 | self.vm.add_device('virtio-scsi-pci') | 41 | + if (flags & BDRV_REQ_ZERO_WRITE) { |
32 | self.vm.add_device('%s,drive=drive0,id=%s' % | 42 | + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, |
33 | - (interface_to_device_name(interface), | 43 | + BDRV_REQ_ZERO_WRITE, NULL); |
34 | + (interface_to_device_name(self.interface), | 44 | + if (ret >= 0) { |
35 | self.device_name)) | 45 | + flags &= ~BDRV_REQ_ZERO_WRITE; |
36 | self.vm.launch() | 46 | + } |
37 | 47 | + } else { | |
38 | @@ -XXX,XX +XXX,XX @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass): | 48 | + ret = -1; |
39 | class TestInitiallyEmpty(GeneralChangeTestsBaseClass): | 49 | + } |
40 | was_empty = True | 50 | + if (ret < 0) { |
41 | 51 | + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, | |
42 | - def setUp(self, media, interface): | 52 | + errp); |
43 | + def setUp(self): | 53 | + } |
44 | qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') | 54 | if (ret < 0) { |
45 | - self.vm = iotests.VM().add_drive(None, 'media=%s' % media, 'none') | 55 | error_prepend(errp, "Failed to resize underlying file: "); |
46 | - if interface == 'scsi': | 56 | qcow2_free_clusters(bs, allocation_start, |
47 | + self.vm = iotests.VM().add_drive(None, 'media=%s' % self.media, 'none') | 57 | diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out |
48 | + if self.interface == 'scsi': | 58 | index XXXXXXX..XXXXXXX 100644 |
49 | self.vm.add_device('virtio-scsi-pci') | 59 | --- a/tests/qemu-iotests/274.out |
50 | self.vm.add_device('%s,drive=drive0,id=%s' % | 60 | +++ b/tests/qemu-iotests/274.out |
51 | - (interface_to_device_name(interface), | 61 | @@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 9437184 |
52 | + (interface_to_device_name(self.interface), | 62 | 10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) |
53 | self.device_name)) | 63 | |
54 | self.vm.launch() | 64 | [{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, |
55 | 65 | -{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] | |
56 | @@ -XXX,XX +XXX,XX @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass): | 66 | +{ "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}] |
57 | # Should be a no-op | 67 | |
58 | self.assert_qmp(result, 'return', {}) | 68 | === preallocation=full === |
59 | 69 | Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | |
60 | -class TestCDInitiallyFilled(TestInitiallyFilled): | 70 | @@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 11534336 |
61 | - TestInitiallyFilled = TestInitiallyFilled | 71 | 4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) |
62 | - has_real_tray = True | 72 | |
63 | - | 73 | [{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, |
64 | - def setUp(self): | 74 | -{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] |
65 | - self.TestInitiallyFilled.setUp(self, 'cdrom', 'ide') | 75 | +{ "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}] |
66 | - | 76 | |
67 | -class TestCDInitiallyEmpty(TestInitiallyEmpty): | 77 | === preallocation=off === |
68 | - TestInitiallyEmpty = TestInitiallyEmpty | 78 | Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
69 | - has_real_tray = True | ||
70 | - | ||
71 | - def setUp(self): | ||
72 | - self.TestInitiallyEmpty.setUp(self, 'cdrom', 'ide') | ||
73 | +# Do this in a function to avoid leaking variables like case into the global | ||
74 | +# name space (otherwise tests would be run for the abstract base classes) | ||
75 | +def create_basic_test_classes(): | ||
76 | + for (media, interface, has_real_tray) in [ ('cdrom', 'ide', True), | ||
77 | + ('cdrom', 'scsi', True), | ||
78 | + ('disk', 'floppy', False) ]: | ||
79 | |||
80 | -class TestSCSICDInitiallyFilled(TestInitiallyFilled): | ||
81 | - TestInitiallyFilled = TestInitiallyFilled | ||
82 | - has_real_tray = True | ||
83 | + for case in [ TestInitiallyFilled, TestInitiallyEmpty ]: | ||
84 | |||
85 | - def setUp(self): | ||
86 | - self.TestInitiallyFilled.setUp(self, 'cdrom', 'scsi') | ||
87 | + attr = { 'media': media, | ||
88 | + 'interface': interface, | ||
89 | + 'has_real_tray': has_real_tray } | ||
90 | |||
91 | -class TestSCSICDInitiallyEmpty(TestInitiallyEmpty): | ||
92 | - TestInitiallyEmpty = TestInitiallyEmpty | ||
93 | - has_real_tray = True | ||
94 | + name = '%s_%s_%s' % (case.__name__, media, interface) | ||
95 | + globals()[name] = type(name, (case, ), attr) | ||
96 | |||
97 | - def setUp(self): | ||
98 | - self.TestInitiallyEmpty.setUp(self, 'cdrom', 'scsi') | ||
99 | - | ||
100 | -class TestFloppyInitiallyFilled(TestInitiallyFilled): | ||
101 | - TestInitiallyFilled = TestInitiallyFilled | ||
102 | - has_real_tray = False | ||
103 | - | ||
104 | - def setUp(self): | ||
105 | - self.TestInitiallyFilled.setUp(self, 'disk', 'floppy') | ||
106 | - | ||
107 | -class TestFloppyInitiallyEmpty(TestInitiallyEmpty): | ||
108 | - TestInitiallyEmpty = TestInitiallyEmpty | ||
109 | - has_real_tray = False | ||
110 | - | ||
111 | - def setUp(self): | ||
112 | - self.TestInitiallyEmpty.setUp(self, 'disk', 'floppy') | ||
113 | - # FDDs not having a real tray and there not being a medium inside the | ||
114 | - # tray at startup means the tray will be considered open | ||
115 | - self.has_opened = True | ||
116 | +create_basic_test_classes() | ||
117 | |||
118 | class TestChangeReadOnly(ChangeBaseClass): | ||
119 | device_name = 'qdev0' | ||
120 | -- | 79 | -- |
121 | 2.20.1 | 80 | 2.25.3 |
122 | 81 | ||
123 | 82 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Andrzej Jakowski <andrzej.jakowski@linux.intel.com> |
---|---|---|---|
2 | 2 | ||
3 | Currently, bdrv_replace_child_noperm() undrains the parent until it is | 3 | This patch introduces support for PMR that has been defined as part of NVMe 1.4 |
4 | completely undrained, then re-drains it after attaching the new child | 4 | spec. User can now specify a pmrdev option that should point to HostMemoryBackend. |
5 | node. | 5 | pmrdev memory region will subsequently be exposed as PCI BAR 2 in emulated NVMe |
6 | device. Guest OS can perform MMIO reads and writes to the PMR region that will stay | ||
7 | persistent across system reboot. | ||
6 | 8 | ||
7 | This is a problem with bdrv_drop_intermediate(): We want to keep the | 9 | Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com> |
8 | whole subtree drained, including parents, while the operation is | 10 | Reviewed-by: Klaus Jensen <k.jensen@samsung.com> |
9 | under way. bdrv_replace_child_noperm() breaks this by allowing every | 11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | parent to become unquiesced briefly, and then redraining it. | 12 | Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com> |
11 | 13 | Reviewed-by: Keith Busch <kbusch@kernel.org> | |
12 | In fact, there is no reason why the parent should become unquiesced and | ||
13 | be allowed to submit requests to the new child node if that new node is | ||
14 | supposed to be kept drained. So if anything, we have to drain the | ||
15 | parent before detaching the old child node. Conversely, we have to | ||
16 | undrain it only after attaching the new child node. | ||
17 | |||
18 | Thus, change the whole drain algorithm here: Calculate the number of | ||
19 | times we have to drain/undrain the parent before replacing the child | ||
20 | node then drain it (if necessary), replace the child node, and then | ||
21 | undrain it. | ||
22 | |||
23 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
24 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
25 | --- | 15 | --- |
26 | block.c | 49 +++++++++++++++++++++++++++++++++---------------- | 16 | hw/block/nvme.h | 2 + |
27 | 1 file changed, 33 insertions(+), 16 deletions(-) | 17 | include/block/nvme.h | 172 +++++++++++++++++++++++++++++++++++++++++ |
18 | hw/block/nvme.c | 109 ++++++++++++++++++++++++++ | ||
19 | hw/block/Makefile.objs | 2 +- | ||
20 | hw/block/trace-events | 4 + | ||
21 | 5 files changed, 288 insertions(+), 1 deletion(-) | ||
28 | 22 | ||
29 | diff --git a/block.c b/block.c | 23 | diff --git a/hw/block/nvme.h b/hw/block/nvme.h |
30 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/block.c | 25 | --- a/hw/block/nvme.h |
32 | +++ b/block.c | 26 | +++ b/hw/block/nvme.h |
33 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | 27 | @@ -XXX,XX +XXX,XX @@ typedef struct NvmeCtrl { |
34 | BlockDriverState *new_bs) | 28 | uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ |
35 | { | 29 | |
36 | BlockDriverState *old_bs = child->bs; | 30 | char *serial; |
37 | - int i; | 31 | + HostMemoryBackend *pmrdev; |
38 | + int new_bs_quiesce_counter; | 32 | + |
39 | + int drain_saldo; | 33 | NvmeNamespace *namespaces; |
40 | 34 | NvmeSQueue **sq; | |
41 | assert(!child->frozen); | 35 | NvmeCQueue **cq; |
42 | 36 | diff --git a/include/block/nvme.h b/include/block/nvme.h | |
43 | if (old_bs && new_bs) { | 37 | index XXXXXXX..XXXXXXX 100644 |
44 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); | 38 | --- a/include/block/nvme.h |
39 | +++ b/include/block/nvme.h | ||
40 | @@ -XXX,XX +XXX,XX @@ typedef struct NvmeBar { | ||
41 | uint64_t acq; | ||
42 | uint32_t cmbloc; | ||
43 | uint32_t cmbsz; | ||
44 | + uint8_t padding[3520]; /* not used by QEMU */ | ||
45 | + uint32_t pmrcap; | ||
46 | + uint32_t pmrctl; | ||
47 | + uint32_t pmrsts; | ||
48 | + uint32_t pmrebs; | ||
49 | + uint32_t pmrswtp; | ||
50 | + uint32_t pmrmsc; | ||
51 | } NvmeBar; | ||
52 | |||
53 | enum NvmeCapShift { | ||
54 | @@ -XXX,XX +XXX,XX @@ enum NvmeCapShift { | ||
55 | CAP_CSS_SHIFT = 37, | ||
56 | CAP_MPSMIN_SHIFT = 48, | ||
57 | CAP_MPSMAX_SHIFT = 52, | ||
58 | + CAP_PMR_SHIFT = 56, | ||
59 | }; | ||
60 | |||
61 | enum NvmeCapMask { | ||
62 | @@ -XXX,XX +XXX,XX @@ enum NvmeCapMask { | ||
63 | CAP_CSS_MASK = 0xff, | ||
64 | CAP_MPSMIN_MASK = 0xf, | ||
65 | CAP_MPSMAX_MASK = 0xf, | ||
66 | + CAP_PMR_MASK = 0x1, | ||
67 | }; | ||
68 | |||
69 | #define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK) | ||
70 | @@ -XXX,XX +XXX,XX @@ enum NvmeCapMask { | ||
71 | << CAP_MPSMIN_SHIFT) | ||
72 | #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\ | ||
73 | << CAP_MPSMAX_SHIFT) | ||
74 | +#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\ | ||
75 | + << CAP_PMR_SHIFT) | ||
76 | |||
77 | enum NvmeCcShift { | ||
78 | CC_EN_SHIFT = 0, | ||
79 | @@ -XXX,XX +XXX,XX @@ enum NvmeCmbszMask { | ||
80 | #define NVME_CMBSZ_GETSIZE(cmbsz) \ | ||
81 | (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz)))) | ||
82 | |||
83 | +enum NvmePmrcapShift { | ||
84 | + PMRCAP_RDS_SHIFT = 3, | ||
85 | + PMRCAP_WDS_SHIFT = 4, | ||
86 | + PMRCAP_BIR_SHIFT = 5, | ||
87 | + PMRCAP_PMRTU_SHIFT = 8, | ||
88 | + PMRCAP_PMRWBM_SHIFT = 10, | ||
89 | + PMRCAP_PMRTO_SHIFT = 16, | ||
90 | + PMRCAP_CMSS_SHIFT = 24, | ||
91 | +}; | ||
92 | + | ||
93 | +enum NvmePmrcapMask { | ||
94 | + PMRCAP_RDS_MASK = 0x1, | ||
95 | + PMRCAP_WDS_MASK = 0x1, | ||
96 | + PMRCAP_BIR_MASK = 0x7, | ||
97 | + PMRCAP_PMRTU_MASK = 0x3, | ||
98 | + PMRCAP_PMRWBM_MASK = 0xf, | ||
99 | + PMRCAP_PMRTO_MASK = 0xff, | ||
100 | + PMRCAP_CMSS_MASK = 0x1, | ||
101 | +}; | ||
102 | + | ||
103 | +#define NVME_PMRCAP_RDS(pmrcap) \ | ||
104 | + ((pmrcap >> PMRCAP_RDS_SHIFT) & PMRCAP_RDS_MASK) | ||
105 | +#define NVME_PMRCAP_WDS(pmrcap) \ | ||
106 | + ((pmrcap >> PMRCAP_WDS_SHIFT) & PMRCAP_WDS_MASK) | ||
107 | +#define NVME_PMRCAP_BIR(pmrcap) \ | ||
108 | + ((pmrcap >> PMRCAP_BIR_SHIFT) & PMRCAP_BIR_MASK) | ||
109 | +#define NVME_PMRCAP_PMRTU(pmrcap) \ | ||
110 | + ((pmrcap >> PMRCAP_PMRTU_SHIFT) & PMRCAP_PMRTU_MASK) | ||
111 | +#define NVME_PMRCAP_PMRWBM(pmrcap) \ | ||
112 | + ((pmrcap >> PMRCAP_PMRWBM_SHIFT) & PMRCAP_PMRWBM_MASK) | ||
113 | +#define NVME_PMRCAP_PMRTO(pmrcap) \ | ||
114 | + ((pmrcap >> PMRCAP_PMRTO_SHIFT) & PMRCAP_PMRTO_MASK) | ||
115 | +#define NVME_PMRCAP_CMSS(pmrcap) \ | ||
116 | + ((pmrcap >> PMRCAP_CMSS_SHIFT) & PMRCAP_CMSS_MASK) | ||
117 | + | ||
118 | +#define NVME_PMRCAP_SET_RDS(pmrcap, val) \ | ||
119 | + (pmrcap |= (uint64_t)(val & PMRCAP_RDS_MASK) << PMRCAP_RDS_SHIFT) | ||
120 | +#define NVME_PMRCAP_SET_WDS(pmrcap, val) \ | ||
121 | + (pmrcap |= (uint64_t)(val & PMRCAP_WDS_MASK) << PMRCAP_WDS_SHIFT) | ||
122 | +#define NVME_PMRCAP_SET_BIR(pmrcap, val) \ | ||
123 | + (pmrcap |= (uint64_t)(val & PMRCAP_BIR_MASK) << PMRCAP_BIR_SHIFT) | ||
124 | +#define NVME_PMRCAP_SET_PMRTU(pmrcap, val) \ | ||
125 | + (pmrcap |= (uint64_t)(val & PMRCAP_PMRTU_MASK) << PMRCAP_PMRTU_SHIFT) | ||
126 | +#define NVME_PMRCAP_SET_PMRWBM(pmrcap, val) \ | ||
127 | + (pmrcap |= (uint64_t)(val & PMRCAP_PMRWBM_MASK) << PMRCAP_PMRWBM_SHIFT) | ||
128 | +#define NVME_PMRCAP_SET_PMRTO(pmrcap, val) \ | ||
129 | + (pmrcap |= (uint64_t)(val & PMRCAP_PMRTO_MASK) << PMRCAP_PMRTO_SHIFT) | ||
130 | +#define NVME_PMRCAP_SET_CMSS(pmrcap, val) \ | ||
131 | + (pmrcap |= (uint64_t)(val & PMRCAP_CMSS_MASK) << PMRCAP_CMSS_SHIFT) | ||
132 | + | ||
133 | +enum NvmePmrctlShift { | ||
134 | + PMRCTL_EN_SHIFT = 0, | ||
135 | +}; | ||
136 | + | ||
137 | +enum NvmePmrctlMask { | ||
138 | + PMRCTL_EN_MASK = 0x1, | ||
139 | +}; | ||
140 | + | ||
141 | +#define NVME_PMRCTL_EN(pmrctl) ((pmrctl >> PMRCTL_EN_SHIFT) & PMRCTL_EN_MASK) | ||
142 | + | ||
143 | +#define NVME_PMRCTL_SET_EN(pmrctl, val) \ | ||
144 | + (pmrctl |= (uint64_t)(val & PMRCTL_EN_MASK) << PMRCTL_EN_SHIFT) | ||
145 | + | ||
146 | +enum NvmePmrstsShift { | ||
147 | + PMRSTS_ERR_SHIFT = 0, | ||
148 | + PMRSTS_NRDY_SHIFT = 8, | ||
149 | + PMRSTS_HSTS_SHIFT = 9, | ||
150 | + PMRSTS_CBAI_SHIFT = 12, | ||
151 | +}; | ||
152 | + | ||
153 | +enum NvmePmrstsMask { | ||
154 | + PMRSTS_ERR_MASK = 0xff, | ||
155 | + PMRSTS_NRDY_MASK = 0x1, | ||
156 | + PMRSTS_HSTS_MASK = 0x7, | ||
157 | + PMRSTS_CBAI_MASK = 0x1, | ||
158 | +}; | ||
159 | + | ||
160 | +#define NVME_PMRSTS_ERR(pmrsts) \ | ||
161 | + ((pmrsts >> PMRSTS_ERR_SHIFT) & PMRSTS_ERR_MASK) | ||
162 | +#define NVME_PMRSTS_NRDY(pmrsts) \ | ||
163 | + ((pmrsts >> PMRSTS_NRDY_SHIFT) & PMRSTS_NRDY_MASK) | ||
164 | +#define NVME_PMRSTS_HSTS(pmrsts) \ | ||
165 | + ((pmrsts >> PMRSTS_HSTS_SHIFT) & PMRSTS_HSTS_MASK) | ||
166 | +#define NVME_PMRSTS_CBAI(pmrsts) \ | ||
167 | + ((pmrsts >> PMRSTS_CBAI_SHIFT) & PMRSTS_CBAI_MASK) | ||
168 | + | ||
169 | +#define NVME_PMRSTS_SET_ERR(pmrsts, val) \ | ||
170 | + (pmrsts |= (uint64_t)(val & PMRSTS_ERR_MASK) << PMRSTS_ERR_SHIFT) | ||
171 | +#define NVME_PMRSTS_SET_NRDY(pmrsts, val) \ | ||
172 | + (pmrsts |= (uint64_t)(val & PMRSTS_NRDY_MASK) << PMRSTS_NRDY_SHIFT) | ||
173 | +#define NVME_PMRSTS_SET_HSTS(pmrsts, val) \ | ||
174 | + (pmrsts |= (uint64_t)(val & PMRSTS_HSTS_MASK) << PMRSTS_HSTS_SHIFT) | ||
175 | +#define NVME_PMRSTS_SET_CBAI(pmrsts, val) \ | ||
176 | + (pmrsts |= (uint64_t)(val & PMRSTS_CBAI_MASK) << PMRSTS_CBAI_SHIFT) | ||
177 | + | ||
178 | +enum NvmePmrebsShift { | ||
179 | + PMREBS_PMRSZU_SHIFT = 0, | ||
180 | + PMREBS_RBB_SHIFT = 4, | ||
181 | + PMREBS_PMRWBZ_SHIFT = 8, | ||
182 | +}; | ||
183 | + | ||
184 | +enum NvmePmrebsMask { | ||
185 | + PMREBS_PMRSZU_MASK = 0xf, | ||
186 | + PMREBS_RBB_MASK = 0x1, | ||
187 | + PMREBS_PMRWBZ_MASK = 0xffffff, | ||
188 | +}; | ||
189 | + | ||
190 | +#define NVME_PMREBS_PMRSZU(pmrebs) \ | ||
191 | + ((pmrebs >> PMREBS_PMRSZU_SHIFT) & PMREBS_PMRSZU_MASK) | ||
192 | +#define NVME_PMREBS_RBB(pmrebs) \ | ||
193 | + ((pmrebs >> PMREBS_RBB_SHIFT) & PMREBS_RBB_MASK) | ||
194 | +#define NVME_PMREBS_PMRWBZ(pmrebs) \ | ||
195 | + ((pmrebs >> PMREBS_PMRWBZ_SHIFT) & PMREBS_PMRWBZ_MASK) | ||
196 | + | ||
197 | +#define NVME_PMREBS_SET_PMRSZU(pmrebs, val) \ | ||
198 | + (pmrebs |= (uint64_t)(val & PMREBS_PMRSZU_MASK) << PMREBS_PMRSZU_SHIFT) | ||
199 | +#define NVME_PMREBS_SET_RBB(pmrebs, val) \ | ||
200 | + (pmrebs |= (uint64_t)(val & PMREBS_RBB_MASK) << PMREBS_RBB_SHIFT) | ||
201 | +#define NVME_PMREBS_SET_PMRWBZ(pmrebs, val) \ | ||
202 | + (pmrebs |= (uint64_t)(val & PMREBS_PMRWBZ_MASK) << PMREBS_PMRWBZ_SHIFT) | ||
203 | + | ||
204 | +enum NvmePmrswtpShift { | ||
205 | + PMRSWTP_PMRSWTU_SHIFT = 0, | ||
206 | + PMRSWTP_PMRSWTV_SHIFT = 8, | ||
207 | +}; | ||
208 | + | ||
209 | +enum NvmePmrswtpMask { | ||
210 | + PMRSWTP_PMRSWTU_MASK = 0xf, | ||
211 | + PMRSWTP_PMRSWTV_MASK = 0xffffff, | ||
212 | +}; | ||
213 | + | ||
214 | +#define NVME_PMRSWTP_PMRSWTU(pmrswtp) \ | ||
215 | + ((pmrswtp >> PMRSWTP_PMRSWTU_SHIFT) & PMRSWTP_PMRSWTU_MASK) | ||
216 | +#define NVME_PMRSWTP_PMRSWTV(pmrswtp) \ | ||
217 | + ((pmrswtp >> PMRSWTP_PMRSWTV_SHIFT) & PMRSWTP_PMRSWTV_MASK) | ||
218 | + | ||
219 | +#define NVME_PMRSWTP_SET_PMRSWTU(pmrswtp, val) \ | ||
220 | + (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTU_MASK) << PMRSWTP_PMRSWTU_SHIFT) | ||
221 | +#define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \ | ||
222 | + (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT) | ||
223 | + | ||
224 | +enum NvmePmrmscShift { | ||
225 | + PMRMSC_CMSE_SHIFT = 1, | ||
226 | + PMRMSC_CBA_SHIFT = 12, | ||
227 | +}; | ||
228 | + | ||
229 | +enum NvmePmrmscMask { | ||
230 | + PMRMSC_CMSE_MASK = 0x1, | ||
231 | + PMRMSC_CBA_MASK = 0xfffffffffffff, | ||
232 | +}; | ||
233 | + | ||
234 | +#define NVME_PMRMSC_CMSE(pmrmsc) \ | ||
235 | + ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK) | ||
236 | +#define NVME_PMRMSC_CBA(pmrmsc) \ | ||
237 | + ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK) | ||
238 | + | ||
239 | +#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \ | ||
240 | + (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT) | ||
241 | +#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \ | ||
242 | + (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT) | ||
243 | + | ||
244 | typedef struct NvmeCmd { | ||
245 | uint8_t opcode; | ||
246 | uint8_t fuse; | ||
247 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c | ||
248 | index XXXXXXX..XXXXXXX 100644 | ||
249 | --- a/hw/block/nvme.c | ||
250 | +++ b/hw/block/nvme.c | ||
251 | @@ -XXX,XX +XXX,XX @@ | ||
252 | * -drive file=<file>,if=none,id=<drive_id> | ||
253 | * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \ | ||
254 | * cmb_size_mb=<cmb_size_mb[optional]>, \ | ||
255 | + * [pmrdev=<mem_backend_file_id>,] \ | ||
256 | * num_queues=<N[optional]> | ||
257 | * | ||
258 | * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at | ||
259 | * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. | ||
260 | + * | ||
261 | + * cmb_size_mb= and pmrdev= options are mutually exclusive due to limitation | ||
262 | + * in available BARs. cmb_size_mb= will take precedence over pmrdev= when | ||
263 | + * both are provided. | ||
264 | + * Enabling pmr emulation can be achieved by pointing to memory-backend-file. | ||
265 | + * For example: | ||
266 | + * -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \ | ||
267 | + * size=<size> .... -device nvme,...,pmrdev=<mem_id> | ||
268 | */ | ||
269 | |||
270 | #include "qemu/osdep.h" | ||
271 | @@ -XXX,XX +XXX,XX @@ | ||
272 | #include "sysemu/sysemu.h" | ||
273 | #include "qapi/error.h" | ||
274 | #include "qapi/visitor.h" | ||
275 | +#include "sysemu/hostmem.h" | ||
276 | #include "sysemu/block-backend.h" | ||
277 | +#include "exec/ram_addr.h" | ||
278 | |||
279 | #include "qemu/log.h" | ||
280 | #include "qemu/module.h" | ||
281 | @@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
282 | NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, | ||
283 | "invalid write to read only CMBSZ, ignored"); | ||
284 | return; | ||
285 | + case 0xE00: /* PMRCAP */ | ||
286 | + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly, | ||
287 | + "invalid write to PMRCAP register, ignored"); | ||
288 | + return; | ||
289 | + case 0xE04: /* TODO PMRCTL */ | ||
290 | + break; | ||
291 | + case 0xE08: /* PMRSTS */ | ||
292 | + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly, | ||
293 | + "invalid write to PMRSTS register, ignored"); | ||
294 | + return; | ||
295 | + case 0xE0C: /* PMREBS */ | ||
296 | + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly, | ||
297 | + "invalid write to PMREBS register, ignored"); | ||
298 | + return; | ||
299 | + case 0xE10: /* PMRSWTP */ | ||
300 | + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly, | ||
301 | + "invalid write to PMRSWTP register, ignored"); | ||
302 | + return; | ||
303 | + case 0xE14: /* TODO PMRMSC */ | ||
304 | + break; | ||
305 | default: | ||
306 | NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, | ||
307 | "invalid MMIO write," | ||
308 | @@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) | ||
45 | } | 309 | } |
46 | + | 310 | |
47 | + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); | 311 | if (addr < sizeof(n->bar)) { |
48 | + drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; | 312 | + /* |
49 | + | 313 | + * When PMRWBM bit 1 is set then read from |
50 | + /* | 314 | + * PMRSTS should ensure prior writes |
51 | + * If the new child node is drained but the old one was not, flush | 315 | + * made it to persistent media |
52 | + * all outstanding requests to the old child node. | 316 | + */ |
53 | + */ | 317 | + if (addr == 0xE08 && |
54 | + while (drain_saldo > 0 && child->role->drained_begin) { | 318 | + (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { |
55 | + bdrv_parent_drained_begin_single(child, true); | 319 | + qemu_ram_writeback(n->pmrdev->mr.ram_block, |
56 | + drain_saldo--; | 320 | + 0, n->pmrdev->size); |
321 | + } | ||
322 | memcpy(&val, ptr + addr, size); | ||
323 | } else { | ||
324 | NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, | ||
325 | @@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) | ||
326 | error_setg(errp, "serial property not set"); | ||
327 | return; | ||
328 | } | ||
329 | + | ||
330 | + if (!n->cmb_size_mb && n->pmrdev) { | ||
331 | + if (host_memory_backend_is_mapped(n->pmrdev)) { | ||
332 | + char *path = object_get_canonical_path_component(OBJECT(n->pmrdev)); | ||
333 | + error_setg(errp, "can't use already busy memdev: %s", path); | ||
334 | + g_free(path); | ||
335 | + return; | ||
336 | + } | ||
337 | + | ||
338 | + if (!is_power_of_2(n->pmrdev->size)) { | ||
339 | + error_setg(errp, "pmr backend size needs to be power of 2 in size"); | ||
340 | + return; | ||
341 | + } | ||
342 | + | ||
343 | + host_memory_backend_set_mapped(n->pmrdev, true); | ||
57 | + } | 344 | + } |
58 | + | 345 | + |
59 | if (old_bs) { | 346 | blkconf_blocksizes(&n->conf); |
60 | /* Detach first so that the recursive drain sections coming from @child | 347 | if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), |
61 | * are already gone and we only end the drain sections that came from | 348 | false, errp)) { |
62 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | 349 | @@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) |
63 | if (child->role->detach) { | 350 | PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | |
64 | child->role->detach(child); | 351 | PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); |
65 | } | 352 | |
66 | - while (child->parent_quiesce_counter) { | 353 | + } else if (n->pmrdev) { |
67 | - bdrv_parent_drained_end_single(child); | 354 | + /* Controller Capabilities register */ |
68 | - } | 355 | + NVME_CAP_SET_PMRS(n->bar.cap, 1); |
69 | QLIST_REMOVE(child, next_parent); | 356 | + |
70 | - } else { | 357 | + /* PMR Capabilities register */ |
71 | - assert(child->parent_quiesce_counter == 0); | 358 | + n->bar.pmrcap = 0; |
359 | + NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0); | ||
360 | + NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0); | ||
361 | + NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2); | ||
362 | + NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0); | ||
363 | + /* Turn on bit 1 support */ | ||
364 | + NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); | ||
365 | + NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0); | ||
366 | + NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0); | ||
367 | + | ||
368 | + /* PMR Control register */ | ||
369 | + n->bar.pmrctl = 0; | ||
370 | + NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0); | ||
371 | + | ||
372 | + /* PMR Status register */ | ||
373 | + n->bar.pmrsts = 0; | ||
374 | + NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0); | ||
375 | + NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0); | ||
376 | + NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0); | ||
377 | + NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0); | ||
378 | + | ||
379 | + /* PMR Elasticity Buffer Size register */ | ||
380 | + n->bar.pmrebs = 0; | ||
381 | + NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0); | ||
382 | + NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0); | ||
383 | + NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0); | ||
384 | + | ||
385 | + /* PMR Sustained Write Throughput register */ | ||
386 | + n->bar.pmrswtp = 0; | ||
387 | + NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0); | ||
388 | + NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0); | ||
389 | + | ||
390 | + /* PMR Memory Space Control register */ | ||
391 | + n->bar.pmrmsc = 0; | ||
392 | + NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0); | ||
393 | + NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0); | ||
394 | + | ||
395 | + pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), | ||
396 | + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | | ||
397 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr); | ||
72 | } | 398 | } |
73 | 399 | ||
74 | child->bs = new_bs; | 400 | for (i = 0; i < n->num_namespaces; i++) { |
75 | 401 | @@ -XXX,XX +XXX,XX @@ static void nvme_exit(PCIDevice *pci_dev) | |
76 | if (new_bs) { | 402 | if (n->cmb_size_mb) { |
77 | QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); | 403 | g_free(n->cmbuf); |
78 | - if (new_bs->quiesce_counter) { | ||
79 | - int num = new_bs->quiesce_counter; | ||
80 | - if (child->role->parent_is_bds) { | ||
81 | - num -= bdrv_drain_all_count; | ||
82 | - } | ||
83 | - assert(num >= 0); | ||
84 | - for (i = 0; i < num; i++) { | ||
85 | - bdrv_parent_drained_begin_single(child, true); | ||
86 | - } | ||
87 | - } | ||
88 | + | ||
89 | + /* | ||
90 | + * Detaching the old node may have led to the new node's | ||
91 | + * quiesce_counter having been decreased. Not a problem, we | ||
92 | + * just need to recognize this here and then invoke | ||
93 | + * drained_end appropriately more often. | ||
94 | + */ | ||
95 | + assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); | ||
96 | + drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; | ||
97 | |||
98 | /* Attach only after starting new drained sections, so that recursive | ||
99 | * drain sections coming from @child don't get an extra .drained_begin | ||
100 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
101 | child->role->attach(child); | ||
102 | } | ||
103 | } | 404 | } |
104 | + | 405 | + |
105 | + /* | 406 | + if (n->pmrdev) { |
106 | + * If the old child node was drained but the new one is not, allow | 407 | + host_memory_backend_set_mapped(n->pmrdev, false); |
107 | + * requests to come in only after the new node has been attached. | ||
108 | + */ | ||
109 | + while (drain_saldo < 0 && child->role->drained_end) { | ||
110 | + bdrv_parent_drained_end_single(child); | ||
111 | + drain_saldo++; | ||
112 | + } | 408 | + } |
409 | msix_uninit_exclusive_bar(pci_dev); | ||
113 | } | 410 | } |
114 | 411 | ||
115 | /* | 412 | static Property nvme_props[] = { |
413 | DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), | ||
414 | + DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND, | ||
415 | + HostMemoryBackend *), | ||
416 | DEFINE_PROP_STRING("serial", NvmeCtrl, serial), | ||
417 | DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), | ||
418 | DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), | ||
419 | diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs | ||
420 | index XXXXXXX..XXXXXXX 100644 | ||
421 | --- a/hw/block/Makefile.objs | ||
422 | +++ b/hw/block/Makefile.objs | ||
423 | @@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o | ||
424 | common-obj-$(CONFIG_XEN) += xen-block.o | ||
425 | common-obj-$(CONFIG_ECC) += ecc.o | ||
426 | common-obj-$(CONFIG_ONENAND) += onenand.o | ||
427 | -common-obj-$(CONFIG_NVME_PCI) += nvme.o | ||
428 | common-obj-$(CONFIG_SWIM) += swim.o | ||
429 | |||
430 | common-obj-$(CONFIG_SH4) += tc58128.o | ||
431 | |||
432 | obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o | ||
433 | obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o | ||
434 | +obj-$(CONFIG_NVME_PCI) += nvme.o | ||
435 | |||
436 | obj-y += dataplane/ | ||
437 | diff --git a/hw/block/trace-events b/hw/block/trace-events | ||
438 | index XXXXXXX..XXXXXXX 100644 | ||
439 | --- a/hw/block/trace-events | ||
440 | +++ b/hw/block/trace-events | ||
441 | @@ -XXX,XX +XXX,XX @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA | ||
442 | nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" | ||
443 | nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" | ||
444 | nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" | ||
445 | +nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" | ||
446 | +nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" | ||
447 | +nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" | ||
448 | +nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" | ||
449 | nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" | ||
450 | nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" | ||
451 | nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" | ||
116 | -- | 452 | -- |
117 | 2.20.1 | 453 | 2.25.3 |
118 | 454 | ||
119 | 455 | diff view generated by jsdifflib |
1 | The same change as commit 2b23f28639 ('block/copy-on-read: Fix | 1 | The QMP handler qmp_object_add() and the implementation of --object in |
---|---|---|---|
2 | permissions for inactive node') made for the copy-on-read driver can be | 2 | qemu-storage-daemon can share most of the code. Currently, |
3 | made for bdrv_filter_default_perms(): Retaining the old permissions from | 3 | qemu-storage-daemon calls qmp_object_add(), but this is not correct |
4 | the BdrvChild if it is given complicates things unnecessarily when in | 4 | because different visitors need to be used. |
5 | the end this only means that the options set in the c == NULL case (i.e. | 5 | |
6 | during child creation) are retained. | 6 | As a first step towards a fix, make qmp_object_add() a wrapper around a |
7 | new function user_creatable_add_dict() that can get an additional | ||
8 | parameter. The handling of "props" is only required for compatibility | ||
9 | and not required for the qemu-storage-daemon command line, so it stays | ||
10 | in qmp_object_add(). | ||
7 | 11 | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
10 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
11 | --- | 13 | --- |
12 | block.c | 12 ++---------- | 14 | include/qom/object_interfaces.h | 12 ++++++++++++ |
13 | 1 file changed, 2 insertions(+), 10 deletions(-) | 15 | qom/object_interfaces.c | 27 +++++++++++++++++++++++++++ |
16 | qom/qom-qmp-cmds.c | 24 +----------------------- | ||
17 | 3 files changed, 40 insertions(+), 23 deletions(-) | ||
14 | 18 | ||
15 | diff --git a/block.c b/block.c | 19 | diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h |
16 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block.c | 21 | --- a/include/qom/object_interfaces.h |
18 | +++ b/block.c | 22 | +++ b/include/qom/object_interfaces.h |
19 | @@ -XXX,XX +XXX,XX @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, | 23 | @@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id, |
20 | uint64_t perm, uint64_t shared, | 24 | const QDict *qdict, |
21 | uint64_t *nperm, uint64_t *nshared) | 25 | Visitor *v, Error **errp); |
26 | |||
27 | +/** | ||
28 | + * user_creatable_add_dict: | ||
29 | + * @qdict: the object definition | ||
30 | + * @errp: if an error occurs, a pointer to an area to store the error | ||
31 | + * | ||
32 | + * Create an instance of the user creatable object that is defined by | ||
33 | + * @qdict. The object type is taken from the QDict key 'qom-type', its | ||
34 | + * ID from the key 'id'. The remaining entries in @qdict are used to | ||
35 | + * initialize the object properties. | ||
36 | + */ | ||
37 | +void user_creatable_add_dict(QDict *qdict, Error **errp); | ||
38 | + | ||
39 | /** | ||
40 | * user_creatable_add_opts: | ||
41 | * @opts: the object definition | ||
42 | diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/qom/object_interfaces.c | ||
45 | +++ b/qom/object_interfaces.c | ||
46 | @@ -XXX,XX +XXX,XX @@ | ||
47 | #include "qapi/qmp/qerror.h" | ||
48 | #include "qapi/qmp/qjson.h" | ||
49 | #include "qapi/qmp/qstring.h" | ||
50 | +#include "qapi/qobject-input-visitor.h" | ||
51 | #include "qom/object_interfaces.h" | ||
52 | #include "qemu/help_option.h" | ||
53 | #include "qemu/module.h" | ||
54 | @@ -XXX,XX +XXX,XX @@ out: | ||
55 | return obj; | ||
56 | } | ||
57 | |||
58 | +void user_creatable_add_dict(QDict *qdict, Error **errp) | ||
59 | +{ | ||
60 | + Visitor *v; | ||
61 | + Object *obj; | ||
62 | + g_autofree char *type = NULL; | ||
63 | + g_autofree char *id = NULL; | ||
64 | + | ||
65 | + type = g_strdup(qdict_get_try_str(qdict, "qom-type")); | ||
66 | + if (!type) { | ||
67 | + error_setg(errp, QERR_MISSING_PARAMETER, "qom-type"); | ||
68 | + return; | ||
69 | + } | ||
70 | + qdict_del(qdict, "qom-type"); | ||
71 | + | ||
72 | + id = g_strdup(qdict_get_try_str(qdict, "id")); | ||
73 | + if (!id) { | ||
74 | + error_setg(errp, QERR_MISSING_PARAMETER, "id"); | ||
75 | + return; | ||
76 | + } | ||
77 | + qdict_del(qdict, "id"); | ||
78 | + | ||
79 | + v = qobject_input_visitor_new(QOBJECT(qdict)); | ||
80 | + obj = user_creatable_add_type(type, id, qdict, v, errp); | ||
81 | + visit_free(v); | ||
82 | + object_unref(obj); | ||
83 | +} | ||
84 | |||
85 | Object *user_creatable_add_opts(QemuOpts *opts, Error **errp) | ||
22 | { | 86 | { |
23 | - if (c == NULL) { | 87 | diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c |
24 | - *nperm = perm & DEFAULT_PERM_PASSTHROUGH; | 88 | index XXXXXXX..XXXXXXX 100644 |
25 | - *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; | 89 | --- a/qom/qom-qmp-cmds.c |
90 | +++ b/qom/qom-qmp-cmds.c | ||
91 | @@ -XXX,XX +XXX,XX @@ | ||
92 | #include "qapi/qapi-commands-qom.h" | ||
93 | #include "qapi/qmp/qdict.h" | ||
94 | #include "qapi/qmp/qerror.h" | ||
95 | -#include "qapi/qobject-input-visitor.h" | ||
96 | #include "qemu/cutils.h" | ||
97 | #include "qom/object_interfaces.h" | ||
98 | #include "qom/qom-qobject.h" | ||
99 | @@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp) | ||
100 | { | ||
101 | QObject *props; | ||
102 | QDict *pdict; | ||
103 | - Visitor *v; | ||
104 | - Object *obj; | ||
105 | - g_autofree char *type = NULL; | ||
106 | - g_autofree char *id = NULL; | ||
107 | - | ||
108 | - type = g_strdup(qdict_get_try_str(qdict, "qom-type")); | ||
109 | - if (!type) { | ||
110 | - error_setg(errp, QERR_MISSING_PARAMETER, "qom-type"); | ||
26 | - return; | 111 | - return; |
27 | - } | 112 | - } |
113 | - qdict_del(qdict, "qom-type"); | ||
28 | - | 114 | - |
29 | - *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) | | 115 | - id = g_strdup(qdict_get_try_str(qdict, "id")); |
30 | - (c->perm & DEFAULT_PERM_UNCHANGED); | 116 | - if (!id) { |
31 | - *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | | 117 | - error_setg(errp, QERR_MISSING_PARAMETER, "id"); |
32 | - (c->shared_perm & DEFAULT_PERM_UNCHANGED); | 118 | - return; |
33 | + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; | 119 | - } |
34 | + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; | 120 | - qdict_del(qdict, "id"); |
121 | |||
122 | props = qdict_get(qdict, "props"); | ||
123 | if (props) { | ||
124 | @@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp) | ||
125 | qobject_unref(pdict); | ||
126 | } | ||
127 | |||
128 | - v = qobject_input_visitor_new(QOBJECT(qdict)); | ||
129 | - obj = user_creatable_add_type(type, id, qdict, v, errp); | ||
130 | - visit_free(v); | ||
131 | - object_unref(obj); | ||
132 | + user_creatable_add_dict(qdict, errp); | ||
35 | } | 133 | } |
36 | 134 | ||
37 | void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, | 135 | void qmp_object_del(const char *id, Error **errp) |
38 | -- | 136 | -- |
39 | 2.20.1 | 137 | 2.25.3 |
40 | 138 | ||
41 | 139 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | After processing the option string with the keyval parser, we get a |
---|---|---|---|
2 | QDict that contains only strings. This QDict must be fed to a keyval | ||
3 | visitor which converts the strings into the right data types. | ||
2 | 4 | ||
3 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 5 | qmp_object_add(), however, uses the normal QObject input visitor, which |
6 | expects a QDict where all properties already have the QType that matches | ||
7 | the data type required by the QOM object type. | ||
8 | |||
9 | Change the --object implementation in qemu-storage-daemon so that it | ||
10 | doesn't call qmp_object_add(), but calls user_creatable_add_dict() | ||
11 | directly instead and passes it a new keyval boolean that decides which | ||
12 | visitor must be used. | ||
13 | |||
14 | Reported-by: Coiby Xu <coiby.xu@gmail.com> | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | --- | 16 | --- |
6 | tests/test-bdrv-drain.c | 167 ++++++++++++++++++++++++++++++++++++++++ | 17 | include/qom/object_interfaces.h | 6 +++++- |
7 | 1 file changed, 167 insertions(+) | 18 | qemu-storage-daemon.c | 4 +--- |
19 | qom/object_interfaces.c | 8 ++++++-- | ||
20 | qom/qom-qmp-cmds.c | 2 +- | ||
21 | 4 files changed, 13 insertions(+), 7 deletions(-) | ||
8 | 22 | ||
9 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 23 | diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h |
10 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/tests/test-bdrv-drain.c | 25 | --- a/include/qom/object_interfaces.h |
12 | +++ b/tests/test-bdrv-drain.c | 26 | +++ b/include/qom/object_interfaces.h |
13 | @@ -XXX,XX +XXX,XX @@ static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c, | 27 | @@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id, |
14 | nperm, nshared); | 28 | /** |
29 | * user_creatable_add_dict: | ||
30 | * @qdict: the object definition | ||
31 | + * @keyval: if true, use a keyval visitor for processing @qdict (i.e. | ||
32 | + * assume that all @qdict values are strings); otherwise, use | ||
33 | + * the normal QObject visitor (i.e. assume all @qdict values | ||
34 | + * have the QType expected by the QOM object type) | ||
35 | * @errp: if an error occurs, a pointer to an area to store the error | ||
36 | * | ||
37 | * Create an instance of the user creatable object that is defined by | ||
38 | @@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id, | ||
39 | * ID from the key 'id'. The remaining entries in @qdict are used to | ||
40 | * initialize the object properties. | ||
41 | */ | ||
42 | -void user_creatable_add_dict(QDict *qdict, Error **errp); | ||
43 | +void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp); | ||
44 | |||
45 | /** | ||
46 | * user_creatable_add_opts: | ||
47 | diff --git a/qemu-storage-daemon.c b/qemu-storage-daemon.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/qemu-storage-daemon.c | ||
50 | +++ b/qemu-storage-daemon.c | ||
51 | @@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[]) | ||
52 | QemuOpts *opts; | ||
53 | const char *type; | ||
54 | QDict *args; | ||
55 | - QObject *ret_data = NULL; | ||
56 | |||
57 | /* FIXME The keyval parser rejects 'help' arguments, so we must | ||
58 | * unconditionally try QemuOpts first. */ | ||
59 | @@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[]) | ||
60 | qemu_opts_del(opts); | ||
61 | |||
62 | args = keyval_parse(optarg, "qom-type", &error_fatal); | ||
63 | - qmp_object_add(args, &ret_data, &error_fatal); | ||
64 | + user_creatable_add_dict(args, true, &error_fatal); | ||
65 | qobject_unref(args); | ||
66 | - qobject_unref(ret_data); | ||
67 | break; | ||
68 | } | ||
69 | default: | ||
70 | diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c | ||
71 | index XXXXXXX..XXXXXXX 100644 | ||
72 | --- a/qom/object_interfaces.c | ||
73 | +++ b/qom/object_interfaces.c | ||
74 | @@ -XXX,XX +XXX,XX @@ out: | ||
75 | return obj; | ||
15 | } | 76 | } |
16 | 77 | ||
17 | +static int bdrv_test_change_backing_file(BlockDriverState *bs, | 78 | -void user_creatable_add_dict(QDict *qdict, Error **errp) |
18 | + const char *backing_file, | 79 | +void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp) |
19 | + const char *backing_fmt) | 80 | { |
20 | +{ | 81 | Visitor *v; |
21 | + return 0; | 82 | Object *obj; |
22 | +} | 83 | @@ -XXX,XX +XXX,XX @@ void user_creatable_add_dict(QDict *qdict, Error **errp) |
23 | + | 84 | } |
24 | static BlockDriver bdrv_test = { | 85 | qdict_del(qdict, "id"); |
25 | .format_name = "test", | 86 | |
26 | .instance_size = sizeof(BDRVTestState), | 87 | - v = qobject_input_visitor_new(QOBJECT(qdict)); |
27 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = { | 88 | + if (keyval) { |
28 | .bdrv_co_drain_end = bdrv_test_co_drain_end, | 89 | + v = qobject_input_visitor_new_keyval(QOBJECT(qdict)); |
29 | 90 | + } else { | |
30 | .bdrv_child_perm = bdrv_test_child_perm, | 91 | + v = qobject_input_visitor_new(QOBJECT(qdict)); |
31 | + | 92 | + } |
32 | + .bdrv_change_backing_file = bdrv_test_change_backing_file, | 93 | obj = user_creatable_add_type(type, id, qdict, v, errp); |
33 | }; | 94 | visit_free(v); |
34 | 95 | object_unref(obj); | |
35 | static void aio_ret_cb(void *opaque, int ret) | 96 | diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c |
36 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_commit_by_drained_end(void) | 97 | index XXXXXXX..XXXXXXX 100644 |
37 | bdrv_unref(bs_child); | 98 | --- a/qom/qom-qmp-cmds.c |
99 | +++ b/qom/qom-qmp-cmds.c | ||
100 | @@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp) | ||
101 | qobject_unref(pdict); | ||
102 | } | ||
103 | |||
104 | - user_creatable_add_dict(qdict, errp); | ||
105 | + user_creatable_add_dict(qdict, false, errp); | ||
38 | } | 106 | } |
39 | 107 | ||
40 | + | 108 | void qmp_object_del(const char *id, Error **errp) |
41 | +typedef struct TestSimpleBlockJob { | ||
42 | + BlockJob common; | ||
43 | + bool should_complete; | ||
44 | + bool *did_complete; | ||
45 | +} TestSimpleBlockJob; | ||
46 | + | ||
47 | +static int coroutine_fn test_simple_job_run(Job *job, Error **errp) | ||
48 | +{ | ||
49 | + TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job); | ||
50 | + | ||
51 | + while (!s->should_complete) { | ||
52 | + job_sleep_ns(job, 0); | ||
53 | + } | ||
54 | + | ||
55 | + return 0; | ||
56 | +} | ||
57 | + | ||
58 | +static void test_simple_job_clean(Job *job) | ||
59 | +{ | ||
60 | + TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job); | ||
61 | + *s->did_complete = true; | ||
62 | +} | ||
63 | + | ||
64 | +static const BlockJobDriver test_simple_job_driver = { | ||
65 | + .job_driver = { | ||
66 | + .instance_size = sizeof(TestSimpleBlockJob), | ||
67 | + .free = block_job_free, | ||
68 | + .user_resume = block_job_user_resume, | ||
69 | + .drain = block_job_drain, | ||
70 | + .run = test_simple_job_run, | ||
71 | + .clean = test_simple_job_clean, | ||
72 | + }, | ||
73 | +}; | ||
74 | + | ||
75 | +static int drop_intermediate_poll_update_filename(BdrvChild *child, | ||
76 | + BlockDriverState *new_base, | ||
77 | + const char *filename, | ||
78 | + Error **errp) | ||
79 | +{ | ||
80 | + /* | ||
81 | + * We are free to poll here, which may change the block graph, if | ||
82 | + * it is not drained. | ||
83 | + */ | ||
84 | + | ||
85 | + /* If the job is not drained: Complete it, schedule job_exit() */ | ||
86 | + aio_poll(qemu_get_current_aio_context(), false); | ||
87 | + /* If the job is not drained: Run job_exit(), finish the job */ | ||
88 | + aio_poll(qemu_get_current_aio_context(), false); | ||
89 | + | ||
90 | + return 0; | ||
91 | +} | ||
92 | + | ||
93 | +/** | ||
94 | + * Test a poll in the midst of bdrv_drop_intermediate(). | ||
95 | + * | ||
96 | + * bdrv_drop_intermediate() calls BdrvChildRole.update_filename(), | ||
97 | + * which can yield or poll. This may lead to graph changes, unless | ||
98 | + * the whole subtree in question is drained. | ||
99 | + * | ||
100 | + * We test this on the following graph: | ||
101 | + * | ||
102 | + * Job | ||
103 | + * | ||
104 | + * | | ||
105 | + * job-node | ||
106 | + * | | ||
107 | + * v | ||
108 | + * | ||
109 | + * job-node | ||
110 | + * | ||
111 | + * | | ||
112 | + * backing | ||
113 | + * | | ||
114 | + * v | ||
115 | + * | ||
116 | + * node-2 --chain--> node-1 --chain--> node-0 | ||
117 | + * | ||
118 | + * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0). | ||
119 | + * | ||
120 | + * This first updates node-2's backing filename by invoking | ||
121 | + * drop_intermediate_poll_update_filename(), which polls twice. This | ||
122 | + * causes the job to finish, which in turn causes the job-node to be | ||
123 | + * deleted. | ||
124 | + * | ||
125 | + * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it | ||
126 | + * already has a pointer to the BdrvChild edge between job-node and | ||
127 | + * node-1. When it tries to handle that edge, we probably get a | ||
128 | + * segmentation fault because the object no longer exists. | ||
129 | + * | ||
130 | + * | ||
131 | + * The solution is for bdrv_drop_intermediate() to drain top's | ||
132 | + * subtree. This prevents graph changes from happening just because | ||
133 | + * BdrvChildRole.update_filename() yields or polls. Thus, the block | ||
134 | + * job is paused during that drained section and must finish before or | ||
135 | + * after. | ||
136 | + * | ||
137 | + * (In addition, bdrv_replace_child() must keep the job paused.) | ||
138 | + */ | ||
139 | +static void test_drop_intermediate_poll(void) | ||
140 | +{ | ||
141 | + static BdrvChildRole chain_child_role; | ||
142 | + BlockDriverState *chain[3]; | ||
143 | + TestSimpleBlockJob *job; | ||
144 | + BlockDriverState *job_node; | ||
145 | + bool job_has_completed = false; | ||
146 | + int i; | ||
147 | + int ret; | ||
148 | + | ||
149 | + chain_child_role = child_backing; | ||
150 | + chain_child_role.update_filename = drop_intermediate_poll_update_filename; | ||
151 | + | ||
152 | + for (i = 0; i < 3; i++) { | ||
153 | + char name[32]; | ||
154 | + snprintf(name, 32, "node-%i", i); | ||
155 | + | ||
156 | + chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort); | ||
157 | + } | ||
158 | + | ||
159 | + job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR, | ||
160 | + &error_abort); | ||
161 | + bdrv_set_backing_hd(job_node, chain[1], &error_abort); | ||
162 | + | ||
163 | + /* | ||
164 | + * Establish the chain last, so the chain links are the first | ||
165 | + * elements in the BDS.parents lists | ||
166 | + */ | ||
167 | + for (i = 0; i < 3; i++) { | ||
168 | + if (i) { | ||
169 | + /* Takes the reference to chain[i - 1] */ | ||
170 | + chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1], | ||
171 | + "chain", &chain_child_role, | ||
172 | + &error_abort); | ||
173 | + } | ||
174 | + } | ||
175 | + | ||
176 | + job = block_job_create("job", &test_simple_job_driver, NULL, job_node, | ||
177 | + 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); | ||
178 | + | ||
179 | + /* The job has a reference now */ | ||
180 | + bdrv_unref(job_node); | ||
181 | + | ||
182 | + job->did_complete = &job_has_completed; | ||
183 | + | ||
184 | + job_start(&job->common.job); | ||
185 | + job->should_complete = true; | ||
186 | + | ||
187 | + g_assert(!job_has_completed); | ||
188 | + ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); | ||
189 | + g_assert(ret == 0); | ||
190 | + g_assert(job_has_completed); | ||
191 | + | ||
192 | + bdrv_unref(chain[2]); | ||
193 | +} | ||
194 | + | ||
195 | int main(int argc, char **argv) | ||
196 | { | ||
197 | int ret; | ||
198 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
199 | g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end", | ||
200 | test_blockjob_commit_by_drained_end); | ||
201 | |||
202 | + g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll", | ||
203 | + test_drop_intermediate_poll); | ||
204 | + | ||
205 | ret = g_test_run(); | ||
206 | qemu_event_destroy(&done_event); | ||
207 | return ret; | ||
208 | -- | 109 | -- |
209 | 2.20.1 | 110 | 2.25.3 |
210 | 111 | ||
211 | 112 | diff view generated by jsdifflib |