1 | The following changes since commit af352675efb7e92a1f5f6461a042a12015ab3d12: | 1 | The following changes since commit 16aaacb307ed607b9780c12702c44f0fe52edc7e: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/aurel/tags/pull-target-sh4-20171218' into staging (2017-12-19 19:11:11 +0000) | 3 | Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20200430' into staging (2020-04-30 14:00:36 +0100) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream | 7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream |
8 | 8 | ||
9 | for you to fetch changes up to b4d526c87b26aff6d8b353951aa175a1236ad887: | 9 | for you to fetch changes up to eaae29ef89d498d0eac553c77b554f310a47f809: |
10 | 10 | ||
11 | nvme: Add tracing (2017-12-20 11:05:49 +0100) | 11 | qemu-storage-daemon: Fix non-string --object properties (2020-04-30 17:51:07 +0200) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Block layer patches | 14 | Block layer patches: |
15 | |||
16 | - Fix resize (extending) of short overlays | ||
17 | - nvme: introduce PMR support from NVMe 1.4 spec | ||
18 | - qemu-storage-daemon: Fix non-string --object properties | ||
15 | 19 | ||
16 | ---------------------------------------------------------------- | 20 | ---------------------------------------------------------------- |
17 | Doug Gale (1): | 21 | Alberto Garcia (1): |
18 | nvme: Add tracing | 22 | qcow2: Add incompatibility note between backing files and raw external data files |
19 | 23 | ||
20 | Edgar Kaziakhmedov (1): | 24 | Andrzej Jakowski (1): |
21 | qcow2: get rid of qcow2_backing_read1 routine | 25 | nvme: introduce PMR support from NVMe 1.4 spec |
22 | 26 | ||
23 | Fam Zheng (1): | 27 | Kevin Wolf (12): |
24 | qemu-img: Document --force-share / -U | 28 | block: Add flags to BlockDriver.bdrv_co_truncate() |
29 | block: Add flags to bdrv(_co)_truncate() | ||
30 | block-backend: Add flags to blk_truncate() | ||
31 | qcow2: Support BDRV_REQ_ZERO_WRITE for truncate | ||
32 | raw-format: Support BDRV_REQ_ZERO_WRITE for truncate | ||
33 | file-posix: Support BDRV_REQ_ZERO_WRITE for truncate | ||
34 | block: truncate: Don't make backing file data visible | ||
35 | iotests: Filter testfiles out in filter_img_info() | ||
36 | iotests: Test committing to short backing file | ||
37 | qcow2: Forward ZERO_WRITE flag for full preallocation | ||
38 | qom: Factor out user_creatable_add_dict() | ||
39 | qemu-storage-daemon: Fix non-string --object properties | ||
25 | 40 | ||
26 | John Snow (1): | 41 | Paolo Bonzini (1): |
27 | iotests: fix 197 for vpc | 42 | qemu-iotests: allow qcow2 external discarded clusters to contain stale data |
28 | 43 | ||
29 | Kevin Wolf (9): | 44 | docs/interop/qcow2.txt | 3 + |
30 | block: Formats don't need CONSISTENT_READ with NO_IO | 45 | hw/block/nvme.h | 2 + |
31 | block: Make bdrv_drain_invoke() recursive | 46 | include/block/block.h | 5 +- |
32 | block: Call .drain_begin only once in bdrv_drain_all_begin() | 47 | include/block/block_int.h | 10 +- |
33 | test-bdrv-drain: Test BlockDriver callbacks for drain | 48 | include/block/nvme.h | 172 ++++++++++++++++++++++++++ |
34 | block: bdrv_drain_recurse(): Remove unused begin parameter | 49 | include/qom/object_interfaces.h | 16 +++ |
35 | block: Don't wait for requests in bdrv_drain*_end() | 50 | include/sysemu/block-backend.h | 2 +- |
36 | block: Unify order in drain functions | 51 | block.c | 3 +- |
37 | block: Don't acquire AioContext in hmp_qemu_io() | 52 | block/block-backend.c | 4 +- |
38 | block: Document that x-blockdev-change breaks quorum children list | 53 | block/commit.c | 4 +- |
54 | block/crypto.c | 7 +- | ||
55 | block/file-posix.c | 6 +- | ||
56 | block/file-win32.c | 2 +- | ||
57 | block/gluster.c | 1 + | ||
58 | block/io.c | 43 ++++++- | ||
59 | block/iscsi.c | 2 +- | ||
60 | block/mirror.c | 2 +- | ||
61 | block/nfs.c | 3 +- | ||
62 | block/parallels.c | 6 +- | ||
63 | block/qcow.c | 4 +- | ||
64 | block/qcow2-cluster.c | 2 +- | ||
65 | block/qcow2-refcount.c | 2 +- | ||
66 | block/qcow2.c | 73 +++++++++-- | ||
67 | block/qed.c | 3 +- | ||
68 | block/raw-format.c | 6 +- | ||
69 | block/rbd.c | 1 + | ||
70 | block/sheepdog.c | 4 +- | ||
71 | block/ssh.c | 2 +- | ||
72 | block/vdi.c | 2 +- | ||
73 | block/vhdx-log.c | 2 +- | ||
74 | block/vhdx.c | 6 +- | ||
75 | block/vmdk.c | 8 +- | ||
76 | block/vpc.c | 2 +- | ||
77 | blockdev.c | 2 +- | ||
78 | hw/block/nvme.c | 109 ++++++++++++++++ | ||
79 | qemu-img.c | 2 +- | ||
80 | qemu-io-cmds.c | 2 +- | ||
81 | qemu-storage-daemon.c | 4 +- | ||
82 | qom/object_interfaces.c | 31 +++++ | ||
83 | qom/qom-qmp-cmds.c | 24 +--- | ||
84 | tests/test-block-iothread.c | 9 +- | ||
85 | tests/qemu-iotests/iotests.py | 5 +- | ||
86 | hw/block/Makefile.objs | 2 +- | ||
87 | hw/block/trace-events | 4 + | ||
88 | tests/qemu-iotests/244 | 10 +- | ||
89 | tests/qemu-iotests/244.out | 9 +- | ||
90 | tests/qemu-iotests/274 | 155 +++++++++++++++++++++++ | ||
91 | tests/qemu-iotests/274.out | 268 ++++++++++++++++++++++++++++++++++++++++ | ||
92 | tests/qemu-iotests/group | 1 + | ||
93 | 49 files changed, 951 insertions(+), 96 deletions(-) | ||
94 | create mode 100755 tests/qemu-iotests/274 | ||
95 | create mode 100644 tests/qemu-iotests/274.out | ||
39 | 96 | ||
40 | Mao Zhongyi (1): | ||
41 | hw/block/nvme: Convert to realize | ||
42 | 97 | ||
43 | qapi/block-core.json | 4 + | ||
44 | block/qcow2.h | 3 - | ||
45 | block.c | 6 +- | ||
46 | block/io.c | 31 ++-- | ||
47 | block/qcow2.c | 51 +----- | ||
48 | hmp.c | 6 - | ||
49 | hw/block/nvme.c | 367 ++++++++++++++++++++++++++++++++------- | ||
50 | tests/test-bdrv-drain.c | 137 +++++++++++++++ | ||
51 | hw/block/trace-events | 93 ++++++++++ | ||
52 | qemu-img.texi | 9 + | ||
53 | tests/Makefile.include | 2 + | ||
54 | tests/qemu-iotests/197 | 4 + | ||
55 | tests/qemu-iotests/common.filter | 3 +- | ||
56 | 13 files changed, 591 insertions(+), 125 deletions(-) | ||
57 | create mode 100644 tests/test-bdrv-drain.c | ||
58 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alberto Garcia <berto@igalia.com> | ||
1 | 2 | ||
3 | Backing files and raw external data files are mutually exclusive. | ||
4 | The documentation of the raw external data bit (in autoclear_features) | ||
5 | already indicates that, but we should also mention it on the other | ||
6 | side. | ||
7 | |||
8 | Suggested-by: Eric Blake <eblake@redhat.com> | ||
9 | Signed-off-by: Alberto Garcia <berto@igalia.com> | ||
10 | Message-Id: <20200410121816.8334-1-berto@igalia.com> | ||
11 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | --- | ||
14 | docs/interop/qcow2.txt | 3 +++ | ||
15 | 1 file changed, 3 insertions(+) | ||
16 | |||
17 | diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/docs/interop/qcow2.txt | ||
20 | +++ b/docs/interop/qcow2.txt | ||
21 | @@ -XXX,XX +XXX,XX @@ The first cluster of a qcow2 image contains the file header: | ||
22 | is stored (NB: The string is not null terminated). 0 if the | ||
23 | image doesn't have a backing file. | ||
24 | |||
25 | + Note: backing files are incompatible with raw external data | ||
26 | + files (auto-clear feature bit 1). | ||
27 | + | ||
28 | 16 - 19: backing_file_size | ||
29 | Length of the backing file name in bytes. Must not be | ||
30 | longer than 1023 bytes. Undefined if the image doesn't have | ||
31 | -- | ||
32 | 2.25.3 | ||
33 | |||
34 | diff view generated by jsdifflib |
1 | From: Fam Zheng <famz@redhat.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Fam Zheng <famz@redhat.com> | 3 | Test 244 checks the expected behavior of qcow2 external data files |
4 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 4 | with respect to zero and discarded clusters. Filesystems however |
5 | Reviewed-by: Kashyap Chamarthy <kchamart@redhat.com> | 5 | are free to ignore discard requests, and this seems to be the |
6 | case for overlayfs. Relax the tests to skip checks on the | ||
7 | external data file for discarded areas, which implies not using | ||
8 | qemu-img compare in the data_file_raw=on case. | ||
9 | |||
10 | This fixes docker tests on RHEL8. | ||
11 | |||
12 | Cc: Kevin Wolf <kwolf@redhat.com> | ||
13 | Cc: qemu-block@nongnu.org | ||
14 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
15 | Message-Id: <20200409191006.24429-1-pbonzini@redhat.com> | ||
6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
7 | --- | 17 | --- |
8 | qemu-img.texi | 9 +++++++++ | 18 | tests/qemu-iotests/244 | 10 ++++++++-- |
9 | 1 file changed, 9 insertions(+) | 19 | tests/qemu-iotests/244.out | 9 ++++++--- |
20 | 2 files changed, 14 insertions(+), 5 deletions(-) | ||
10 | 21 | ||
11 | diff --git a/qemu-img.texi b/qemu-img.texi | 22 | diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244 |
23 | index XXXXXXX..XXXXXXX 100755 | ||
24 | --- a/tests/qemu-iotests/244 | ||
25 | +++ b/tests/qemu-iotests/244 | ||
26 | @@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \ | ||
27 | echo | ||
28 | $QEMU_IO -c 'read -P 0 0 1M' \ | ||
29 | -c 'read -P 0x11 1M 1M' \ | ||
30 | - -c 'read -P 0 2M 2M' \ | ||
31 | -c 'read -P 0x11 4M 1M' \ | ||
32 | -c 'read -P 0 5M 1M' \ | ||
33 | -f raw "$TEST_IMG.data" | | ||
34 | @@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \ | ||
35 | -f $IMGFMT "$TEST_IMG" | | ||
36 | _filter_qemu_io | ||
37 | |||
38 | +# Discarded clusters are only marked as such in the qcow2 metadata, but | ||
39 | +# they can contain stale data in the external data file. Instead, zero | ||
40 | +# clusters must be zeroed in the external data file too. | ||
41 | echo | ||
42 | -$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.data" | ||
43 | +$QEMU_IO -c 'read -P 0 0 1M' \ | ||
44 | + -c 'read -P 0x11 1M 1M' \ | ||
45 | + -c 'read -P 0 3M 3M' \ | ||
46 | + -f raw "$TEST_IMG".data | | ||
47 | + _filter_qemu_io | ||
48 | |||
49 | echo -n "qcow2 file size after I/O: " | ||
50 | du -b $TEST_IMG | cut -f1 | ||
51 | diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out | ||
12 | index XXXXXXX..XXXXXXX 100644 | 52 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/qemu-img.texi | 53 | --- a/tests/qemu-iotests/244.out |
14 | +++ b/qemu-img.texi | 54 | +++ b/tests/qemu-iotests/244.out |
15 | @@ -XXX,XX +XXX,XX @@ exclusive with the @var{-O} parameters. It is currently required to also use | 55 | @@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0 |
16 | the @var{-n} parameter to skip image creation. This restriction may be relaxed | 56 | 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
17 | in a future release. | 57 | read 1048576/1048576 bytes at offset 1048576 |
18 | 58 | 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | |
19 | +@item --force-share (-U) | 59 | -read 2097152/2097152 bytes at offset 2097152 |
20 | + | 60 | -2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
21 | +If specified, @code{qemu-img} will open the image with shared permissions, | 61 | read 1048576/1048576 bytes at offset 4194304 |
22 | +which makes it less likely to conflict with a running guest's permissions due | 62 | 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
23 | +to image locking. For example, this can be used to get the image information | 63 | read 1048576/1048576 bytes at offset 5242880 |
24 | +(with 'info' subcommand) when the image is used by a running guest. Note that | 64 | @@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 1048576 |
25 | +this could produce inconsistent results because of concurrent metadata changes, | 65 | read 4194304/4194304 bytes at offset 2097152 |
26 | +etc. This option is only allowed when opening images in read-only mode. | 66 | 4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
27 | + | 67 | |
28 | @item fmt | 68 | -Images are identical. |
29 | is the disk image format. It is guessed automatically in most cases. See below | 69 | +read 1048576/1048576 bytes at offset 0 |
30 | for a description of the supported disk formats. | 70 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
71 | +read 1048576/1048576 bytes at offset 1048576 | ||
72 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
73 | +read 3145728/3145728 bytes at offset 3145728 | ||
74 | +3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
75 | qcow2 file size after I/O: 327680 | ||
76 | |||
77 | === bdrv_co_block_status test for file and offset=0 === | ||
31 | -- | 78 | -- |
32 | 2.13.6 | 79 | 2.25.3 |
33 | 80 | ||
34 | 81 | diff view generated by jsdifflib |
1 | Drain requests are propagated to child nodes, parent nodes and directly | 1 | This adds a new BdrvRequestFlags parameter to the .bdrv_co_truncate() |
---|---|---|---|
2 | to the AioContext. The order in which this happened was different | 2 | driver callbacks, and a supported_truncate_flags field in |
3 | between all combinations of drain/drain_all and begin/end. | 3 | BlockDriverState that allows drivers to advertise support for request |
4 | 4 | flags in the context of truncate. | |
5 | The correct order is to keep children only drained when their parents | 5 | |
6 | are also drained. This means that at the start of a drained section, the | 6 | For now, we always pass 0 and no drivers declare support for any flag. |
7 | AioContext needs to be drained first, the parents second and only then | ||
8 | the children. The correct order for the end of a drained section is the | ||
9 | opposite. | ||
10 | |||
11 | This patch changes the three other functions to follow the example of | ||
12 | bdrv_drained_begin(), which is the only one that got it right. | ||
13 | 7 | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
10 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
11 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
12 | Message-Id: <20200424125448.63318-2-kwolf@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
16 | --- | 14 | --- |
17 | block/io.c | 12 ++++++++---- | 15 | include/block/block_int.h | 10 +++++++++- |
18 | 1 file changed, 8 insertions(+), 4 deletions(-) | 16 | block/crypto.c | 3 ++- |
19 | 17 | block/file-posix.c | 2 +- | |
18 | block/file-win32.c | 2 +- | ||
19 | block/gluster.c | 1 + | ||
20 | block/io.c | 8 +++++++- | ||
21 | block/iscsi.c | 2 +- | ||
22 | block/nfs.c | 3 ++- | ||
23 | block/qcow2.c | 2 +- | ||
24 | block/qed.c | 1 + | ||
25 | block/raw-format.c | 2 +- | ||
26 | block/rbd.c | 1 + | ||
27 | block/sheepdog.c | 4 ++-- | ||
28 | block/ssh.c | 2 +- | ||
29 | tests/test-block-iothread.c | 3 ++- | ||
30 | 15 files changed, 33 insertions(+), 13 deletions(-) | ||
31 | |||
32 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/include/block/block_int.h | ||
35 | +++ b/include/block/block_int.h | ||
36 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
37 | */ | ||
38 | int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, | ||
39 | bool exact, PreallocMode prealloc, | ||
40 | - Error **errp); | ||
41 | + BdrvRequestFlags flags, Error **errp); | ||
42 | |||
43 | int64_t (*bdrv_getlength)(BlockDriverState *bs); | ||
44 | bool has_variable_length; | ||
45 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
46 | /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, | ||
47 | * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ | ||
48 | unsigned int supported_zero_flags; | ||
49 | + /* | ||
50 | + * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). | ||
51 | + * | ||
52 | + * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure | ||
53 | + * that any added space reads as all zeros. If this can't be guaranteed, | ||
54 | + * the operation must fail. | ||
55 | + */ | ||
56 | + unsigned int supported_truncate_flags; | ||
57 | |||
58 | /* the following member gives a name to every node on the bs graph. */ | ||
59 | char node_name[32]; | ||
60 | diff --git a/block/crypto.c b/block/crypto.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/block/crypto.c | ||
63 | +++ b/block/crypto.c | ||
64 | @@ -XXX,XX +XXX,XX @@ static int block_crypto_co_create_generic(BlockDriverState *bs, | ||
65 | |||
66 | static int coroutine_fn | ||
67 | block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, | ||
68 | - PreallocMode prealloc, Error **errp) | ||
69 | + PreallocMode prealloc, BdrvRequestFlags flags, | ||
70 | + Error **errp) | ||
71 | { | ||
72 | BlockCrypto *crypto = bs->opaque; | ||
73 | uint64_t payload_offset = | ||
74 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/block/file-posix.c | ||
77 | +++ b/block/file-posix.c | ||
78 | @@ -XXX,XX +XXX,XX @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, | ||
79 | |||
80 | static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
81 | bool exact, PreallocMode prealloc, | ||
82 | - Error **errp) | ||
83 | + BdrvRequestFlags flags, Error **errp) | ||
84 | { | ||
85 | BDRVRawState *s = bs->opaque; | ||
86 | struct stat st; | ||
87 | diff --git a/block/file-win32.c b/block/file-win32.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/block/file-win32.c | ||
90 | +++ b/block/file-win32.c | ||
91 | @@ -XXX,XX +XXX,XX @@ static void raw_close(BlockDriverState *bs) | ||
92 | |||
93 | static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
94 | bool exact, PreallocMode prealloc, | ||
95 | - Error **errp) | ||
96 | + BdrvRequestFlags flags, Error **errp) | ||
97 | { | ||
98 | BDRVRawState *s = bs->opaque; | ||
99 | LONG low, high; | ||
100 | diff --git a/block/gluster.c b/block/gluster.c | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/block/gluster.c | ||
103 | +++ b/block/gluster.c | ||
104 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, | ||
105 | int64_t offset, | ||
106 | bool exact, | ||
107 | PreallocMode prealloc, | ||
108 | + BdrvRequestFlags flags, | ||
109 | Error **errp) | ||
110 | { | ||
111 | BDRVGlusterState *s = bs->opaque; | ||
20 | diff --git a/block/io.c b/block/io.c | 112 | diff --git a/block/io.c b/block/io.c |
21 | index XXXXXXX..XXXXXXX 100644 | 113 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/block/io.c | 114 | --- a/block/io.c |
23 | +++ b/block/io.c | 115 | +++ b/block/io.c |
24 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) | 116 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, |
25 | return; | 117 | BlockDriverState *bs = child->bs; |
118 | BlockDriver *drv = bs->drv; | ||
119 | BdrvTrackedRequest req; | ||
120 | + BdrvRequestFlags flags = 0; | ||
121 | int64_t old_size, new_bytes; | ||
122 | int ret; | ||
123 | |||
124 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
26 | } | 125 | } |
27 | 126 | ||
28 | + /* Stop things in parent-to-child order */ | 127 | if (drv->bdrv_co_truncate) { |
29 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { | 128 | - ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp); |
30 | aio_disable_external(bdrv_get_aio_context(bs)); | 129 | + if (flags & ~bs->supported_truncate_flags) { |
31 | bdrv_parent_drained_begin(bs); | 130 | + error_setg(errp, "Block driver does not support requested flags"); |
32 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | 131 | + ret = -ENOTSUP; |
33 | return; | 132 | + goto out; |
34 | } | 133 | + } |
35 | 134 | + ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); | |
36 | - bdrv_parent_drained_end(bs); | 135 | } else if (bs->file && drv->is_filter) { |
37 | + /* Re-enable things in child-to-parent order */ | 136 | ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); |
38 | bdrv_drain_invoke(bs, false); | 137 | } else { |
39 | + bdrv_parent_drained_end(bs); | 138 | diff --git a/block/iscsi.c b/block/iscsi.c |
40 | aio_enable_external(bdrv_get_aio_context(bs)); | 139 | index XXXXXXX..XXXXXXX 100644 |
140 | --- a/block/iscsi.c | ||
141 | +++ b/block/iscsi.c | ||
142 | @@ -XXX,XX +XXX,XX @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) | ||
143 | |||
144 | static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, | ||
145 | bool exact, PreallocMode prealloc, | ||
146 | - Error **errp) | ||
147 | + BdrvRequestFlags flags, Error **errp) | ||
148 | { | ||
149 | IscsiLun *iscsilun = bs->opaque; | ||
150 | int64_t cur_length; | ||
151 | diff --git a/block/nfs.c b/block/nfs.c | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/block/nfs.c | ||
154 | +++ b/block/nfs.c | ||
155 | @@ -XXX,XX +XXX,XX @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) | ||
156 | |||
157 | static int coroutine_fn | ||
158 | nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, | ||
159 | - PreallocMode prealloc, Error **errp) | ||
160 | + PreallocMode prealloc, BdrvRequestFlags flags, | ||
161 | + Error **errp) | ||
162 | { | ||
163 | NFSClient *client = bs->opaque; | ||
164 | int ret; | ||
165 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/block/qcow2.c | ||
168 | +++ b/block/qcow2.c | ||
169 | @@ -XXX,XX +XXX,XX @@ fail: | ||
170 | |||
171 | static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
172 | bool exact, PreallocMode prealloc, | ||
173 | - Error **errp) | ||
174 | + BdrvRequestFlags flags, Error **errp) | ||
175 | { | ||
176 | BDRVQcow2State *s = bs->opaque; | ||
177 | uint64_t old_length; | ||
178 | diff --git a/block/qed.c b/block/qed.c | ||
179 | index XXXXXXX..XXXXXXX 100644 | ||
180 | --- a/block/qed.c | ||
181 | +++ b/block/qed.c | ||
182 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, | ||
183 | int64_t offset, | ||
184 | bool exact, | ||
185 | PreallocMode prealloc, | ||
186 | + BdrvRequestFlags flags, | ||
187 | Error **errp) | ||
188 | { | ||
189 | BDRVQEDState *s = bs->opaque; | ||
190 | diff --git a/block/raw-format.c b/block/raw-format.c | ||
191 | index XXXXXXX..XXXXXXX 100644 | ||
192 | --- a/block/raw-format.c | ||
193 | +++ b/block/raw-format.c | ||
194 | @@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) | ||
195 | |||
196 | static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
197 | bool exact, PreallocMode prealloc, | ||
198 | - Error **errp) | ||
199 | + BdrvRequestFlags flags, Error **errp) | ||
200 | { | ||
201 | BDRVRawState *s = bs->opaque; | ||
202 | |||
203 | diff --git a/block/rbd.c b/block/rbd.c | ||
204 | index XXXXXXX..XXXXXXX 100644 | ||
205 | --- a/block/rbd.c | ||
206 | +++ b/block/rbd.c | ||
207 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, | ||
208 | int64_t offset, | ||
209 | bool exact, | ||
210 | PreallocMode prealloc, | ||
211 | + BdrvRequestFlags flags, | ||
212 | Error **errp) | ||
213 | { | ||
214 | int r; | ||
215 | diff --git a/block/sheepdog.c b/block/sheepdog.c | ||
216 | index XXXXXXX..XXXXXXX 100644 | ||
217 | --- a/block/sheepdog.c | ||
218 | +++ b/block/sheepdog.c | ||
219 | @@ -XXX,XX +XXX,XX @@ static int64_t sd_getlength(BlockDriverState *bs) | ||
220 | |||
221 | static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, | ||
222 | bool exact, PreallocMode prealloc, | ||
223 | - Error **errp) | ||
224 | + BdrvRequestFlags flags, Error **errp) | ||
225 | { | ||
226 | BDRVSheepdogState *s = bs->opaque; | ||
227 | int ret, fd; | ||
228 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, | ||
229 | |||
230 | assert(!flags); | ||
231 | if (offset > s->inode.vdi_size) { | ||
232 | - ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL); | ||
233 | + ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL); | ||
234 | if (ret < 0) { | ||
235 | return ret; | ||
236 | } | ||
237 | diff --git a/block/ssh.c b/block/ssh.c | ||
238 | index XXXXXXX..XXXXXXX 100644 | ||
239 | --- a/block/ssh.c | ||
240 | +++ b/block/ssh.c | ||
241 | @@ -XXX,XX +XXX,XX @@ static int64_t ssh_getlength(BlockDriverState *bs) | ||
242 | |||
243 | static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, | ||
244 | bool exact, PreallocMode prealloc, | ||
245 | - Error **errp) | ||
246 | + BdrvRequestFlags flags, Error **errp) | ||
247 | { | ||
248 | BDRVSSHState *s = bs->opaque; | ||
249 | |||
250 | diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c | ||
251 | index XXXXXXX..XXXXXXX 100644 | ||
252 | --- a/tests/test-block-iothread.c | ||
253 | +++ b/tests/test-block-iothread.c | ||
254 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs, | ||
255 | |||
256 | static int coroutine_fn | ||
257 | bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, | ||
258 | - PreallocMode prealloc, Error **errp) | ||
259 | + PreallocMode prealloc, BdrvRequestFlags flags, | ||
260 | + Error **errp) | ||
261 | { | ||
262 | return 0; | ||
41 | } | 263 | } |
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
44 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
45 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
46 | |||
47 | + /* Stop things in parent-to-child order */ | ||
48 | aio_context_acquire(aio_context); | ||
49 | - bdrv_parent_drained_begin(bs); | ||
50 | aio_disable_external(aio_context); | ||
51 | + bdrv_parent_drained_begin(bs); | ||
52 | bdrv_drain_invoke(bs, true); | ||
53 | aio_context_release(aio_context); | ||
54 | |||
55 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
56 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
57 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
58 | |||
59 | + /* Re-enable things in child-to-parent order */ | ||
60 | aio_context_acquire(aio_context); | ||
61 | - aio_enable_external(aio_context); | ||
62 | - bdrv_parent_drained_end(bs); | ||
63 | bdrv_drain_invoke(bs, false); | ||
64 | + bdrv_parent_drained_end(bs); | ||
65 | + aio_enable_external(aio_context); | ||
66 | aio_context_release(aio_context); | ||
67 | } | ||
68 | |||
69 | -- | 264 | -- |
70 | 2.13.6 | 265 | 2.25.3 |
71 | 266 | ||
72 | 267 | diff view generated by jsdifflib |
1 | bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver | 1 | Now that block drivers can support flags for .bdrv_co_truncate, expose |
---|---|---|---|
2 | callback inside its polling loop. This means that how many times it got | 2 | the parameter in the node level interfaces bdrv_co_truncate() and |
3 | called for each node depended on how long it had to poll the event loop. | 3 | bdrv_truncate(). |
4 | 4 | ||
5 | This is obviously not right and results in nodes that stay drained even | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | after bdrv_drain_all_end(), which calls .bdrv_co_drain_end() once per | 6 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
7 | node. | 7 | Reviewed-by: Alberto Garcia <berto@igalia.com> |
8 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
9 | Message-Id: <20200424125448.63318-3-kwolf@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | --- | ||
12 | include/block/block.h | 5 +++-- | ||
13 | block/block-backend.c | 2 +- | ||
14 | block/crypto.c | 2 +- | ||
15 | block/io.c | 12 +++++++----- | ||
16 | block/parallels.c | 6 +++--- | ||
17 | block/qcow.c | 4 ++-- | ||
18 | block/qcow2-refcount.c | 2 +- | ||
19 | block/qcow2.c | 15 +++++++++------ | ||
20 | block/raw-format.c | 2 +- | ||
21 | block/vhdx-log.c | 2 +- | ||
22 | block/vhdx.c | 2 +- | ||
23 | block/vmdk.c | 2 +- | ||
24 | tests/test-block-iothread.c | 6 +++--- | ||
25 | 13 files changed, 34 insertions(+), 28 deletions(-) | ||
8 | 26 | ||
9 | Fix bdrv_drain_all_begin() to call the callback only once, too. | 27 | diff --git a/include/block/block.h b/include/block/block.h |
10 | 28 | index XXXXXXX..XXXXXXX 100644 | |
11 | Cc: qemu-stable@nongnu.org | 29 | --- a/include/block/block.h |
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 30 | +++ b/include/block/block.h |
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 31 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, |
14 | --- | 32 | void bdrv_refresh_filename(BlockDriverState *bs); |
15 | block/io.c | 3 +-- | 33 | |
16 | 1 file changed, 1 insertion(+), 2 deletions(-) | 34 | int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, |
17 | 35 | - PreallocMode prealloc, Error **errp); | |
36 | + PreallocMode prealloc, BdrvRequestFlags flags, | ||
37 | + Error **errp); | ||
38 | int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
39 | - PreallocMode prealloc, Error **errp); | ||
40 | + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); | ||
41 | |||
42 | int64_t bdrv_nb_sectors(BlockDriverState *bs); | ||
43 | int64_t bdrv_getlength(BlockDriverState *bs); | ||
44 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/block/block-backend.c | ||
47 | +++ b/block/block-backend.c | ||
48 | @@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, | ||
49 | return -ENOMEDIUM; | ||
50 | } | ||
51 | |||
52 | - return bdrv_truncate(blk->root, offset, exact, prealloc, errp); | ||
53 | + return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); | ||
54 | } | ||
55 | |||
56 | int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, | ||
57 | diff --git a/block/crypto.c b/block/crypto.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/block/crypto.c | ||
60 | +++ b/block/crypto.c | ||
61 | @@ -XXX,XX +XXX,XX @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, | ||
62 | |||
63 | offset += payload_offset; | ||
64 | |||
65 | - return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); | ||
66 | + return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); | ||
67 | } | ||
68 | |||
69 | static void block_crypto_close(BlockDriverState *bs) | ||
18 | diff --git a/block/io.c b/block/io.c | 70 | diff --git a/block/io.c b/block/io.c |
19 | index XXXXXXX..XXXXXXX 100644 | 71 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/io.c | 72 | --- a/block/io.c |
21 | +++ b/block/io.c | 73 | +++ b/block/io.c |
22 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | 74 | @@ -XXX,XX +XXX,XX @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) |
23 | aio_context_acquire(aio_context); | 75 | * 'offset' bytes in length. |
24 | bdrv_parent_drained_begin(bs); | 76 | */ |
25 | aio_disable_external(aio_context); | 77 | int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, |
26 | + bdrv_drain_invoke(bs, true); | 78 | - PreallocMode prealloc, Error **errp) |
27 | aio_context_release(aio_context); | 79 | + PreallocMode prealloc, BdrvRequestFlags flags, |
28 | 80 | + Error **errp) | |
29 | if (!g_slist_find(aio_ctxs, aio_context)) { | 81 | { |
30 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | 82 | BlockDriverState *bs = child->bs; |
31 | aio_context_acquire(aio_context); | 83 | BlockDriver *drv = bs->drv; |
32 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | 84 | BdrvTrackedRequest req; |
33 | if (aio_context == bdrv_get_aio_context(bs)) { | 85 | - BdrvRequestFlags flags = 0; |
34 | - /* FIXME Calling this multiple times is wrong */ | 86 | int64_t old_size, new_bytes; |
35 | - bdrv_drain_invoke(bs, true); | 87 | int ret; |
36 | waited |= bdrv_drain_recurse(bs, true); | 88 | |
37 | } | 89 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, |
90 | } | ||
91 | ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); | ||
92 | } else if (bs->file && drv->is_filter) { | ||
93 | - ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); | ||
94 | + ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); | ||
95 | } else { | ||
96 | error_setg(errp, "Image format driver does not support resize"); | ||
97 | ret = -ENOTSUP; | ||
98 | @@ -XXX,XX +XXX,XX @@ typedef struct TruncateCo { | ||
99 | int64_t offset; | ||
100 | bool exact; | ||
101 | PreallocMode prealloc; | ||
102 | + BdrvRequestFlags flags; | ||
103 | Error **errp; | ||
104 | int ret; | ||
105 | } TruncateCo; | ||
106 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque) | ||
107 | { | ||
108 | TruncateCo *tco = opaque; | ||
109 | tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact, | ||
110 | - tco->prealloc, tco->errp); | ||
111 | + tco->prealloc, tco->flags, tco->errp); | ||
112 | aio_wait_kick(); | ||
113 | } | ||
114 | |||
115 | int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
116 | - PreallocMode prealloc, Error **errp) | ||
117 | + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) | ||
118 | { | ||
119 | Coroutine *co; | ||
120 | TruncateCo tco = { | ||
121 | @@ -XXX,XX +XXX,XX @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
122 | .offset = offset, | ||
123 | .exact = exact, | ||
124 | .prealloc = prealloc, | ||
125 | + .flags = flags, | ||
126 | .errp = errp, | ||
127 | .ret = NOT_DONE, | ||
128 | }; | ||
129 | diff --git a/block/parallels.c b/block/parallels.c | ||
130 | index XXXXXXX..XXXXXXX 100644 | ||
131 | --- a/block/parallels.c | ||
132 | +++ b/block/parallels.c | ||
133 | @@ -XXX,XX +XXX,XX @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, | ||
134 | } else { | ||
135 | ret = bdrv_truncate(bs->file, | ||
136 | (s->data_end + space) << BDRV_SECTOR_BITS, | ||
137 | - false, PREALLOC_MODE_OFF, NULL); | ||
138 | + false, PREALLOC_MODE_OFF, 0, NULL); | ||
139 | } | ||
140 | if (ret < 0) { | ||
141 | return ret; | ||
142 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, | ||
143 | * That means we have to pass exact=true. | ||
144 | */ | ||
145 | ret = bdrv_truncate(bs->file, res->image_end_offset, true, | ||
146 | - PREALLOC_MODE_OFF, &local_err); | ||
147 | + PREALLOC_MODE_OFF, 0, &local_err); | ||
148 | if (ret < 0) { | ||
149 | error_report_err(local_err); | ||
150 | res->check_errors++; | ||
151 | @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) | ||
152 | |||
153 | /* errors are ignored, so we might as well pass exact=true */ | ||
154 | bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true, | ||
155 | - PREALLOC_MODE_OFF, NULL); | ||
156 | + PREALLOC_MODE_OFF, 0, NULL); | ||
157 | } | ||
158 | |||
159 | g_free(s->bat_dirty_bmap); | ||
160 | diff --git a/block/qcow.c b/block/qcow.c | ||
161 | index XXXXXXX..XXXXXXX 100644 | ||
162 | --- a/block/qcow.c | ||
163 | +++ b/block/qcow.c | ||
164 | @@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs, | ||
165 | return -E2BIG; | ||
166 | } | ||
167 | ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size, | ||
168 | - false, PREALLOC_MODE_OFF, NULL); | ||
169 | + false, PREALLOC_MODE_OFF, 0, NULL); | ||
170 | if (ret < 0) { | ||
171 | return ret; | ||
172 | } | ||
173 | @@ -XXX,XX +XXX,XX @@ static int qcow_make_empty(BlockDriverState *bs) | ||
174 | l1_length) < 0) | ||
175 | return -1; | ||
176 | ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false, | ||
177 | - PREALLOC_MODE_OFF, NULL); | ||
178 | + PREALLOC_MODE_OFF, 0, NULL); | ||
179 | if (ret < 0) | ||
180 | return ret; | ||
181 | |||
182 | diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/block/qcow2-refcount.c | ||
185 | +++ b/block/qcow2-refcount.c | ||
186 | @@ -XXX,XX +XXX,XX @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, | ||
187 | } | ||
188 | |||
189 | ret = bdrv_truncate(bs->file, offset + s->cluster_size, false, | ||
190 | - PREALLOC_MODE_OFF, &local_err); | ||
191 | + PREALLOC_MODE_OFF, 0, &local_err); | ||
192 | if (ret < 0) { | ||
193 | error_report_err(local_err); | ||
194 | goto resize_fail; | ||
195 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
196 | index XXXXXXX..XXXXXXX 100644 | ||
197 | --- a/block/qcow2.c | ||
198 | +++ b/block/qcow2.c | ||
199 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, | ||
200 | mode = PREALLOC_MODE_OFF; | ||
201 | } | ||
202 | ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false, | ||
203 | - mode, errp); | ||
204 | + mode, 0, errp); | ||
205 | if (ret < 0) { | ||
206 | return ret; | ||
207 | } | ||
208 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
209 | * always fulfilled, so there is no need to pass it on.) | ||
210 | */ | ||
211 | bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, | ||
212 | - false, PREALLOC_MODE_OFF, &local_err); | ||
213 | + false, PREALLOC_MODE_OFF, 0, &local_err); | ||
214 | if (local_err) { | ||
215 | warn_reportf_err(local_err, | ||
216 | "Failed to truncate the tail of the image: "); | ||
217 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
218 | * file should be resized to the exact target size, too, | ||
219 | * so we pass @exact here. | ||
220 | */ | ||
221 | - ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp); | ||
222 | + ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0, | ||
223 | + errp); | ||
224 | if (ret < 0) { | ||
225 | goto fail; | ||
38 | } | 226 | } |
227 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
228 | new_file_size = allocation_start + | ||
229 | nb_new_data_clusters * s->cluster_size; | ||
230 | /* Image file grows, so @exact does not matter */ | ||
231 | - ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp); | ||
232 | + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, | ||
233 | + errp); | ||
234 | if (ret < 0) { | ||
235 | error_prepend(errp, "Failed to resize underlying file: "); | ||
236 | qcow2_free_clusters(bs, allocation_start, | ||
237 | @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, | ||
238 | if (len < 0) { | ||
239 | return len; | ||
240 | } | ||
241 | - return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL); | ||
242 | + return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0, | ||
243 | + NULL); | ||
244 | } | ||
245 | |||
246 | if (offset_into_cluster(s, offset)) { | ||
247 | @@ -XXX,XX +XXX,XX @@ static int make_completely_empty(BlockDriverState *bs) | ||
248 | } | ||
249 | |||
250 | ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false, | ||
251 | - PREALLOC_MODE_OFF, &local_err); | ||
252 | + PREALLOC_MODE_OFF, 0, &local_err); | ||
253 | if (ret < 0) { | ||
254 | error_report_err(local_err); | ||
255 | goto fail; | ||
256 | diff --git a/block/raw-format.c b/block/raw-format.c | ||
257 | index XXXXXXX..XXXXXXX 100644 | ||
258 | --- a/block/raw-format.c | ||
259 | +++ b/block/raw-format.c | ||
260 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
261 | |||
262 | s->size = offset; | ||
263 | offset += s->offset; | ||
264 | - return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); | ||
265 | + return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); | ||
266 | } | ||
267 | |||
268 | static void raw_eject(BlockDriverState *bs, bool eject_flag) | ||
269 | diff --git a/block/vhdx-log.c b/block/vhdx-log.c | ||
270 | index XXXXXXX..XXXXXXX 100644 | ||
271 | --- a/block/vhdx-log.c | ||
272 | +++ b/block/vhdx-log.c | ||
273 | @@ -XXX,XX +XXX,XX @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, | ||
274 | goto exit; | ||
275 | } | ||
276 | ret = bdrv_truncate(bs->file, new_file_size, false, | ||
277 | - PREALLOC_MODE_OFF, NULL); | ||
278 | + PREALLOC_MODE_OFF, 0, NULL); | ||
279 | if (ret < 0) { | ||
280 | goto exit; | ||
281 | } | ||
282 | diff --git a/block/vhdx.c b/block/vhdx.c | ||
283 | index XXXXXXX..XXXXXXX 100644 | ||
284 | --- a/block/vhdx.c | ||
285 | +++ b/block/vhdx.c | ||
286 | @@ -XXX,XX +XXX,XX @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, | ||
287 | } | ||
288 | |||
289 | return bdrv_truncate(bs->file, *new_offset + s->block_size, false, | ||
290 | - PREALLOC_MODE_OFF, NULL); | ||
291 | + PREALLOC_MODE_OFF, 0, NULL); | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | diff --git a/block/vmdk.c b/block/vmdk.c | ||
296 | index XXXXXXX..XXXXXXX 100644 | ||
297 | --- a/block/vmdk.c | ||
298 | +++ b/block/vmdk.c | ||
299 | @@ -XXX,XX +XXX,XX @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
300 | } | ||
301 | length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); | ||
302 | ret = bdrv_truncate(s->extents[i].file, length, false, | ||
303 | - PREALLOC_MODE_OFF, NULL); | ||
304 | + PREALLOC_MODE_OFF, 0, NULL); | ||
305 | if (ret < 0) { | ||
306 | return ret; | ||
307 | } | ||
308 | diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c | ||
309 | index XXXXXXX..XXXXXXX 100644 | ||
310 | --- a/tests/test-block-iothread.c | ||
311 | +++ b/tests/test-block-iothread.c | ||
312 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c) | ||
313 | int ret; | ||
314 | |||
315 | /* Normal success path */ | ||
316 | - ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); | ||
317 | + ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); | ||
318 | g_assert_cmpint(ret, ==, 0); | ||
319 | |||
320 | /* Early error: Negative offset */ | ||
321 | - ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL); | ||
322 | + ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, 0, NULL); | ||
323 | g_assert_cmpint(ret, ==, -EINVAL); | ||
324 | |||
325 | /* Error: Read-only image */ | ||
326 | c->bs->read_only = true; | ||
327 | c->bs->open_flags &= ~BDRV_O_RDWR; | ||
328 | |||
329 | - ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); | ||
330 | + ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); | ||
331 | g_assert_cmpint(ret, ==, -EACCES); | ||
332 | |||
333 | c->bs->read_only = false; | ||
39 | -- | 334 | -- |
40 | 2.13.6 | 335 | 2.25.3 |
41 | 336 | ||
42 | 337 | diff view generated by jsdifflib |
1 | Commit 1f4ad7d fixed 'qemu-img info' for raw images that are currently | 1 | Now that the node-level interface bdrv_truncate() supports passing request |
---|---|---|---|
2 | in use as a mirror target. It is not enough for image formats, though, | 2 | flags to the block driver, expose this on the BlockBackend level, too. |
3 | as these still unconditionally request BLK_PERM_CONSISTENT_READ. | 3 | |
4 | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | |
5 | As this permission is geared towards whether the guest-visible data is | 5 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
6 | consistent, and has no impact on whether the metadata is sane, and | 6 | Reviewed-by: Alberto Garcia <berto@igalia.com> |
7 | 'qemu-img info' does not read guest-visible data (except for the raw | 7 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
8 | format), it makes sense to not require BLK_PERM_CONSISTENT_READ if there | 8 | Message-Id: <20200424125448.63318-4-kwolf@redhat.com> |
9 | is not going to be any guest I/O performed, regardless of image format. | ||
10 | |||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | --- | 10 | --- |
13 | block.c | 6 +++++- | 11 | include/sysemu/block-backend.h | 2 +- |
14 | 1 file changed, 5 insertions(+), 1 deletion(-) | 12 | block.c | 3 ++- |
15 | 13 | block/block-backend.c | 4 ++-- | |
14 | block/commit.c | 4 ++-- | ||
15 | block/crypto.c | 2 +- | ||
16 | block/mirror.c | 2 +- | ||
17 | block/qcow2.c | 4 ++-- | ||
18 | block/qed.c | 2 +- | ||
19 | block/vdi.c | 2 +- | ||
20 | block/vhdx.c | 4 ++-- | ||
21 | block/vmdk.c | 6 +++--- | ||
22 | block/vpc.c | 2 +- | ||
23 | blockdev.c | 2 +- | ||
24 | qemu-img.c | 2 +- | ||
25 | qemu-io-cmds.c | 2 +- | ||
26 | 15 files changed, 22 insertions(+), 21 deletions(-) | ||
27 | |||
28 | diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/include/sysemu/block-backend.h | ||
31 | +++ b/include/sysemu/block-backend.h | ||
32 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, | ||
33 | int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, | ||
34 | int bytes); | ||
35 | int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, | ||
36 | - PreallocMode prealloc, Error **errp); | ||
37 | + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); | ||
38 | int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); | ||
39 | int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, | ||
40 | int64_t pos, int size); | ||
16 | diff --git a/block.c b/block.c | 41 | diff --git a/block.c b/block.c |
17 | index XXXXXXX..XXXXXXX 100644 | 42 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/block.c | 43 | --- a/block.c |
19 | +++ b/block.c | 44 | +++ b/block.c |
20 | @@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, | 45 | @@ -XXX,XX +XXX,XX @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, |
21 | assert(role == &child_backing || role == &child_file); | 46 | int64_t size; |
22 | 47 | int ret; | |
23 | if (!backing) { | 48 | |
24 | + int flags = bdrv_reopen_get_flags(reopen_queue, bs); | 49 | - ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); |
25 | + | 50 | + ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, |
26 | /* Apart from the modifications below, the same permissions are | 51 | + &local_err); |
27 | * forwarded and left alone as for filters */ | 52 | if (ret < 0 && ret != -ENOTSUP) { |
28 | bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, | 53 | error_propagate(errp, local_err); |
29 | @@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, | 54 | return ret; |
30 | 55 | diff --git a/block/block-backend.c b/block/block-backend.c | |
31 | /* bs->file always needs to be consistent because of the metadata. We | 56 | index XXXXXXX..XXXXXXX 100644 |
32 | * can never allow other users to resize or write to it. */ | 57 | --- a/block/block-backend.c |
33 | - perm |= BLK_PERM_CONSISTENT_READ; | 58 | +++ b/block/block-backend.c |
34 | + if (!(flags & BDRV_O_NO_IO)) { | 59 | @@ -XXX,XX +XXX,XX @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, |
35 | + perm |= BLK_PERM_CONSISTENT_READ; | 60 | } |
36 | + } | 61 | |
37 | shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); | 62 | int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, |
63 | - PreallocMode prealloc, Error **errp) | ||
64 | + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) | ||
65 | { | ||
66 | if (!blk_is_available(blk)) { | ||
67 | error_setg(errp, "No medium inserted"); | ||
68 | return -ENOMEDIUM; | ||
69 | } | ||
70 | |||
71 | - return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); | ||
72 | + return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp); | ||
73 | } | ||
74 | |||
75 | int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, | ||
76 | diff --git a/block/commit.c b/block/commit.c | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/block/commit.c | ||
79 | +++ b/block/commit.c | ||
80 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn commit_run(Job *job, Error **errp) | ||
81 | } | ||
82 | |||
83 | if (base_len < len) { | ||
84 | - ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL); | ||
85 | + ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL); | ||
86 | if (ret) { | ||
87 | goto out; | ||
88 | } | ||
89 | @@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs) | ||
90 | * grow the backing file image if possible. If not possible, | ||
91 | * we must return an error */ | ||
92 | if (length > backing_length) { | ||
93 | - ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, | ||
94 | + ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0, | ||
95 | &local_err); | ||
96 | if (ret < 0) { | ||
97 | error_report_err(local_err); | ||
98 | diff --git a/block/crypto.c b/block/crypto.c | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/block/crypto.c | ||
101 | +++ b/block/crypto.c | ||
102 | @@ -XXX,XX +XXX,XX @@ static ssize_t block_crypto_init_func(QCryptoBlock *block, | ||
103 | * which will be used by the crypto header | ||
104 | */ | ||
105 | return blk_truncate(data->blk, data->size + headerlen, false, | ||
106 | - data->prealloc, errp); | ||
107 | + data->prealloc, 0, errp); | ||
108 | } | ||
109 | |||
110 | |||
111 | diff --git a/block/mirror.c b/block/mirror.c | ||
112 | index XXXXXXX..XXXXXXX 100644 | ||
113 | --- a/block/mirror.c | ||
114 | +++ b/block/mirror.c | ||
115 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_run(Job *job, Error **errp) | ||
116 | |||
117 | if (s->bdev_length > base_length) { | ||
118 | ret = blk_truncate(s->target, s->bdev_length, false, | ||
119 | - PREALLOC_MODE_OFF, NULL); | ||
120 | + PREALLOC_MODE_OFF, 0, NULL); | ||
121 | if (ret < 0) { | ||
122 | goto immediate_exit; | ||
123 | } | ||
124 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
125 | index XXXXXXX..XXXXXXX 100644 | ||
126 | --- a/block/qcow2.c | ||
127 | +++ b/block/qcow2.c | ||
128 | @@ -XXX,XX +XXX,XX @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) | ||
129 | |||
130 | /* Okay, now that we have a valid image, let's give it the right size */ | ||
131 | ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation, | ||
132 | - errp); | ||
133 | + 0, errp); | ||
134 | if (ret < 0) { | ||
135 | error_prepend(errp, "Could not resize image: "); | ||
136 | goto out; | ||
137 | @@ -XXX,XX +XXX,XX @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, | ||
138 | * Amending image options should ensure that the image has | ||
139 | * exactly the given new values, so pass exact=true here. | ||
140 | */ | ||
141 | - ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp); | ||
142 | + ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp); | ||
143 | blk_unref(blk); | ||
144 | if (ret < 0) { | ||
145 | return ret; | ||
146 | diff --git a/block/qed.c b/block/qed.c | ||
147 | index XXXXXXX..XXXXXXX 100644 | ||
148 | --- a/block/qed.c | ||
149 | +++ b/block/qed.c | ||
150 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, | ||
151 | * The QED format associates file length with allocation status, | ||
152 | * so a new file (which is empty) must have a length of 0. | ||
153 | */ | ||
154 | - ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp); | ||
155 | + ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp); | ||
156 | if (ret < 0) { | ||
157 | goto out; | ||
158 | } | ||
159 | diff --git a/block/vdi.c b/block/vdi.c | ||
160 | index XXXXXXX..XXXXXXX 100644 | ||
161 | --- a/block/vdi.c | ||
162 | +++ b/block/vdi.c | ||
163 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, | ||
164 | |||
165 | if (image_type == VDI_TYPE_STATIC) { | ||
166 | ret = blk_truncate(blk, offset + blocks * block_size, false, | ||
167 | - PREALLOC_MODE_OFF, errp); | ||
168 | + PREALLOC_MODE_OFF, 0, errp); | ||
169 | if (ret < 0) { | ||
170 | error_prepend(errp, "Failed to statically allocate file"); | ||
171 | goto exit; | ||
172 | diff --git a/block/vhdx.c b/block/vhdx.c | ||
173 | index XXXXXXX..XXXXXXX 100644 | ||
174 | --- a/block/vhdx.c | ||
175 | +++ b/block/vhdx.c | ||
176 | @@ -XXX,XX +XXX,XX @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s, | ||
177 | /* All zeroes, so we can just extend the file - the end of the BAT | ||
178 | * is the furthest thing we have written yet */ | ||
179 | ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF, | ||
180 | - errp); | ||
181 | + 0, errp); | ||
182 | if (ret < 0) { | ||
183 | goto exit; | ||
184 | } | ||
185 | } else if (type == VHDX_TYPE_FIXED) { | ||
186 | ret = blk_truncate(blk, data_file_offset + image_size, false, | ||
187 | - PREALLOC_MODE_OFF, errp); | ||
188 | + PREALLOC_MODE_OFF, 0, errp); | ||
189 | if (ret < 0) { | ||
190 | goto exit; | ||
191 | } | ||
192 | diff --git a/block/vmdk.c b/block/vmdk.c | ||
193 | index XXXXXXX..XXXXXXX 100644 | ||
194 | --- a/block/vmdk.c | ||
195 | +++ b/block/vmdk.c | ||
196 | @@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk, | ||
197 | int gd_buf_size; | ||
198 | |||
199 | if (flat) { | ||
200 | - ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp); | ||
201 | + ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp); | ||
202 | goto exit; | ||
203 | } | ||
204 | magic = cpu_to_be32(VMDK4_MAGIC); | ||
205 | @@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk, | ||
206 | } | ||
207 | |||
208 | ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false, | ||
209 | - PREALLOC_MODE_OFF, errp); | ||
210 | + PREALLOC_MODE_OFF, 0, errp); | ||
211 | if (ret < 0) { | ||
212 | goto exit; | ||
213 | } | ||
214 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn vmdk_co_do_create(int64_t size, | ||
215 | /* bdrv_pwrite write padding zeros to align to sector, we don't need that | ||
216 | * for description file */ | ||
217 | if (desc_offset == 0) { | ||
218 | - ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp); | ||
219 | + ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp); | ||
220 | if (ret < 0) { | ||
221 | goto exit; | ||
222 | } | ||
223 | diff --git a/block/vpc.c b/block/vpc.c | ||
224 | index XXXXXXX..XXXXXXX 100644 | ||
225 | --- a/block/vpc.c | ||
226 | +++ b/block/vpc.c | ||
227 | @@ -XXX,XX +XXX,XX @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, | ||
228 | /* Add footer to total size */ | ||
229 | total_size += HEADER_SIZE; | ||
230 | |||
231 | - ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp); | ||
232 | + ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp); | ||
233 | if (ret < 0) { | ||
234 | return ret; | ||
235 | } | ||
236 | diff --git a/blockdev.c b/blockdev.c | ||
237 | index XXXXXXX..XXXXXXX 100644 | ||
238 | --- a/blockdev.c | ||
239 | +++ b/blockdev.c | ||
240 | @@ -XXX,XX +XXX,XX @@ void qmp_block_resize(bool has_device, const char *device, | ||
241 | } | ||
242 | |||
243 | bdrv_drained_begin(bs); | ||
244 | - ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp); | ||
245 | + ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); | ||
246 | bdrv_drained_end(bs); | ||
247 | |||
248 | out: | ||
249 | diff --git a/qemu-img.c b/qemu-img.c | ||
250 | index XXXXXXX..XXXXXXX 100644 | ||
251 | --- a/qemu-img.c | ||
252 | +++ b/qemu-img.c | ||
253 | @@ -XXX,XX +XXX,XX @@ static int img_resize(int argc, char **argv) | ||
254 | * resizing, so pass @exact=true. It is of no use to report | ||
255 | * success when the image has not actually been resized. | ||
256 | */ | ||
257 | - ret = blk_truncate(blk, total_size, true, prealloc, &err); | ||
258 | + ret = blk_truncate(blk, total_size, true, prealloc, 0, &err); | ||
259 | if (!ret) { | ||
260 | qprintf(quiet, "Image resized.\n"); | ||
38 | } else { | 261 | } else { |
39 | /* We want consistent read from backing files if the parent needs it. | 262 | diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c |
263 | index XXXXXXX..XXXXXXX 100644 | ||
264 | --- a/qemu-io-cmds.c | ||
265 | +++ b/qemu-io-cmds.c | ||
266 | @@ -XXX,XX +XXX,XX @@ static int truncate_f(BlockBackend *blk, int argc, char **argv) | ||
267 | * exact=true. It is better to err on the "emit more errors" side | ||
268 | * than to be overly permissive. | ||
269 | */ | ||
270 | - ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err); | ||
271 | + ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err); | ||
272 | if (ret < 0) { | ||
273 | error_report_err(local_err); | ||
274 | return ret; | ||
40 | -- | 275 | -- |
41 | 2.13.6 | 276 | 2.25.3 |
42 | 277 | ||
43 | 278 | diff view generated by jsdifflib |
1 | Commit 15afd94a047 added code to acquire and release the AioContext in | 1 | If BDRV_REQ_ZERO_WRITE is set and we're extending the image, calling |
---|---|---|---|
2 | qemuio_command(). This means that the lock is taken twice now in the | 2 | qcow2_cluster_zeroize() with flags=0 does the right thing: It doesn't |
3 | call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for | 3 | undo any previous preallocation, but just adds the zero flag to all |
4 | any requests issued to nodes in a non-mainloop AioContext. | 4 | relevant L2 entries. If an external data file is in use, a write_zeroes |
5 | 5 | request to the data file is made instead. | |
6 | Dropping the first locking from hmp_qemu_io() fixes the problem. | ||
7 | 6 | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 8 | Message-Id: <20200424125448.63318-5-kwolf@redhat.com> |
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
10 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | --- | 12 | --- |
11 | hmp.c | 6 ------ | 13 | block/qcow2-cluster.c | 2 +- |
12 | 1 file changed, 6 deletions(-) | 14 | block/qcow2.c | 34 ++++++++++++++++++++++++++++++++++ |
15 | 2 files changed, 35 insertions(+), 1 deletion(-) | ||
13 | 16 | ||
14 | diff --git a/hmp.c b/hmp.c | 17 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c |
15 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/hmp.c | 19 | --- a/block/qcow2-cluster.c |
17 | +++ b/hmp.c | 20 | +++ b/block/qcow2-cluster.c |
18 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) | 21 | @@ -XXX,XX +XXX,XX @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, |
19 | { | 22 | /* Caller must pass aligned values, except at image end */ |
20 | BlockBackend *blk; | 23 | assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); |
21 | BlockBackend *local_blk = NULL; | 24 | assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || |
22 | - AioContext *aio_context; | 25 | - end_offset == bs->total_sectors << BDRV_SECTOR_BITS); |
23 | const char* device = qdict_get_str(qdict, "device"); | 26 | + end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); |
24 | const char* command = qdict_get_str(qdict, "command"); | 27 | |
25 | Error *err = NULL; | 28 | /* The zero flag is only supported by version 3 and newer */ |
26 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) | 29 | if (s->qcow_version < 3) { |
27 | } | 30 | diff --git a/block/qcow2.c b/block/qcow2.c |
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/block/qcow2.c | ||
33 | +++ b/block/qcow2.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, | ||
35 | |||
36 | bs->supported_zero_flags = header.version >= 3 ? | ||
37 | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0; | ||
38 | + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; | ||
39 | |||
40 | /* Repair image if dirty */ | ||
41 | if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && | ||
42 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
43 | g_assert_not_reached(); | ||
28 | } | 44 | } |
29 | 45 | ||
30 | - aio_context = blk_get_aio_context(blk); | 46 | + if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) { |
31 | - aio_context_acquire(aio_context); | 47 | + uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size); |
32 | - | 48 | + |
33 | /* | 49 | + /* |
34 | * Notably absent: Proper permission management. This is sad, but it seems | 50 | + * Use zero clusters as much as we can. qcow2_cluster_zeroize() |
35 | * almost impossible to achieve without changing the semantics and thereby | 51 | + * requires a cluster-aligned start. The end may be unaligned if it is |
36 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) | 52 | + * at the end of the image (which it is here). |
37 | */ | 53 | + */ |
38 | qemuio_command(blk, command); | 54 | + ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0); |
39 | 55 | + if (ret < 0) { | |
40 | - aio_context_release(aio_context); | 56 | + error_setg_errno(errp, -ret, "Failed to zero out new clusters"); |
41 | - | 57 | + goto fail; |
42 | fail: | 58 | + } |
43 | blk_unref(local_blk); | 59 | + |
44 | hmp_handle_error(mon, &err); | 60 | + /* Write explicit zeros for the unaligned head */ |
61 | + if (zero_start > old_length) { | ||
62 | + uint64_t len = zero_start - old_length; | ||
63 | + uint8_t *buf = qemu_blockalign0(bs, len); | ||
64 | + QEMUIOVector qiov; | ||
65 | + qemu_iovec_init_buf(&qiov, buf, len); | ||
66 | + | ||
67 | + qemu_co_mutex_unlock(&s->lock); | ||
68 | + ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0); | ||
69 | + qemu_co_mutex_lock(&s->lock); | ||
70 | + | ||
71 | + qemu_vfree(buf); | ||
72 | + if (ret < 0) { | ||
73 | + error_setg_errno(errp, -ret, "Failed to zero out the new area"); | ||
74 | + goto fail; | ||
75 | + } | ||
76 | + } | ||
77 | + } | ||
78 | + | ||
79 | if (prealloc != PREALLOC_MODE_OFF) { | ||
80 | /* Flush metadata before actually changing the image size */ | ||
81 | ret = qcow2_write_caches(bs); | ||
45 | -- | 82 | -- |
46 | 2.13.6 | 83 | 2.25.3 |
47 | 84 | ||
48 | 85 | diff view generated by jsdifflib |
1 | Now that the bdrv_drain_invoke() calls are pulled up to the callers of | 1 | The raw format driver can simply forward the flag and let its bs->file |
---|---|---|---|
2 | bdrv_drain_recurse(), the 'begin' parameter isn't needed any more. | 2 | child take care of actually providing the zeros. |
3 | 3 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 5 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
6 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Message-Id: <20200424125448.63318-6-kwolf@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
6 | --- | 10 | --- |
7 | block/io.c | 12 ++++++------ | 11 | block/raw-format.c | 4 +++- |
8 | 1 file changed, 6 insertions(+), 6 deletions(-) | 12 | 1 file changed, 3 insertions(+), 1 deletion(-) |
9 | 13 | ||
10 | diff --git a/block/io.c b/block/io.c | 14 | diff --git a/block/raw-format.c b/block/raw-format.c |
11 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/block/io.c | 16 | --- a/block/raw-format.c |
13 | +++ b/block/io.c | 17 | +++ b/block/raw-format.c |
14 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | 18 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, |
15 | } | 19 | |
20 | s->size = offset; | ||
21 | offset += s->offset; | ||
22 | - return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); | ||
23 | + return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); | ||
16 | } | 24 | } |
17 | 25 | ||
18 | -static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) | 26 | static void raw_eject(BlockDriverState *bs, bool eject_flag) |
19 | +static bool bdrv_drain_recurse(BlockDriverState *bs) | 27 | @@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, |
20 | { | 28 | bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | |
21 | BdrvChild *child, *tmp; | 29 | ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & |
22 | bool waited; | 30 | bs->file->bs->supported_zero_flags); |
23 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) | 31 | + bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags & |
24 | */ | 32 | + BDRV_REQ_ZERO_WRITE; |
25 | bdrv_ref(bs); | 33 | |
26 | } | 34 | if (bs->probed && !bdrv_is_read_only(bs)) { |
27 | - waited |= bdrv_drain_recurse(bs, begin); | 35 | bdrv_refresh_filename(bs->file->bs); |
28 | + waited |= bdrv_drain_recurse(bs); | ||
29 | if (in_main_loop) { | ||
30 | bdrv_unref(bs); | ||
31 | } | ||
32 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) | ||
33 | } | ||
34 | |||
35 | bdrv_drain_invoke(bs, true); | ||
36 | - bdrv_drain_recurse(bs, true); | ||
37 | + bdrv_drain_recurse(bs); | ||
38 | } | ||
39 | |||
40 | void bdrv_drained_end(BlockDriverState *bs) | ||
41 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | ||
42 | |||
43 | bdrv_parent_drained_end(bs); | ||
44 | bdrv_drain_invoke(bs, false); | ||
45 | - bdrv_drain_recurse(bs, false); | ||
46 | + bdrv_drain_recurse(bs); | ||
47 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
48 | } | ||
49 | |||
50 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
51 | aio_context_acquire(aio_context); | ||
52 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
53 | if (aio_context == bdrv_get_aio_context(bs)) { | ||
54 | - waited |= bdrv_drain_recurse(bs, true); | ||
55 | + waited |= bdrv_drain_recurse(bs); | ||
56 | } | ||
57 | } | ||
58 | aio_context_release(aio_context); | ||
59 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
60 | aio_enable_external(aio_context); | ||
61 | bdrv_parent_drained_end(bs); | ||
62 | bdrv_drain_invoke(bs, false); | ||
63 | - bdrv_drain_recurse(bs, false); | ||
64 | + bdrv_drain_recurse(bs); | ||
65 | aio_context_release(aio_context); | ||
66 | } | ||
67 | |||
68 | -- | 36 | -- |
69 | 2.13.6 | 37 | 2.25.3 |
70 | 38 | ||
71 | 39 | diff view generated by jsdifflib |
1 | Removing a quorum child node with x-blockdev-change results in a quorum | 1 | For regular files, we always get BDRV_REQ_ZERO_WRITE behaviour from the |
---|---|---|---|
2 | driver state that cannot be recreated with create options because it | 2 | OS, so we can advertise the flag and just ignore it. |
3 | would require a list with gaps. This causes trouble in at least | ||
4 | .bdrv_refresh_filename(). | ||
5 | |||
6 | Document this problem so that we won't accidentally mark the command | ||
7 | stable without having addressed it. | ||
8 | 3 | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
10 | Reviewed-by: Alberto Garcia <berto@igalia.com> | 6 | Reviewed-by: Alberto Garcia <berto@igalia.com> |
7 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
8 | Message-Id: <20200424125448.63318-7-kwolf@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | --- | 10 | --- |
12 | qapi/block-core.json | 4 ++++ | 11 | block/file-posix.c | 4 ++++ |
13 | 1 file changed, 4 insertions(+) | 12 | 1 file changed, 4 insertions(+) |
14 | 13 | ||
15 | diff --git a/qapi/block-core.json b/qapi/block-core.json | 14 | diff --git a/block/file-posix.c b/block/file-posix.c |
16 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/qapi/block-core.json | 16 | --- a/block/file-posix.c |
18 | +++ b/qapi/block-core.json | 17 | +++ b/block/file-posix.c |
19 | @@ -XXX,XX +XXX,XX @@ | 18 | @@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options, |
20 | # does not support all kinds of operations, all kinds of children, nor | 19 | #endif |
21 | # all block drivers. | 20 | |
22 | # | 21 | bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; |
23 | +# FIXME Removing children from a quorum node means introducing gaps in the | 22 | + if (S_ISREG(st.st_mode)) { |
24 | +# child indices. This cannot be represented in the 'children' list of | 23 | + /* When extending regular files, we get zeros from the OS */ |
25 | +# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename(). | 24 | + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; |
26 | +# | 25 | + } |
27 | # Warning: The data in a new quorum child MUST be consistent with that of | 26 | ret = 0; |
28 | # the rest of the array. | 27 | fail: |
29 | # | 28 | if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { |
30 | -- | 29 | -- |
31 | 2.13.6 | 30 | 2.25.3 |
32 | 31 | ||
33 | 32 | diff view generated by jsdifflib |
1 | The device is drained, so there is no point in waiting for requests at | 1 | When extending the size of an image that has a backing file larger than |
---|---|---|---|
2 | the end of the drained section. Remove the bdrv_drain_recurse() calls | 2 | its old size, make sure that the backing file data doesn't become |
3 | there. | 3 | visible in the guest, but the added area is properly zeroed out. |
4 | 4 | ||
5 | The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e | 5 | Consider the following scenario where the overlay is shorter than its |
6 | in order to call the .bdrv_co_drain_end() driver callback. This is now | 6 | backing file: |
7 | done by a separate bdrv_drain_invoke() call. | 7 | |
8 | base.qcow2: AAAAAAAA | ||
9 | overlay.qcow2: BBBB | ||
10 | |||
11 | When resizing (extending) overlay.qcow2, the new blocks should not stay | ||
12 | unallocated and make the additional As from base.qcow2 visible like | ||
13 | before this patch, but zeros should be read. | ||
14 | |||
15 | A similar case happens with the various variants of a commit job when an | ||
16 | intermediate file is short (- for unallocated): | ||
17 | |||
18 | base.qcow2: A-A-AAAA | ||
19 | mid.qcow2: BB-B | ||
20 | top.qcow2: C--C--C- | ||
21 | |||
22 | After commit top.qcow2 to mid.qcow2, the following happens: | ||
23 | |||
24 | mid.qcow2: CB-C00C0 (correct result) | ||
25 | mid.qcow2: CB-C--C- (before this fix) | ||
26 | |||
27 | Without the fix, blocks that previously read as zeros on top.qcow2 | ||
28 | suddenly turn into A. | ||
8 | 29 | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 30 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | 31 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 32 | Message-Id: <20200424125448.63318-8-kwolf@redhat.com> |
33 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
34 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | --- | 35 | --- |
13 | block/io.c | 2 -- | 36 | block/io.c | 25 +++++++++++++++++++++++++ |
14 | 1 file changed, 2 deletions(-) | 37 | 1 file changed, 25 insertions(+) |
15 | 38 | ||
16 | diff --git a/block/io.c b/block/io.c | 39 | diff --git a/block/io.c b/block/io.c |
17 | index XXXXXXX..XXXXXXX 100644 | 40 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/block/io.c | 41 | --- a/block/io.c |
19 | +++ b/block/io.c | 42 | +++ b/block/io.c |
20 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | 43 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, |
21 | 44 | goto out; | |
22 | bdrv_parent_drained_end(bs); | ||
23 | bdrv_drain_invoke(bs, false); | ||
24 | - bdrv_drain_recurse(bs); | ||
25 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
26 | } | ||
27 | |||
28 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
29 | aio_enable_external(aio_context); | ||
30 | bdrv_parent_drained_end(bs); | ||
31 | bdrv_drain_invoke(bs, false); | ||
32 | - bdrv_drain_recurse(bs); | ||
33 | aio_context_release(aio_context); | ||
34 | } | 45 | } |
35 | 46 | ||
47 | + /* | ||
48 | + * If the image has a backing file that is large enough that it would | ||
49 | + * provide data for the new area, we cannot leave it unallocated because | ||
50 | + * then the backing file content would become visible. Instead, zero-fill | ||
51 | + * the new area. | ||
52 | + * | ||
53 | + * Note that if the image has a backing file, but was opened without the | ||
54 | + * backing file, taking care of keeping things consistent with that backing | ||
55 | + * file is the user's responsibility. | ||
56 | + */ | ||
57 | + if (new_bytes && bs->backing) { | ||
58 | + int64_t backing_len; | ||
59 | + | ||
60 | + backing_len = bdrv_getlength(backing_bs(bs)); | ||
61 | + if (backing_len < 0) { | ||
62 | + ret = backing_len; | ||
63 | + error_setg_errno(errp, -ret, "Could not get backing file size"); | ||
64 | + goto out; | ||
65 | + } | ||
66 | + | ||
67 | + if (backing_len > old_size) { | ||
68 | + flags |= BDRV_REQ_ZERO_WRITE; | ||
69 | + } | ||
70 | + } | ||
71 | + | ||
72 | if (drv->bdrv_co_truncate) { | ||
73 | if (flags & ~bs->supported_truncate_flags) { | ||
74 | error_setg(errp, "Block driver does not support requested flags"); | ||
36 | -- | 75 | -- |
37 | 2.13.6 | 76 | 2.25.3 |
38 | 77 | ||
39 | 78 | diff view generated by jsdifflib |
1 | From: John Snow <jsnow@redhat.com> | 1 | We want to keep TEST_IMG for the full path of the main test image, but |
---|---|---|---|
2 | filter_testfiles() must be called for other test images before replacing | ||
3 | other things like the image format because the test directory path could | ||
4 | contain the format as a substring. | ||
2 | 5 | ||
3 | VPC has some difficulty creating geometries of particular size. | 6 | Insert a filter_testfiles() call between both. |
4 | However, we can indeed force it to use a literal one, so let's | ||
5 | do that for the sake of test 197, which is testing some specific | ||
6 | offsets. | ||
7 | 7 | ||
8 | Signed-off-by: John Snow <jsnow@redhat.com> | ||
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | Reviewed-by: Lukáš Doktor <ldoktor@redhat.com> | 9 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
10 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
11 | Message-Id: <20200424125448.63318-9-kwolf@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | --- | 13 | --- |
14 | tests/qemu-iotests/197 | 4 ++++ | 14 | tests/qemu-iotests/iotests.py | 5 +++-- |
15 | tests/qemu-iotests/common.filter | 3 ++- | 15 | 1 file changed, 3 insertions(+), 2 deletions(-) |
16 | 2 files changed, 6 insertions(+), 1 deletion(-) | ||
17 | 16 | ||
18 | diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197 | 17 | diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py |
19 | index XXXXXXX..XXXXXXX 100755 | ||
20 | --- a/tests/qemu-iotests/197 | ||
21 | +++ b/tests/qemu-iotests/197 | ||
22 | @@ -XXX,XX +XXX,XX @@ echo '=== Copy-on-read ===' | ||
23 | echo | ||
24 | |||
25 | # Prep the images | ||
26 | +# VPC rounds image sizes to a specific geometry, force a specific size. | ||
27 | +if [ "$IMGFMT" = "vpc" ]; then | ||
28 | + IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size") | ||
29 | +fi | ||
30 | _make_test_img 4G | ||
31 | $QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io | ||
32 | IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \ | ||
33 | diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter | ||
34 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
35 | --- a/tests/qemu-iotests/common.filter | 19 | --- a/tests/qemu-iotests/iotests.py |
36 | +++ b/tests/qemu-iotests/common.filter | 20 | +++ b/tests/qemu-iotests/iotests.py |
37 | @@ -XXX,XX +XXX,XX @@ _filter_img_create() | 21 | @@ -XXX,XX +XXX,XX @@ def filter_img_info(output, filename): |
38 | -e "s# log_size=[0-9]\\+##g" \ | 22 | for line in output.split('\n'): |
39 | -e "s# refcount_bits=[0-9]\\+##g" \ | 23 | if 'disk size' in line or 'actual-size' in line: |
40 | -e "s# key-secret=[a-zA-Z0-9]\\+##g" \ | 24 | continue |
41 | - -e "s# iter-time=[0-9]\\+##g" | 25 | - line = line.replace(filename, 'TEST_IMG') \ |
42 | + -e "s# iter-time=[0-9]\\+##g" \ | 26 | - .replace(imgfmt, 'IMGFMT') |
43 | + -e "s# force_size=\\(on\\|off\\)##g" | 27 | + line = line.replace(filename, 'TEST_IMG') |
44 | } | 28 | + line = filter_testfiles(line) |
45 | 29 | + line = line.replace(imgfmt, 'IMGFMT') | |
46 | _filter_img_info() | 30 | line = re.sub('iters: [0-9]+', 'iters: XXX', line) |
31 | line = re.sub('uuid: [-a-f0-9]+', 'uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', line) | ||
32 | line = re.sub('cid: [0-9]+', 'cid: XXXXXXXXXX', line) | ||
47 | -- | 33 | -- |
48 | 2.13.6 | 34 | 2.25.3 |
49 | 35 | ||
50 | 36 | diff view generated by jsdifflib |
1 | This adds a test case that the BlockDriver callbacks for drain are | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
---|---|---|---|
2 | called in bdrv_drain_all_begin/end(), and that both of them are called | 2 | Message-Id: <20200424125448.63318-10-kwolf@redhat.com> |
3 | exactly once. | 3 | Reviewed-by: Max Reitz <mreitz@redhat.com> |
4 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
6 | --- | ||
7 | tests/qemu-iotests/274 | 155 +++++++++++++++++++++ | ||
8 | tests/qemu-iotests/274.out | 268 +++++++++++++++++++++++++++++++++++++ | ||
9 | tests/qemu-iotests/group | 1 + | ||
10 | 3 files changed, 424 insertions(+) | ||
11 | create mode 100755 tests/qemu-iotests/274 | ||
12 | create mode 100644 tests/qemu-iotests/274.out | ||
4 | 13 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274 |
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 15 | new file mode 100755 |
7 | Reviewed-by: Eric Blake <eblake@redhat.com> | 16 | index XXXXXXX..XXXXXXX |
8 | --- | 17 | --- /dev/null |
9 | tests/test-bdrv-drain.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++ | 18 | +++ b/tests/qemu-iotests/274 |
10 | tests/Makefile.include | 2 + | 19 | @@ -XXX,XX +XXX,XX @@ |
11 | 2 files changed, 139 insertions(+) | 20 | +#!/usr/bin/env python3 |
12 | create mode 100644 tests/test-bdrv-drain.c | 21 | +# |
13 | 22 | +# Copyright (C) 2019 Red Hat, Inc. | |
14 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 23 | +# |
24 | +# This program is free software; you can redistribute it and/or modify | ||
25 | +# it under the terms of the GNU General Public License as published by | ||
26 | +# the Free Software Foundation; either version 2 of the License, or | ||
27 | +# (at your option) any later version. | ||
28 | +# | ||
29 | +# This program is distributed in the hope that it will be useful, | ||
30 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
31 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
32 | +# GNU General Public License for more details. | ||
33 | +# | ||
34 | +# You should have received a copy of the GNU General Public License | ||
35 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
36 | +# | ||
37 | +# Creator/Owner: Kevin Wolf <kwolf@redhat.com> | ||
38 | +# | ||
39 | +# Some tests for short backing files and short overlays | ||
40 | + | ||
41 | +import iotests | ||
42 | + | ||
43 | +iotests.verify_image_format(supported_fmts=['qcow2']) | ||
44 | +iotests.verify_platform(['linux']) | ||
45 | + | ||
46 | +size_short = 1 * 1024 * 1024 | ||
47 | +size_long = 2 * 1024 * 1024 | ||
48 | +size_diff = size_long - size_short | ||
49 | + | ||
50 | +def create_chain() -> None: | ||
51 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, | ||
52 | + str(size_long)) | ||
53 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, mid, | ||
54 | + str(size_short)) | ||
55 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', mid, top, | ||
56 | + str(size_long)) | ||
57 | + | ||
58 | + iotests.qemu_io_log('-c', 'write -P 1 0 %d' % size_long, base) | ||
59 | + | ||
60 | +def create_vm() -> iotests.VM: | ||
61 | + vm = iotests.VM() | ||
62 | + vm.add_blockdev('file,filename=%s,node-name=base-file' % base) | ||
63 | + vm.add_blockdev('%s,file=base-file,node-name=base' % iotests.imgfmt) | ||
64 | + vm.add_blockdev('file,filename=%s,node-name=mid-file' % mid) | ||
65 | + vm.add_blockdev('%s,file=mid-file,node-name=mid,backing=base' | ||
66 | + % iotests.imgfmt) | ||
67 | + vm.add_drive(top, 'backing=mid,node-name=top') | ||
68 | + return vm | ||
69 | + | ||
70 | +with iotests.FilePath('base') as base, \ | ||
71 | + iotests.FilePath('mid') as mid, \ | ||
72 | + iotests.FilePath('top') as top: | ||
73 | + | ||
74 | + iotests.log('== Commit tests ==') | ||
75 | + | ||
76 | + create_chain() | ||
77 | + | ||
78 | + iotests.log('=== Check visible data ===') | ||
79 | + | ||
80 | + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, top) | ||
81 | + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), top) | ||
82 | + | ||
83 | + iotests.log('=== Checking allocation status ===') | ||
84 | + | ||
85 | + iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, | ||
86 | + '-c', 'alloc %d %d' % (size_short, size_diff), | ||
87 | + base) | ||
88 | + | ||
89 | + iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, | ||
90 | + '-c', 'alloc %d %d' % (size_short, size_diff), | ||
91 | + mid) | ||
92 | + | ||
93 | + iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, | ||
94 | + '-c', 'alloc %d %d' % (size_short, size_diff), | ||
95 | + top) | ||
96 | + | ||
97 | + iotests.log('=== Checking map ===') | ||
98 | + | ||
99 | + iotests.qemu_img_log('map', '--output=json', base) | ||
100 | + iotests.qemu_img_log('map', '--output=human', base) | ||
101 | + iotests.qemu_img_log('map', '--output=json', mid) | ||
102 | + iotests.qemu_img_log('map', '--output=human', mid) | ||
103 | + iotests.qemu_img_log('map', '--output=json', top) | ||
104 | + iotests.qemu_img_log('map', '--output=human', top) | ||
105 | + | ||
106 | + iotests.log('=== Testing qemu-img commit (top -> mid) ===') | ||
107 | + | ||
108 | + iotests.qemu_img_log('commit', top) | ||
109 | + iotests.img_info_log(mid) | ||
110 | + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) | ||
111 | + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) | ||
112 | + | ||
113 | + iotests.log('=== Testing HMP commit (top -> mid) ===') | ||
114 | + | ||
115 | + create_chain() | ||
116 | + with create_vm() as vm: | ||
117 | + vm.launch() | ||
118 | + vm.qmp_log('human-monitor-command', command_line='commit drive0') | ||
119 | + | ||
120 | + iotests.img_info_log(mid) | ||
121 | + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) | ||
122 | + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) | ||
123 | + | ||
124 | + iotests.log('=== Testing QMP active commit (top -> mid) ===') | ||
125 | + | ||
126 | + create_chain() | ||
127 | + with create_vm() as vm: | ||
128 | + vm.launch() | ||
129 | + vm.qmp_log('block-commit', device='top', base_node='mid', | ||
130 | + job_id='job0', auto_dismiss=False) | ||
131 | + vm.run_job('job0', wait=5) | ||
132 | + | ||
133 | + iotests.img_info_log(mid) | ||
134 | + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) | ||
135 | + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) | ||
136 | + | ||
137 | + | ||
138 | + iotests.log('== Resize tests ==') | ||
139 | + | ||
140 | + # Use different sizes for different allocation modes: | ||
141 | + # | ||
142 | + # We want to have at least one test where 32 bit truncation in the size of | ||
143 | + # the overlapping area becomes visible. This is covered by the | ||
144 | + # prealloc='off' case (1G to 6G is an overlap of 5G). | ||
145 | + # | ||
146 | + # However, we can only do this for modes that don't preallocate data | ||
147 | + # because otherwise we might run out of space on the test host. | ||
148 | + # | ||
149 | + # We also want to test some unaligned combinations. | ||
150 | + for (prealloc, base_size, top_size_old, top_size_new, off) in [ | ||
151 | + ('off', '6G', '1G', '8G', '5G'), | ||
152 | + ('metadata', '32G', '30G', '33G', '31G'), | ||
153 | + ('falloc', '10M', '5M', '15M', '9M'), | ||
154 | + ('full', '16M', '8M', '12M', '11M'), | ||
155 | + ('off', '384k', '253k', '512k', '253k'), | ||
156 | + ('off', '400k', '256k', '512k', '336k'), | ||
157 | + ('off', '512k', '256k', '500k', '436k')]: | ||
158 | + | ||
159 | + iotests.log('=== preallocation=%s ===' % prealloc) | ||
160 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, base_size) | ||
161 | + iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, top, | ||
162 | + top_size_old) | ||
163 | + iotests.qemu_io_log('-c', 'write -P 1 %s 64k' % off, base) | ||
164 | + | ||
165 | + # After this, top_size_old to base_size should be allocated/zeroed. | ||
166 | + # | ||
167 | + # In theory, leaving base_size to top_size_new unallocated would be | ||
168 | + # correct, but in practice, if we zero out anything, we zero out | ||
169 | + # everything up to top_size_new. | ||
170 | + iotests.qemu_img_log('resize', '-f', iotests.imgfmt, | ||
171 | + '--preallocation', prealloc, top, top_size_new) | ||
172 | + iotests.qemu_io_log('-c', 'read -P 0 %s 64k' % off, top) | ||
173 | + iotests.qemu_io_log('-c', 'map', top) | ||
174 | + iotests.qemu_img_log('map', '--output=json', top) | ||
175 | diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out | ||
15 | new file mode 100644 | 176 | new file mode 100644 |
16 | index XXXXXXX..XXXXXXX | 177 | index XXXXXXX..XXXXXXX |
17 | --- /dev/null | 178 | --- /dev/null |
18 | +++ b/tests/test-bdrv-drain.c | 179 | +++ b/tests/qemu-iotests/274.out |
19 | @@ -XXX,XX +XXX,XX @@ | 180 | @@ -XXX,XX +XXX,XX @@ |
20 | +/* | 181 | +== Commit tests == |
21 | + * Block node draining tests | 182 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
22 | + * | 183 | + |
23 | + * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com> | 184 | +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
24 | + * | 185 | + |
25 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | 186 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
26 | + * of this software and associated documentation files (the "Software"), to deal | 187 | + |
27 | + * in the Software without restriction, including without limitation the rights | 188 | +wrote 2097152/2097152 bytes at offset 0 |
28 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 189 | +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
29 | + * copies of the Software, and to permit persons to whom the Software is | 190 | + |
30 | + * furnished to do so, subject to the following conditions: | 191 | +=== Check visible data === |
31 | + * | 192 | +read 1048576/1048576 bytes at offset 0 |
32 | + * The above copyright notice and this permission notice shall be included in | 193 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
33 | + * all copies or substantial portions of the Software. | 194 | + |
34 | + * | 195 | +read 1048576/1048576 bytes at offset 1048576 |
35 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 196 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
36 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 197 | + |
37 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | 198 | +=== Checking allocation status === |
38 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 199 | +1048576/1048576 bytes allocated at offset 0 bytes |
39 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 200 | +1048576/1048576 bytes allocated at offset 1 MiB |
40 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 201 | + |
41 | + * THE SOFTWARE. | 202 | +0/1048576 bytes allocated at offset 0 bytes |
42 | + */ | 203 | +0/0 bytes allocated at offset 1 MiB |
43 | + | 204 | + |
44 | +#include "qemu/osdep.h" | 205 | +0/1048576 bytes allocated at offset 0 bytes |
45 | +#include "block/block.h" | 206 | +0/1048576 bytes allocated at offset 1 MiB |
46 | +#include "sysemu/block-backend.h" | 207 | + |
47 | +#include "qapi/error.h" | 208 | +=== Checking map === |
48 | + | 209 | +[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680}] |
49 | +typedef struct BDRVTestState { | 210 | + |
50 | + int drain_count; | 211 | +Offset Length Mapped to File |
51 | +} BDRVTestState; | 212 | +0 0x200000 0x50000 TEST_DIR/PID-base |
52 | + | 213 | + |
53 | +static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) | 214 | +[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680}] |
54 | +{ | 215 | + |
55 | + BDRVTestState *s = bs->opaque; | 216 | +Offset Length Mapped to File |
56 | + s->drain_count++; | 217 | +0 0x100000 0x50000 TEST_DIR/PID-base |
57 | +} | 218 | + |
58 | + | 219 | +[{ "start": 0, "length": 1048576, "depth": 2, "zero": false, "data": true, "offset": 327680}, |
59 | +static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) | 220 | +{ "start": 1048576, "length": 1048576, "depth": 0, "zero": true, "data": false}] |
60 | +{ | 221 | + |
61 | + BDRVTestState *s = bs->opaque; | 222 | +Offset Length Mapped to File |
62 | + s->drain_count--; | 223 | +0 0x100000 0x50000 TEST_DIR/PID-base |
63 | +} | 224 | + |
64 | + | 225 | +=== Testing qemu-img commit (top -> mid) === |
65 | +static void bdrv_test_close(BlockDriverState *bs) | 226 | +Image committed. |
66 | +{ | 227 | + |
67 | + BDRVTestState *s = bs->opaque; | 228 | +image: TEST_IMG |
68 | + g_assert_cmpint(s->drain_count, >, 0); | 229 | +file format: IMGFMT |
69 | +} | 230 | +virtual size: 2 MiB (2097152 bytes) |
70 | + | 231 | +cluster_size: 65536 |
71 | +static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs, | 232 | +backing file: TEST_DIR/PID-base |
72 | + uint64_t offset, uint64_t bytes, | 233 | +Format specific information: |
73 | + QEMUIOVector *qiov, int flags) | 234 | + compat: 1.1 |
74 | +{ | 235 | + lazy refcounts: false |
75 | + /* We want this request to stay until the polling loop in drain waits for | 236 | + refcount bits: 16 |
76 | + * it to complete. We need to sleep a while as bdrv_drain_invoke() comes | 237 | + corrupt: false |
77 | + * first and polls its result, too, but it shouldn't accidentally complete | 238 | + |
78 | + * this request yet. */ | 239 | +read 1048576/1048576 bytes at offset 0 |
79 | + co_aio_sleep_ns(qemu_get_aio_context(), QEMU_CLOCK_REALTIME, 100000); | 240 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
80 | + | 241 | + |
81 | + return 0; | 242 | +read 1048576/1048576 bytes at offset 1048576 |
82 | +} | 243 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
83 | + | 244 | + |
84 | +static BlockDriver bdrv_test = { | 245 | +=== Testing HMP commit (top -> mid) === |
85 | + .format_name = "test", | 246 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
86 | + .instance_size = sizeof(BDRVTestState), | 247 | + |
87 | + | 248 | +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
88 | + .bdrv_close = bdrv_test_close, | 249 | + |
89 | + .bdrv_co_preadv = bdrv_test_co_preadv, | 250 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
90 | + | 251 | + |
91 | + .bdrv_co_drain_begin = bdrv_test_co_drain_begin, | 252 | +wrote 2097152/2097152 bytes at offset 0 |
92 | + .bdrv_co_drain_end = bdrv_test_co_drain_end, | 253 | +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
93 | +}; | 254 | + |
94 | + | 255 | +{"execute": "human-monitor-command", "arguments": {"command-line": "commit drive0"}} |
95 | +static void aio_ret_cb(void *opaque, int ret) | 256 | +{"return": ""} |
96 | +{ | 257 | +image: TEST_IMG |
97 | + int *aio_ret = opaque; | 258 | +file format: IMGFMT |
98 | + *aio_ret = ret; | 259 | +virtual size: 2 MiB (2097152 bytes) |
99 | +} | 260 | +cluster_size: 65536 |
100 | + | 261 | +backing file: TEST_DIR/PID-base |
101 | +static void test_drv_cb_drain_all(void) | 262 | +Format specific information: |
102 | +{ | 263 | + compat: 1.1 |
103 | + BlockBackend *blk; | 264 | + lazy refcounts: false |
104 | + BlockDriverState *bs; | 265 | + refcount bits: 16 |
105 | + BDRVTestState *s; | 266 | + corrupt: false |
106 | + BlockAIOCB *acb; | 267 | + |
107 | + int aio_ret; | 268 | +read 1048576/1048576 bytes at offset 0 |
108 | + | 269 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
109 | + QEMUIOVector qiov; | 270 | + |
110 | + struct iovec iov = { | 271 | +read 1048576/1048576 bytes at offset 1048576 |
111 | + .iov_base = NULL, | 272 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
112 | + .iov_len = 0, | 273 | + |
113 | + }; | 274 | +=== Testing QMP active commit (top -> mid) === |
114 | + qemu_iovec_init_external(&qiov, &iov, 1); | 275 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
115 | + | 276 | + |
116 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | 277 | +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
117 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, | 278 | + |
118 | + &error_abort); | 279 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
119 | + s = bs->opaque; | 280 | + |
120 | + blk_insert_bs(blk, bs, &error_abort); | 281 | +wrote 2097152/2097152 bytes at offset 0 |
121 | + | 282 | +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
122 | + /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ | 283 | + |
123 | + g_assert_cmpint(s->drain_count, ==, 0); | 284 | +{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "mid", "device": "top", "job-id": "job0"}} |
124 | + bdrv_drain_all_begin(); | 285 | +{"return": {}} |
125 | + g_assert_cmpint(s->drain_count, ==, 1); | 286 | +{"execute": "job-complete", "arguments": {"id": "job0"}} |
126 | + bdrv_drain_all_end(); | 287 | +{"return": {}} |
127 | + g_assert_cmpint(s->drain_count, ==, 0); | 288 | +{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} |
128 | + | 289 | +{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} |
129 | + /* Now do the same while a request is pending */ | 290 | +{"execute": "job-dismiss", "arguments": {"id": "job0"}} |
130 | + aio_ret = -EINPROGRESS; | 291 | +{"return": {}} |
131 | + acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret); | 292 | +image: TEST_IMG |
132 | + g_assert(acb != NULL); | 293 | +file format: IMGFMT |
133 | + g_assert_cmpint(aio_ret, ==, -EINPROGRESS); | 294 | +virtual size: 2 MiB (2097152 bytes) |
134 | + | 295 | +cluster_size: 65536 |
135 | + g_assert_cmpint(s->drain_count, ==, 0); | 296 | +backing file: TEST_DIR/PID-base |
136 | + bdrv_drain_all_begin(); | 297 | +Format specific information: |
137 | + g_assert_cmpint(aio_ret, ==, 0); | 298 | + compat: 1.1 |
138 | + g_assert_cmpint(s->drain_count, ==, 1); | 299 | + lazy refcounts: false |
139 | + bdrv_drain_all_end(); | 300 | + refcount bits: 16 |
140 | + g_assert_cmpint(s->drain_count, ==, 0); | 301 | + corrupt: false |
141 | + | 302 | + |
142 | + bdrv_unref(bs); | 303 | +read 1048576/1048576 bytes at offset 0 |
143 | + blk_unref(blk); | 304 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
144 | +} | 305 | + |
145 | + | 306 | +read 1048576/1048576 bytes at offset 1048576 |
146 | +int main(int argc, char **argv) | 307 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
147 | +{ | 308 | + |
148 | + bdrv_init(); | 309 | +== Resize tests == |
149 | + qemu_init_main_loop(&error_abort); | 310 | +=== preallocation=off === |
150 | + | 311 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=6442450944 cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
151 | + g_test_init(&argc, &argv, NULL); | 312 | + |
152 | + | 313 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=1073741824 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
153 | + g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); | 314 | + |
154 | + | 315 | +wrote 65536/65536 bytes at offset 5368709120 |
155 | + return g_test_run(); | 316 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) |
156 | +} | 317 | + |
157 | diff --git a/tests/Makefile.include b/tests/Makefile.include | 318 | +Image resized. |
319 | + | ||
320 | +read 65536/65536 bytes at offset 5368709120 | ||
321 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
322 | + | ||
323 | +1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0) | ||
324 | +7 GiB (0x1c0000000) bytes allocated at offset 1 GiB (0x40000000) | ||
325 | + | ||
326 | +[{ "start": 0, "length": 1073741824, "depth": 1, "zero": true, "data": false}, | ||
327 | +{ "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}] | ||
328 | + | ||
329 | +=== preallocation=metadata === | ||
330 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=34359738368 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
331 | + | ||
332 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=32212254720 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
333 | + | ||
334 | +wrote 65536/65536 bytes at offset 33285996544 | ||
335 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
336 | + | ||
337 | +Image resized. | ||
338 | + | ||
339 | +read 65536/65536 bytes at offset 33285996544 | ||
340 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
341 | + | ||
342 | +30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0) | ||
343 | +3 GiB (0xc0000000) bytes allocated at offset 30 GiB (0x780000000) | ||
344 | + | ||
345 | +[{ "start": 0, "length": 32212254720, "depth": 1, "zero": true, "data": false}, | ||
346 | +{ "start": 32212254720, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 327680}, | ||
347 | +{ "start": 32749125632, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 537264128}, | ||
348 | +{ "start": 33285996544, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1074200576}, | ||
349 | +{ "start": 33822867456, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1611137024}, | ||
350 | +{ "start": 34359738368, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2148139008}, | ||
351 | +{ "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}] | ||
352 | + | ||
353 | +=== preallocation=falloc === | ||
354 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=10485760 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
355 | + | ||
356 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=5242880 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
357 | + | ||
358 | +wrote 65536/65536 bytes at offset 9437184 | ||
359 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
360 | + | ||
361 | +Image resized. | ||
362 | + | ||
363 | +read 65536/65536 bytes at offset 9437184 | ||
364 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
365 | + | ||
366 | +5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0) | ||
367 | +10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) | ||
368 | + | ||
369 | +[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, | ||
370 | +{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] | ||
371 | + | ||
372 | +=== preallocation=full === | ||
373 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
374 | + | ||
375 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=8388608 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
376 | + | ||
377 | +wrote 65536/65536 bytes at offset 11534336 | ||
378 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
379 | + | ||
380 | +Image resized. | ||
381 | + | ||
382 | +read 65536/65536 bytes at offset 11534336 | ||
383 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
384 | + | ||
385 | +8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0) | ||
386 | +4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) | ||
387 | + | ||
388 | +[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, | ||
389 | +{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] | ||
390 | + | ||
391 | +=== preallocation=off === | ||
392 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
393 | + | ||
394 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=259072 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
395 | + | ||
396 | +wrote 65536/65536 bytes at offset 259072 | ||
397 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
398 | + | ||
399 | +Image resized. | ||
400 | + | ||
401 | +read 65536/65536 bytes at offset 259072 | ||
402 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
403 | + | ||
404 | +192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0) | ||
405 | +320 KiB (0x50000) bytes allocated at offset 192 KiB (0x30000) | ||
406 | + | ||
407 | +[{ "start": 0, "length": 196608, "depth": 1, "zero": true, "data": false}, | ||
408 | +{ "start": 196608, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": 327680}, | ||
409 | +{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] | ||
410 | + | ||
411 | +=== preallocation=off === | ||
412 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=409600 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
413 | + | ||
414 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
415 | + | ||
416 | +wrote 65536/65536 bytes at offset 344064 | ||
417 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
418 | + | ||
419 | +Image resized. | ||
420 | + | ||
421 | +read 65536/65536 bytes at offset 344064 | ||
422 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
423 | + | ||
424 | +256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) | ||
425 | +256 KiB (0x40000) bytes allocated at offset 256 KiB (0x40000) | ||
426 | + | ||
427 | +[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, | ||
428 | +{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] | ||
429 | + | ||
430 | +=== preallocation=off === | ||
431 | +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=524288 cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
432 | + | ||
433 | +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
434 | + | ||
435 | +wrote 65536/65536 bytes at offset 446464 | ||
436 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
437 | + | ||
438 | +Image resized. | ||
439 | + | ||
440 | +read 65536/65536 bytes at offset 446464 | ||
441 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
442 | + | ||
443 | +256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) | ||
444 | +244 KiB (0x3d000) bytes allocated at offset 256 KiB (0x40000) | ||
445 | + | ||
446 | +[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, | ||
447 | +{ "start": 262144, "length": 249856, "depth": 0, "zero": true, "data": false}] | ||
448 | + | ||
449 | diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group | ||
158 | index XXXXXXX..XXXXXXX 100644 | 450 | index XXXXXXX..XXXXXXX 100644 |
159 | --- a/tests/Makefile.include | 451 | --- a/tests/qemu-iotests/group |
160 | +++ b/tests/Makefile.include | 452 | +++ b/tests/qemu-iotests/group |
161 | @@ -XXX,XX +XXX,XX @@ gcov-files-test-thread-pool-y = thread-pool.c | 453 | @@ -XXX,XX +XXX,XX @@ |
162 | gcov-files-test-hbitmap-y = util/hbitmap.c | 454 | 270 rw backing quick |
163 | check-unit-y += tests/test-hbitmap$(EXESUF) | 455 | 272 rw |
164 | gcov-files-test-hbitmap-y = blockjob.c | 456 | 273 backing quick |
165 | +check-unit-y += tests/test-bdrv-drain$(EXESUF) | 457 | +274 rw backing |
166 | check-unit-y += tests/test-blockjob$(EXESUF) | 458 | 277 rw quick |
167 | check-unit-y += tests/test-blockjob-txn$(EXESUF) | 459 | 279 rw backing quick |
168 | check-unit-y += tests/test-x86-cpuid$(EXESUF) | 460 | 280 rw migration quick |
169 | @@ -XXX,XX +XXX,XX @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y) | ||
170 | tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y) | ||
171 | tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y) | ||
172 | tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y) | ||
173 | +tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y) | ||
174 | tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y) | ||
175 | tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y) | ||
176 | tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y) | ||
177 | -- | 461 | -- |
178 | 2.13.6 | 462 | 2.25.3 |
179 | 463 | ||
180 | 464 | diff view generated by jsdifflib |
1 | From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> | 1 | The BDRV_REQ_ZERO_WRITE is currently implemented in a way that first the |
---|---|---|---|
2 | image is possibly preallocated and then the zero flag is added to all | ||
3 | clusters. This means that a copy-on-write operation may be needed when | ||
4 | writing to these clusters, despite having used preallocation, negating | ||
5 | one of the major benefits of preallocation. | ||
2 | 6 | ||
3 | Since bdrv_co_preadv does all necessary checks including | 7 | Instead, try to forward the BDRV_REQ_ZERO_WRITE to the protocol driver, |
4 | reading after the end of the backing file, avoid duplication | 8 | and if the protocol driver can ensure that the new area reads as zeros, |
5 | of verification before bdrv_co_preadv call. | 9 | we can skip setting the zero flag in the qcow2 layer. |
6 | 10 | ||
7 | Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> | 11 | Unfortunately, the same approach doesn't work for metadata |
12 | preallocation, so we'll still set the zero flag there. | ||
13 | |||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
15 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
16 | Message-Id: <20200424142701.67053-1-kwolf@redhat.com> | ||
8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 17 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 18 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 19 | --- |
12 | block/qcow2.h | 3 --- | 20 | block/qcow2.c | 22 +++++++++++++++++++--- |
13 | block/qcow2.c | 51 ++++++++------------------------------------------- | 21 | tests/qemu-iotests/274.out | 4 ++-- |
14 | 2 files changed, 8 insertions(+), 46 deletions(-) | 22 | 2 files changed, 21 insertions(+), 5 deletions(-) |
15 | 23 | ||
16 | diff --git a/block/qcow2.h b/block/qcow2.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/block/qcow2.h | ||
19 | +++ b/block/qcow2.h | ||
20 | @@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) | ||
21 | } | ||
22 | |||
23 | /* qcow2.c functions */ | ||
24 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, | ||
25 | - int64_t sector_num, int nb_sectors); | ||
26 | - | ||
27 | int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, | ||
28 | int refcount_order, bool generous_increase, | ||
29 | uint64_t *refblock_count); | ||
30 | diff --git a/block/qcow2.c b/block/qcow2.c | 24 | diff --git a/block/qcow2.c b/block/qcow2.c |
31 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/block/qcow2.c | 26 | --- a/block/qcow2.c |
33 | +++ b/block/qcow2.c | 27 | +++ b/block/qcow2.c |
34 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, | 28 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, |
35 | return status; | 29 | /* Allocate the data area */ |
36 | } | 30 | new_file_size = allocation_start + |
37 | 31 | nb_new_data_clusters * s->cluster_size; | |
38 | -/* handle reading after the end of the backing file */ | 32 | - /* Image file grows, so @exact does not matter */ |
39 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, | 33 | - ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, |
40 | - int64_t offset, int bytes) | 34 | - errp); |
41 | -{ | 35 | + /* |
42 | - uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE; | 36 | + * Image file grows, so @exact does not matter. |
43 | - int n1; | 37 | + * |
44 | - | 38 | + * If we need to zero out the new area, try first whether the protocol |
45 | - if ((offset + bytes) <= bs_size) { | 39 | + * driver can already take care of this. |
46 | - return bytes; | 40 | + */ |
47 | - } | 41 | + if (flags & BDRV_REQ_ZERO_WRITE) { |
48 | - | 42 | + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, |
49 | - if (offset >= bs_size) { | 43 | + BDRV_REQ_ZERO_WRITE, NULL); |
50 | - n1 = 0; | 44 | + if (ret >= 0) { |
51 | - } else { | 45 | + flags &= ~BDRV_REQ_ZERO_WRITE; |
52 | - n1 = bs_size - offset; | 46 | + } |
53 | - } | 47 | + } else { |
54 | - | 48 | + ret = -1; |
55 | - qemu_iovec_memset(qiov, n1, 0, bytes - n1); | 49 | + } |
56 | - | 50 | + if (ret < 0) { |
57 | - return n1; | 51 | + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, |
58 | -} | 52 | + errp); |
59 | - | 53 | + } |
60 | static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | 54 | if (ret < 0) { |
61 | uint64_t bytes, QEMUIOVector *qiov, | 55 | error_prepend(errp, "Failed to resize underlying file: "); |
62 | int flags) | 56 | qcow2_free_clusters(bs, allocation_start, |
63 | { | 57 | diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out |
64 | BDRVQcow2State *s = bs->opaque; | 58 | index XXXXXXX..XXXXXXX 100644 |
65 | - int offset_in_cluster, n1; | 59 | --- a/tests/qemu-iotests/274.out |
66 | + int offset_in_cluster; | 60 | +++ b/tests/qemu-iotests/274.out |
67 | int ret; | 61 | @@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 9437184 |
68 | unsigned int cur_bytes; /* number of bytes in current iteration */ | 62 | 10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) |
69 | uint64_t cluster_offset = 0; | 63 | |
70 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | 64 | [{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, |
71 | case QCOW2_CLUSTER_UNALLOCATED: | 65 | -{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] |
72 | 66 | +{ "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}] | |
73 | if (bs->backing) { | 67 | |
74 | - /* read from the base image */ | 68 | === preallocation=full === |
75 | - n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov, | 69 | Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
76 | - offset, cur_bytes); | 70 | @@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 11534336 |
77 | - if (n1 > 0) { | 71 | 4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) |
78 | - QEMUIOVector local_qiov; | 72 | |
79 | - | 73 | [{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, |
80 | - qemu_iovec_init(&local_qiov, hd_qiov.niov); | 74 | -{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] |
81 | - qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1); | 75 | +{ "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}] |
82 | - | 76 | |
83 | - BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | 77 | === preallocation=off === |
84 | - qemu_co_mutex_unlock(&s->lock); | 78 | Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 |
85 | - ret = bdrv_co_preadv(bs->backing, offset, n1, | ||
86 | - &local_qiov, 0); | ||
87 | - qemu_co_mutex_lock(&s->lock); | ||
88 | - | ||
89 | - qemu_iovec_destroy(&local_qiov); | ||
90 | - | ||
91 | - if (ret < 0) { | ||
92 | - goto fail; | ||
93 | - } | ||
94 | + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | ||
95 | + qemu_co_mutex_unlock(&s->lock); | ||
96 | + ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, | ||
97 | + &hd_qiov, 0); | ||
98 | + qemu_co_mutex_lock(&s->lock); | ||
99 | + if (ret < 0) { | ||
100 | + goto fail; | ||
101 | } | ||
102 | } else { | ||
103 | /* Note: in this case, no need to wait */ | ||
104 | -- | 79 | -- |
105 | 2.13.6 | 80 | 2.25.3 |
106 | 81 | ||
107 | 82 | diff view generated by jsdifflib |
1 | From: Doug Gale <doug16k@gmail.com> | 1 | From: Andrzej Jakowski <andrzej.jakowski@linux.intel.com> |
---|---|---|---|
2 | 2 | ||
3 | Add trace output for commands, errors, and undefined behavior. | 3 | This patch introduces support for PMR that has been defined as part of NVMe 1.4 |
4 | Add guest error log output for undefined behavior. | 4 | spec. User can now specify a pmrdev option that should point to HostMemoryBackend. |
5 | Report invalid undefined accesses to MMIO. | 5 | pmrdev memory region will subsequently be exposed as PCI BAR 2 in emulated NVMe |
6 | Annotate unlikely error checks with unlikely. | 6 | device. Guest OS can perform MMIO reads and writes to the PMR region that will stay |
7 | persistent across system reboot. | ||
7 | 8 | ||
8 | Signed-off-by: Doug Gale <doug16k@gmail.com> | 9 | Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com> |
9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 10 | Reviewed-by: Klaus Jensen <k.jensen@samsung.com> |
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
12 | Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com> | ||
13 | Reviewed-by: Keith Busch <kbusch@kernel.org> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | --- | 15 | --- |
13 | hw/block/nvme.c | 349 ++++++++++++++++++++++++++++++++++++++++++-------- | 16 | hw/block/nvme.h | 2 + |
14 | hw/block/trace-events | 93 ++++++++++++++ | 17 | include/block/nvme.h | 172 +++++++++++++++++++++++++++++++++++++++++ |
15 | 2 files changed, 390 insertions(+), 52 deletions(-) | 18 | hw/block/nvme.c | 109 ++++++++++++++++++++++++++ |
19 | hw/block/Makefile.objs | 2 +- | ||
20 | hw/block/trace-events | 4 + | ||
21 | 5 files changed, 288 insertions(+), 1 deletion(-) | ||
16 | 22 | ||
23 | diff --git a/hw/block/nvme.h b/hw/block/nvme.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/hw/block/nvme.h | ||
26 | +++ b/hw/block/nvme.h | ||
27 | @@ -XXX,XX +XXX,XX @@ typedef struct NvmeCtrl { | ||
28 | uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ | ||
29 | |||
30 | char *serial; | ||
31 | + HostMemoryBackend *pmrdev; | ||
32 | + | ||
33 | NvmeNamespace *namespaces; | ||
34 | NvmeSQueue **sq; | ||
35 | NvmeCQueue **cq; | ||
36 | diff --git a/include/block/nvme.h b/include/block/nvme.h | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/include/block/nvme.h | ||
39 | +++ b/include/block/nvme.h | ||
40 | @@ -XXX,XX +XXX,XX @@ typedef struct NvmeBar { | ||
41 | uint64_t acq; | ||
42 | uint32_t cmbloc; | ||
43 | uint32_t cmbsz; | ||
44 | + uint8_t padding[3520]; /* not used by QEMU */ | ||
45 | + uint32_t pmrcap; | ||
46 | + uint32_t pmrctl; | ||
47 | + uint32_t pmrsts; | ||
48 | + uint32_t pmrebs; | ||
49 | + uint32_t pmrswtp; | ||
50 | + uint32_t pmrmsc; | ||
51 | } NvmeBar; | ||
52 | |||
53 | enum NvmeCapShift { | ||
54 | @@ -XXX,XX +XXX,XX @@ enum NvmeCapShift { | ||
55 | CAP_CSS_SHIFT = 37, | ||
56 | CAP_MPSMIN_SHIFT = 48, | ||
57 | CAP_MPSMAX_SHIFT = 52, | ||
58 | + CAP_PMR_SHIFT = 56, | ||
59 | }; | ||
60 | |||
61 | enum NvmeCapMask { | ||
62 | @@ -XXX,XX +XXX,XX @@ enum NvmeCapMask { | ||
63 | CAP_CSS_MASK = 0xff, | ||
64 | CAP_MPSMIN_MASK = 0xf, | ||
65 | CAP_MPSMAX_MASK = 0xf, | ||
66 | + CAP_PMR_MASK = 0x1, | ||
67 | }; | ||
68 | |||
69 | #define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK) | ||
70 | @@ -XXX,XX +XXX,XX @@ enum NvmeCapMask { | ||
71 | << CAP_MPSMIN_SHIFT) | ||
72 | #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\ | ||
73 | << CAP_MPSMAX_SHIFT) | ||
74 | +#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\ | ||
75 | + << CAP_PMR_SHIFT) | ||
76 | |||
77 | enum NvmeCcShift { | ||
78 | CC_EN_SHIFT = 0, | ||
79 | @@ -XXX,XX +XXX,XX @@ enum NvmeCmbszMask { | ||
80 | #define NVME_CMBSZ_GETSIZE(cmbsz) \ | ||
81 | (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz)))) | ||
82 | |||
83 | +enum NvmePmrcapShift { | ||
84 | + PMRCAP_RDS_SHIFT = 3, | ||
85 | + PMRCAP_WDS_SHIFT = 4, | ||
86 | + PMRCAP_BIR_SHIFT = 5, | ||
87 | + PMRCAP_PMRTU_SHIFT = 8, | ||
88 | + PMRCAP_PMRWBM_SHIFT = 10, | ||
89 | + PMRCAP_PMRTO_SHIFT = 16, | ||
90 | + PMRCAP_CMSS_SHIFT = 24, | ||
91 | +}; | ||
92 | + | ||
93 | +enum NvmePmrcapMask { | ||
94 | + PMRCAP_RDS_MASK = 0x1, | ||
95 | + PMRCAP_WDS_MASK = 0x1, | ||
96 | + PMRCAP_BIR_MASK = 0x7, | ||
97 | + PMRCAP_PMRTU_MASK = 0x3, | ||
98 | + PMRCAP_PMRWBM_MASK = 0xf, | ||
99 | + PMRCAP_PMRTO_MASK = 0xff, | ||
100 | + PMRCAP_CMSS_MASK = 0x1, | ||
101 | +}; | ||
102 | + | ||
103 | +#define NVME_PMRCAP_RDS(pmrcap) \ | ||
104 | + ((pmrcap >> PMRCAP_RDS_SHIFT) & PMRCAP_RDS_MASK) | ||
105 | +#define NVME_PMRCAP_WDS(pmrcap) \ | ||
106 | + ((pmrcap >> PMRCAP_WDS_SHIFT) & PMRCAP_WDS_MASK) | ||
107 | +#define NVME_PMRCAP_BIR(pmrcap) \ | ||
108 | + ((pmrcap >> PMRCAP_BIR_SHIFT) & PMRCAP_BIR_MASK) | ||
109 | +#define NVME_PMRCAP_PMRTU(pmrcap) \ | ||
110 | + ((pmrcap >> PMRCAP_PMRTU_SHIFT) & PMRCAP_PMRTU_MASK) | ||
111 | +#define NVME_PMRCAP_PMRWBM(pmrcap) \ | ||
112 | + ((pmrcap >> PMRCAP_PMRWBM_SHIFT) & PMRCAP_PMRWBM_MASK) | ||
113 | +#define NVME_PMRCAP_PMRTO(pmrcap) \ | ||
114 | + ((pmrcap >> PMRCAP_PMRTO_SHIFT) & PMRCAP_PMRTO_MASK) | ||
115 | +#define NVME_PMRCAP_CMSS(pmrcap) \ | ||
116 | + ((pmrcap >> PMRCAP_CMSS_SHIFT) & PMRCAP_CMSS_MASK) | ||
117 | + | ||
118 | +#define NVME_PMRCAP_SET_RDS(pmrcap, val) \ | ||
119 | + (pmrcap |= (uint64_t)(val & PMRCAP_RDS_MASK) << PMRCAP_RDS_SHIFT) | ||
120 | +#define NVME_PMRCAP_SET_WDS(pmrcap, val) \ | ||
121 | + (pmrcap |= (uint64_t)(val & PMRCAP_WDS_MASK) << PMRCAP_WDS_SHIFT) | ||
122 | +#define NVME_PMRCAP_SET_BIR(pmrcap, val) \ | ||
123 | + (pmrcap |= (uint64_t)(val & PMRCAP_BIR_MASK) << PMRCAP_BIR_SHIFT) | ||
124 | +#define NVME_PMRCAP_SET_PMRTU(pmrcap, val) \ | ||
125 | + (pmrcap |= (uint64_t)(val & PMRCAP_PMRTU_MASK) << PMRCAP_PMRTU_SHIFT) | ||
126 | +#define NVME_PMRCAP_SET_PMRWBM(pmrcap, val) \ | ||
127 | + (pmrcap |= (uint64_t)(val & PMRCAP_PMRWBM_MASK) << PMRCAP_PMRWBM_SHIFT) | ||
128 | +#define NVME_PMRCAP_SET_PMRTO(pmrcap, val) \ | ||
129 | + (pmrcap |= (uint64_t)(val & PMRCAP_PMRTO_MASK) << PMRCAP_PMRTO_SHIFT) | ||
130 | +#define NVME_PMRCAP_SET_CMSS(pmrcap, val) \ | ||
131 | + (pmrcap |= (uint64_t)(val & PMRCAP_CMSS_MASK) << PMRCAP_CMSS_SHIFT) | ||
132 | + | ||
133 | +enum NvmePmrctlShift { | ||
134 | + PMRCTL_EN_SHIFT = 0, | ||
135 | +}; | ||
136 | + | ||
137 | +enum NvmePmrctlMask { | ||
138 | + PMRCTL_EN_MASK = 0x1, | ||
139 | +}; | ||
140 | + | ||
141 | +#define NVME_PMRCTL_EN(pmrctl) ((pmrctl >> PMRCTL_EN_SHIFT) & PMRCTL_EN_MASK) | ||
142 | + | ||
143 | +#define NVME_PMRCTL_SET_EN(pmrctl, val) \ | ||
144 | + (pmrctl |= (uint64_t)(val & PMRCTL_EN_MASK) << PMRCTL_EN_SHIFT) | ||
145 | + | ||
146 | +enum NvmePmrstsShift { | ||
147 | + PMRSTS_ERR_SHIFT = 0, | ||
148 | + PMRSTS_NRDY_SHIFT = 8, | ||
149 | + PMRSTS_HSTS_SHIFT = 9, | ||
150 | + PMRSTS_CBAI_SHIFT = 12, | ||
151 | +}; | ||
152 | + | ||
153 | +enum NvmePmrstsMask { | ||
154 | + PMRSTS_ERR_MASK = 0xff, | ||
155 | + PMRSTS_NRDY_MASK = 0x1, | ||
156 | + PMRSTS_HSTS_MASK = 0x7, | ||
157 | + PMRSTS_CBAI_MASK = 0x1, | ||
158 | +}; | ||
159 | + | ||
160 | +#define NVME_PMRSTS_ERR(pmrsts) \ | ||
161 | + ((pmrsts >> PMRSTS_ERR_SHIFT) & PMRSTS_ERR_MASK) | ||
162 | +#define NVME_PMRSTS_NRDY(pmrsts) \ | ||
163 | + ((pmrsts >> PMRSTS_NRDY_SHIFT) & PMRSTS_NRDY_MASK) | ||
164 | +#define NVME_PMRSTS_HSTS(pmrsts) \ | ||
165 | + ((pmrsts >> PMRSTS_HSTS_SHIFT) & PMRSTS_HSTS_MASK) | ||
166 | +#define NVME_PMRSTS_CBAI(pmrsts) \ | ||
167 | + ((pmrsts >> PMRSTS_CBAI_SHIFT) & PMRSTS_CBAI_MASK) | ||
168 | + | ||
169 | +#define NVME_PMRSTS_SET_ERR(pmrsts, val) \ | ||
170 | + (pmrsts |= (uint64_t)(val & PMRSTS_ERR_MASK) << PMRSTS_ERR_SHIFT) | ||
171 | +#define NVME_PMRSTS_SET_NRDY(pmrsts, val) \ | ||
172 | + (pmrsts |= (uint64_t)(val & PMRSTS_NRDY_MASK) << PMRSTS_NRDY_SHIFT) | ||
173 | +#define NVME_PMRSTS_SET_HSTS(pmrsts, val) \ | ||
174 | + (pmrsts |= (uint64_t)(val & PMRSTS_HSTS_MASK) << PMRSTS_HSTS_SHIFT) | ||
175 | +#define NVME_PMRSTS_SET_CBAI(pmrsts, val) \ | ||
176 | + (pmrsts |= (uint64_t)(val & PMRSTS_CBAI_MASK) << PMRSTS_CBAI_SHIFT) | ||
177 | + | ||
178 | +enum NvmePmrebsShift { | ||
179 | + PMREBS_PMRSZU_SHIFT = 0, | ||
180 | + PMREBS_RBB_SHIFT = 4, | ||
181 | + PMREBS_PMRWBZ_SHIFT = 8, | ||
182 | +}; | ||
183 | + | ||
184 | +enum NvmePmrebsMask { | ||
185 | + PMREBS_PMRSZU_MASK = 0xf, | ||
186 | + PMREBS_RBB_MASK = 0x1, | ||
187 | + PMREBS_PMRWBZ_MASK = 0xffffff, | ||
188 | +}; | ||
189 | + | ||
190 | +#define NVME_PMREBS_PMRSZU(pmrebs) \ | ||
191 | + ((pmrebs >> PMREBS_PMRSZU_SHIFT) & PMREBS_PMRSZU_MASK) | ||
192 | +#define NVME_PMREBS_RBB(pmrebs) \ | ||
193 | + ((pmrebs >> PMREBS_RBB_SHIFT) & PMREBS_RBB_MASK) | ||
194 | +#define NVME_PMREBS_PMRWBZ(pmrebs) \ | ||
195 | + ((pmrebs >> PMREBS_PMRWBZ_SHIFT) & PMREBS_PMRWBZ_MASK) | ||
196 | + | ||
197 | +#define NVME_PMREBS_SET_PMRSZU(pmrebs, val) \ | ||
198 | + (pmrebs |= (uint64_t)(val & PMREBS_PMRSZU_MASK) << PMREBS_PMRSZU_SHIFT) | ||
199 | +#define NVME_PMREBS_SET_RBB(pmrebs, val) \ | ||
200 | + (pmrebs |= (uint64_t)(val & PMREBS_RBB_MASK) << PMREBS_RBB_SHIFT) | ||
201 | +#define NVME_PMREBS_SET_PMRWBZ(pmrebs, val) \ | ||
202 | + (pmrebs |= (uint64_t)(val & PMREBS_PMRWBZ_MASK) << PMREBS_PMRWBZ_SHIFT) | ||
203 | + | ||
204 | +enum NvmePmrswtpShift { | ||
205 | + PMRSWTP_PMRSWTU_SHIFT = 0, | ||
206 | + PMRSWTP_PMRSWTV_SHIFT = 8, | ||
207 | +}; | ||
208 | + | ||
209 | +enum NvmePmrswtpMask { | ||
210 | + PMRSWTP_PMRSWTU_MASK = 0xf, | ||
211 | + PMRSWTP_PMRSWTV_MASK = 0xffffff, | ||
212 | +}; | ||
213 | + | ||
214 | +#define NVME_PMRSWTP_PMRSWTU(pmrswtp) \ | ||
215 | + ((pmrswtp >> PMRSWTP_PMRSWTU_SHIFT) & PMRSWTP_PMRSWTU_MASK) | ||
216 | +#define NVME_PMRSWTP_PMRSWTV(pmrswtp) \ | ||
217 | + ((pmrswtp >> PMRSWTP_PMRSWTV_SHIFT) & PMRSWTP_PMRSWTV_MASK) | ||
218 | + | ||
219 | +#define NVME_PMRSWTP_SET_PMRSWTU(pmrswtp, val) \ | ||
220 | + (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTU_MASK) << PMRSWTP_PMRSWTU_SHIFT) | ||
221 | +#define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \ | ||
222 | + (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT) | ||
223 | + | ||
224 | +enum NvmePmrmscShift { | ||
225 | + PMRMSC_CMSE_SHIFT = 1, | ||
226 | + PMRMSC_CBA_SHIFT = 12, | ||
227 | +}; | ||
228 | + | ||
229 | +enum NvmePmrmscMask { | ||
230 | + PMRMSC_CMSE_MASK = 0x1, | ||
231 | + PMRMSC_CBA_MASK = 0xfffffffffffff, | ||
232 | +}; | ||
233 | + | ||
234 | +#define NVME_PMRMSC_CMSE(pmrmsc) \ | ||
235 | + ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK) | ||
236 | +#define NVME_PMRMSC_CBA(pmrmsc) \ | ||
237 | + ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK) | ||
238 | + | ||
239 | +#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \ | ||
240 | + (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT) | ||
241 | +#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \ | ||
242 | + (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT) | ||
243 | + | ||
244 | typedef struct NvmeCmd { | ||
245 | uint8_t opcode; | ||
246 | uint8_t fuse; | ||
17 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c | 247 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c |
18 | index XXXXXXX..XXXXXXX 100644 | 248 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/block/nvme.c | 249 | --- a/hw/block/nvme.c |
20 | +++ b/hw/block/nvme.c | 250 | +++ b/hw/block/nvme.c |
21 | @@ -XXX,XX +XXX,XX @@ | 251 | @@ -XXX,XX +XXX,XX @@ |
252 | * -drive file=<file>,if=none,id=<drive_id> | ||
253 | * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \ | ||
254 | * cmb_size_mb=<cmb_size_mb[optional]>, \ | ||
255 | + * [pmrdev=<mem_backend_file_id>,] \ | ||
256 | * num_queues=<N[optional]> | ||
257 | * | ||
258 | * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at | ||
259 | * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. | ||
260 | + * | ||
261 | + * cmb_size_mb= and pmrdev= options are mutually exclusive due to limitation | ||
262 | + * in available BAR's. cmb_size_mb= will take precedence over pmrdev= when | ||
263 | + * both provided. | ||
264 | + * Enabling pmr emulation can be achieved by pointing to memory-backend-file. | ||
265 | + * For example: | ||
266 | + * -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \ | ||
267 | + * size=<size> .... -device nvme,...,pmrdev=<mem_id> | ||
268 | */ | ||
269 | |||
270 | #include "qemu/osdep.h" | ||
271 | @@ -XXX,XX +XXX,XX @@ | ||
272 | #include "sysemu/sysemu.h" | ||
273 | #include "qapi/error.h" | ||
22 | #include "qapi/visitor.h" | 274 | #include "qapi/visitor.h" |
275 | +#include "sysemu/hostmem.h" | ||
23 | #include "sysemu/block-backend.h" | 276 | #include "sysemu/block-backend.h" |
24 | 277 | +#include "exec/ram_addr.h" | |
25 | +#include "qemu/log.h" | 278 | |
26 | +#include "trace.h" | 279 | #include "qemu/log.h" |
27 | #include "nvme.h" | 280 | #include "qemu/module.h" |
28 | 281 | @@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | |
29 | +#define NVME_GUEST_ERR(trace, fmt, ...) \ | 282 | NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, |
30 | + do { \ | 283 | "invalid write to read only CMBSZ, ignored"); |
31 | + (trace_##trace)(__VA_ARGS__); \ | 284 | return; |
32 | + qemu_log_mask(LOG_GUEST_ERROR, #trace \ | 285 | + case 0xE00: /* PMRCAP */ |
33 | + " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ | 286 | + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly, |
34 | + } while (0) | 287 | + "invalid write to PMRCAP register, ignored"); |
35 | + | 288 | + return; |
36 | static void nvme_process_sq(void *opaque); | 289 | + case 0xE04: /* TODO PMRCTL */ |
37 | 290 | + break; | |
38 | static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) | 291 | + case 0xE08: /* PMRSTS */ |
39 | @@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) | 292 | + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly, |
40 | { | 293 | + "invalid write to PMRSTS register, ignored"); |
41 | if (cq->irq_enabled) { | 294 | + return; |
42 | if (msix_enabled(&(n->parent_obj))) { | 295 | + case 0xE0C: /* PMREBS */ |
43 | + trace_nvme_irq_msix(cq->vector); | 296 | + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly, |
44 | msix_notify(&(n->parent_obj), cq->vector); | 297 | + "invalid write to PMREBS register, ignored"); |
45 | } else { | 298 | + return; |
46 | + trace_nvme_irq_pin(); | 299 | + case 0xE10: /* PMRSWTP */ |
47 | pci_irq_pulse(&n->parent_obj); | 300 | + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly, |
48 | } | 301 | + "invalid write to PMRSWTP register, ignored"); |
49 | + } else { | 302 | + return; |
50 | + trace_nvme_irq_masked(); | 303 | + case 0xE14: /* TODO PMRMSC */ |
304 | + break; | ||
305 | default: | ||
306 | NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, | ||
307 | "invalid MMIO write," | ||
308 | @@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) | ||
51 | } | 309 | } |
52 | } | 310 | |
53 | 311 | if (addr < sizeof(n->bar)) { | |
54 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | 312 | + /* |
55 | trans_len = MIN(len, trans_len); | 313 | + * When PMRWBM bit 1 is set then read from |
56 | int num_prps = (len >> n->page_bits) + 1; | 314 | + * PMRSTS should ensure prior writes |
57 | 315 | + * made it to persistent media | |
58 | - if (!prp1) { | 316 | + */ |
59 | + if (unlikely(!prp1)) { | 317 | + if (addr == 0xE08 && |
60 | + trace_nvme_err_invalid_prp(); | 318 | + (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { |
61 | return NVME_INVALID_FIELD | NVME_DNR; | 319 | + qemu_ram_writeback(n->pmrdev->mr.ram_block, |
62 | } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && | 320 | + 0, n->pmrdev->size); |
63 | prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { | 321 | + } |
64 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | 322 | memcpy(&val, ptr + addr, size); |
65 | } | ||
66 | len -= trans_len; | ||
67 | if (len) { | ||
68 | - if (!prp2) { | ||
69 | + if (unlikely(!prp2)) { | ||
70 | + trace_nvme_err_invalid_prp2_missing(); | ||
71 | goto unmap; | ||
72 | } | ||
73 | if (len > n->page_size) { | ||
74 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
75 | uint64_t prp_ent = le64_to_cpu(prp_list[i]); | ||
76 | |||
77 | if (i == n->max_prp_ents - 1 && len > n->page_size) { | ||
78 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { | ||
79 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { | ||
80 | + trace_nvme_err_invalid_prplist_ent(prp_ent); | ||
81 | goto unmap; | ||
82 | } | ||
83 | |||
84 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
85 | prp_ent = le64_to_cpu(prp_list[i]); | ||
86 | } | ||
87 | |||
88 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { | ||
89 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { | ||
90 | + trace_nvme_err_invalid_prplist_ent(prp_ent); | ||
91 | goto unmap; | ||
92 | } | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
95 | i++; | ||
96 | } | ||
97 | } else { | ||
98 | - if (prp2 & (n->page_size - 1)) { | ||
99 | + if (unlikely(prp2 & (n->page_size - 1))) { | ||
100 | + trace_nvme_err_invalid_prp2_align(prp2); | ||
101 | goto unmap; | ||
102 | } | ||
103 | if (qsg->nsg) { | ||
104 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, | ||
105 | QEMUIOVector iov; | ||
106 | uint16_t status = NVME_SUCCESS; | ||
107 | |||
108 | + trace_nvme_dma_read(prp1, prp2); | ||
109 | + | ||
110 | if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { | ||
111 | return NVME_INVALID_FIELD | NVME_DNR; | ||
112 | } | ||
113 | if (qsg.nsg > 0) { | ||
114 | - if (dma_buf_read(ptr, len, &qsg)) { | ||
115 | + if (unlikely(dma_buf_read(ptr, len, &qsg))) { | ||
116 | + trace_nvme_err_invalid_dma(); | ||
117 | status = NVME_INVALID_FIELD | NVME_DNR; | ||
118 | } | ||
119 | qemu_sglist_destroy(&qsg); | ||
120 | } else { | 323 | } else { |
121 | - if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { | 324 | NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, |
122 | + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { | 325 | @@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) |
123 | + trace_nvme_err_invalid_dma(); | 326 | error_setg(errp, "serial property not set"); |
124 | status = NVME_INVALID_FIELD | NVME_DNR; | ||
125 | } | ||
126 | qemu_iovec_destroy(&iov); | ||
127 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, | ||
128 | uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); | ||
129 | uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS); | ||
130 | |||
131 | - if (slba + nlb > ns->id_ns.nsze) { | ||
132 | + if (unlikely(slba + nlb > ns->id_ns.nsze)) { | ||
133 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); | ||
134 | return NVME_LBA_RANGE | NVME_DNR; | ||
135 | } | ||
136 | |||
137 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, | ||
138 | int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; | ||
139 | enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; | ||
140 | |||
141 | - if ((slba + nlb) > ns->id_ns.nsze) { | ||
142 | + trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); | ||
143 | + | ||
144 | + if (unlikely((slba + nlb) > ns->id_ns.nsze)) { | ||
145 | block_acct_invalid(blk_get_stats(n->conf.blk), acct); | ||
146 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); | ||
147 | return NVME_LBA_RANGE | NVME_DNR; | ||
148 | } | ||
149 | |||
150 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
151 | NvmeNamespace *ns; | ||
152 | uint32_t nsid = le32_to_cpu(cmd->nsid); | ||
153 | |||
154 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
155 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
156 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
157 | return NVME_INVALID_NSID | NVME_DNR; | ||
158 | } | ||
159 | |||
160 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
161 | case NVME_CMD_READ: | ||
162 | return nvme_rw(n, ns, cmd, req); | ||
163 | default: | ||
164 | + trace_nvme_err_invalid_opc(cmd->opcode); | ||
165 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
166 | } | ||
167 | } | ||
168 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
169 | NvmeCQueue *cq; | ||
170 | uint16_t qid = le16_to_cpu(c->qid); | ||
171 | |||
172 | - if (!qid || nvme_check_sqid(n, qid)) { | ||
173 | + if (unlikely(!qid || nvme_check_sqid(n, qid))) { | ||
174 | + trace_nvme_err_invalid_del_sq(qid); | ||
175 | return NVME_INVALID_QID | NVME_DNR; | ||
176 | } | ||
177 | |||
178 | + trace_nvme_del_sq(qid); | ||
179 | + | ||
180 | sq = n->sq[qid]; | ||
181 | while (!QTAILQ_EMPTY(&sq->out_req_list)) { | ||
182 | req = QTAILQ_FIRST(&sq->out_req_list); | ||
183 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
184 | uint16_t qflags = le16_to_cpu(c->sq_flags); | ||
185 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
186 | |||
187 | - if (!cqid || nvme_check_cqid(n, cqid)) { | ||
188 | + trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); | ||
189 | + | ||
190 | + if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { | ||
191 | + trace_nvme_err_invalid_create_sq_cqid(cqid); | ||
192 | return NVME_INVALID_CQID | NVME_DNR; | ||
193 | } | ||
194 | - if (!sqid || !nvme_check_sqid(n, sqid)) { | ||
195 | + if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) { | ||
196 | + trace_nvme_err_invalid_create_sq_sqid(sqid); | ||
197 | return NVME_INVALID_QID | NVME_DNR; | ||
198 | } | ||
199 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
200 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
201 | + trace_nvme_err_invalid_create_sq_size(qsize); | ||
202 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
203 | } | ||
204 | - if (!prp1 || prp1 & (n->page_size - 1)) { | ||
205 | + if (unlikely(!prp1 || prp1 & (n->page_size - 1))) { | ||
206 | + trace_nvme_err_invalid_create_sq_addr(prp1); | ||
207 | return NVME_INVALID_FIELD | NVME_DNR; | ||
208 | } | ||
209 | - if (!(NVME_SQ_FLAGS_PC(qflags))) { | ||
210 | + if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { | ||
211 | + trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); | ||
212 | return NVME_INVALID_FIELD | NVME_DNR; | ||
213 | } | ||
214 | sq = g_malloc0(sizeof(*sq)); | ||
215 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
216 | NvmeCQueue *cq; | ||
217 | uint16_t qid = le16_to_cpu(c->qid); | ||
218 | |||
219 | - if (!qid || nvme_check_cqid(n, qid)) { | ||
220 | + if (unlikely(!qid || nvme_check_cqid(n, qid))) { | ||
221 | + trace_nvme_err_invalid_del_cq_cqid(qid); | ||
222 | return NVME_INVALID_CQID | NVME_DNR; | ||
223 | } | ||
224 | |||
225 | cq = n->cq[qid]; | ||
226 | - if (!QTAILQ_EMPTY(&cq->sq_list)) { | ||
227 | + if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { | ||
228 | + trace_nvme_err_invalid_del_cq_notempty(qid); | ||
229 | return NVME_INVALID_QUEUE_DEL; | ||
230 | } | ||
231 | + trace_nvme_del_cq(qid); | ||
232 | nvme_free_cq(cq, n); | ||
233 | return NVME_SUCCESS; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
236 | uint16_t qflags = le16_to_cpu(c->cq_flags); | ||
237 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
238 | |||
239 | - if (!cqid || !nvme_check_cqid(n, cqid)) { | ||
240 | + trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags, | ||
241 | + NVME_CQ_FLAGS_IEN(qflags) != 0); | ||
242 | + | ||
243 | + if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) { | ||
244 | + trace_nvme_err_invalid_create_cq_cqid(cqid); | ||
245 | return NVME_INVALID_CQID | NVME_DNR; | ||
246 | } | ||
247 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
248 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
249 | + trace_nvme_err_invalid_create_cq_size(qsize); | ||
250 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
251 | } | ||
252 | - if (!prp1) { | ||
253 | + if (unlikely(!prp1)) { | ||
254 | + trace_nvme_err_invalid_create_cq_addr(prp1); | ||
255 | return NVME_INVALID_FIELD | NVME_DNR; | ||
256 | } | ||
257 | - if (vector > n->num_queues) { | ||
258 | + if (unlikely(vector > n->num_queues)) { | ||
259 | + trace_nvme_err_invalid_create_cq_vector(vector); | ||
260 | return NVME_INVALID_IRQ_VECTOR | NVME_DNR; | ||
261 | } | ||
262 | - if (!(NVME_CQ_FLAGS_PC(qflags))) { | ||
263 | + if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { | ||
264 | + trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); | ||
265 | return NVME_INVALID_FIELD | NVME_DNR; | ||
266 | } | ||
267 | |||
268 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) | ||
269 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
270 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
271 | |||
272 | + trace_nvme_identify_ctrl(); | ||
273 | + | ||
274 | return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), | ||
275 | prp1, prp2); | ||
276 | } | ||
277 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) | ||
278 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
279 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
280 | |||
281 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
282 | + trace_nvme_identify_ns(nsid); | ||
283 | + | ||
284 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
285 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
286 | return NVME_INVALID_NSID | NVME_DNR; | ||
287 | } | ||
288 | |||
289 | ns = &n->namespaces[nsid - 1]; | ||
290 | + | ||
291 | return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), | ||
292 | prp1, prp2); | ||
293 | } | ||
294 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) | ||
295 | uint16_t ret; | ||
296 | int i, j = 0; | ||
297 | |||
298 | + trace_nvme_identify_nslist(min_nsid); | ||
299 | + | ||
300 | list = g_malloc0(data_len); | ||
301 | for (i = 0; i < n->num_namespaces; i++) { | ||
302 | if (i < min_nsid) { | ||
303 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) | ||
304 | case 0x02: | ||
305 | return nvme_identify_nslist(n, c); | ||
306 | default: | ||
307 | + trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); | ||
308 | return NVME_INVALID_FIELD | NVME_DNR; | ||
309 | } | ||
310 | } | ||
311 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
312 | switch (dw10) { | ||
313 | case NVME_VOLATILE_WRITE_CACHE: | ||
314 | result = blk_enable_write_cache(n->conf.blk); | ||
315 | + trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); | ||
316 | break; | ||
317 | case NVME_NUMBER_OF_QUEUES: | ||
318 | result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
319 | + trace_nvme_getfeat_numq(result); | ||
320 | break; | ||
321 | default: | ||
322 | + trace_nvme_err_invalid_getfeat(dw10); | ||
323 | return NVME_INVALID_FIELD | NVME_DNR; | ||
324 | } | ||
325 | |||
326 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
327 | blk_set_enable_write_cache(n->conf.blk, dw11 & 1); | ||
328 | break; | ||
329 | case NVME_NUMBER_OF_QUEUES: | ||
330 | + trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, | ||
331 | + ((dw11 >> 16) & 0xFFFF) + 1, | ||
332 | + n->num_queues - 1, n->num_queues - 1); | ||
333 | req->cqe.result = | ||
334 | cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
335 | break; | ||
336 | default: | ||
337 | + trace_nvme_err_invalid_setfeat(dw10); | ||
338 | return NVME_INVALID_FIELD | NVME_DNR; | ||
339 | } | ||
340 | return NVME_SUCCESS; | ||
341 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
342 | case NVME_ADM_CMD_GET_FEATURES: | ||
343 | return nvme_get_feature(n, cmd, req); | ||
344 | default: | ||
345 | + trace_nvme_err_invalid_admin_opc(cmd->opcode); | ||
346 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
347 | } | ||
348 | } | ||
349 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
350 | uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; | ||
351 | uint32_t page_size = 1 << page_bits; | ||
352 | |||
353 | - if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq || | ||
354 | - n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) || | ||
355 | - NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) || | ||
356 | - NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) || | ||
357 | - NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) || | ||
358 | - NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) || | ||
359 | - NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) || | ||
360 | - NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) || | ||
361 | - !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) { | ||
362 | + if (unlikely(n->cq[0])) { | ||
363 | + trace_nvme_err_startfail_cq(); | ||
364 | + return -1; | ||
365 | + } | ||
366 | + if (unlikely(n->sq[0])) { | ||
367 | + trace_nvme_err_startfail_sq(); | ||
368 | + return -1; | ||
369 | + } | ||
370 | + if (unlikely(!n->bar.asq)) { | ||
371 | + trace_nvme_err_startfail_nbarasq(); | ||
372 | + return -1; | ||
373 | + } | ||
374 | + if (unlikely(!n->bar.acq)) { | ||
375 | + trace_nvme_err_startfail_nbaracq(); | ||
376 | + return -1; | ||
377 | + } | ||
378 | + if (unlikely(n->bar.asq & (page_size - 1))) { | ||
379 | + trace_nvme_err_startfail_asq_misaligned(n->bar.asq); | ||
380 | + return -1; | ||
381 | + } | ||
382 | + if (unlikely(n->bar.acq & (page_size - 1))) { | ||
383 | + trace_nvme_err_startfail_acq_misaligned(n->bar.acq); | ||
384 | + return -1; | ||
385 | + } | ||
386 | + if (unlikely(NVME_CC_MPS(n->bar.cc) < | ||
387 | + NVME_CAP_MPSMIN(n->bar.cap))) { | ||
388 | + trace_nvme_err_startfail_page_too_small( | ||
389 | + NVME_CC_MPS(n->bar.cc), | ||
390 | + NVME_CAP_MPSMIN(n->bar.cap)); | ||
391 | + return -1; | ||
392 | + } | ||
393 | + if (unlikely(NVME_CC_MPS(n->bar.cc) > | ||
394 | + NVME_CAP_MPSMAX(n->bar.cap))) { | ||
395 | + trace_nvme_err_startfail_page_too_large( | ||
396 | + NVME_CC_MPS(n->bar.cc), | ||
397 | + NVME_CAP_MPSMAX(n->bar.cap)); | ||
398 | + return -1; | ||
399 | + } | ||
400 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) < | ||
401 | + NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { | ||
402 | + trace_nvme_err_startfail_cqent_too_small( | ||
403 | + NVME_CC_IOCQES(n->bar.cc), | ||
404 | + NVME_CTRL_CQES_MIN(n->bar.cap)); | ||
405 | + return -1; | ||
406 | + } | ||
407 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) > | ||
408 | + NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { | ||
409 | + trace_nvme_err_startfail_cqent_too_large( | ||
410 | + NVME_CC_IOCQES(n->bar.cc), | ||
411 | + NVME_CTRL_CQES_MAX(n->bar.cap)); | ||
412 | + return -1; | ||
413 | + } | ||
414 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) < | ||
415 | + NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { | ||
416 | + trace_nvme_err_startfail_sqent_too_small( | ||
417 | + NVME_CC_IOSQES(n->bar.cc), | ||
418 | + NVME_CTRL_SQES_MIN(n->bar.cap)); | ||
419 | + return -1; | ||
420 | + } | ||
421 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) > | ||
422 | + NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { | ||
423 | + trace_nvme_err_startfail_sqent_too_large( | ||
424 | + NVME_CC_IOSQES(n->bar.cc), | ||
425 | + NVME_CTRL_SQES_MAX(n->bar.cap)); | ||
426 | + return -1; | ||
427 | + } | ||
428 | + if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { | ||
429 | + trace_nvme_err_startfail_asqent_sz_zero(); | ||
430 | + return -1; | ||
431 | + } | ||
432 | + if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { | ||
433 | + trace_nvme_err_startfail_acqent_sz_zero(); | ||
434 | return -1; | ||
435 | } | ||
436 | |||
437 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
438 | static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
439 | unsigned size) | ||
440 | { | ||
441 | + if (unlikely(offset & (sizeof(uint32_t) - 1))) { | ||
442 | + NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32, | ||
443 | + "MMIO write not 32-bit aligned," | ||
444 | + " offset=0x%"PRIx64"", offset); | ||
445 | + /* should be ignored, fall through for now */ | ||
446 | + } | ||
447 | + | ||
448 | + if (unlikely(size < sizeof(uint32_t))) { | ||
449 | + NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall, | ||
450 | + "MMIO write smaller than 32-bits," | ||
451 | + " offset=0x%"PRIx64", size=%u", | ||
452 | + offset, size); | ||
453 | + /* should be ignored, fall through for now */ | ||
454 | + } | ||
455 | + | ||
456 | switch (offset) { | ||
457 | - case 0xc: | ||
458 | + case 0xc: /* INTMS */ | ||
459 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
460 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
461 | + "undefined access to interrupt mask set" | ||
462 | + " when MSI-X is enabled"); | ||
463 | + /* should be ignored, fall through for now */ | ||
464 | + } | ||
465 | n->bar.intms |= data & 0xffffffff; | ||
466 | n->bar.intmc = n->bar.intms; | ||
467 | + trace_nvme_mmio_intm_set(data & 0xffffffff, | ||
468 | + n->bar.intmc); | ||
469 | break; | ||
470 | - case 0x10: | ||
471 | + case 0x10: /* INTMC */ | ||
472 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
473 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
474 | + "undefined access to interrupt mask clr" | ||
475 | + " when MSI-X is enabled"); | ||
476 | + /* should be ignored, fall through for now */ | ||
477 | + } | ||
478 | n->bar.intms &= ~(data & 0xffffffff); | ||
479 | n->bar.intmc = n->bar.intms; | ||
480 | + trace_nvme_mmio_intm_clr(data & 0xffffffff, | ||
481 | + n->bar.intmc); | ||
482 | break; | ||
483 | - case 0x14: | ||
484 | + case 0x14: /* CC */ | ||
485 | + trace_nvme_mmio_cfg(data & 0xffffffff); | ||
486 | /* Windows first sends data, then sends enable bit */ | ||
487 | if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && | ||
488 | !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) | ||
489 | @@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
490 | |||
491 | if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { | ||
492 | n->bar.cc = data; | ||
493 | - if (nvme_start_ctrl(n)) { | ||
494 | + if (unlikely(nvme_start_ctrl(n))) { | ||
495 | + trace_nvme_err_startfail(); | ||
496 | n->bar.csts = NVME_CSTS_FAILED; | ||
497 | } else { | ||
498 | + trace_nvme_mmio_start_success(); | ||
499 | n->bar.csts = NVME_CSTS_READY; | ||
500 | } | ||
501 | } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { | ||
502 | + trace_nvme_mmio_stopped(); | ||
503 | nvme_clear_ctrl(n); | ||
504 | n->bar.csts &= ~NVME_CSTS_READY; | ||
505 | } | ||
506 | if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { | ||
507 | - nvme_clear_ctrl(n); | ||
508 | - n->bar.cc = data; | ||
509 | - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
510 | + trace_nvme_mmio_shutdown_set(); | ||
511 | + nvme_clear_ctrl(n); | ||
512 | + n->bar.cc = data; | ||
513 | + n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
514 | } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { | ||
515 | - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
516 | - n->bar.cc = data; | ||
517 | + trace_nvme_mmio_shutdown_cleared(); | ||
518 | + n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
519 | + n->bar.cc = data; | ||
520 | + } | ||
521 | + break; | ||
522 | + case 0x1C: /* CSTS */ | ||
523 | + if (data & (1 << 4)) { | ||
524 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported, | ||
525 | + "attempted to W1C CSTS.NSSRO" | ||
526 | + " but CAP.NSSRS is zero (not supported)"); | ||
527 | + } else if (data != 0) { | ||
528 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts, | ||
529 | + "attempted to set a read only bit" | ||
530 | + " of controller status"); | ||
531 | + } | ||
532 | + break; | ||
533 | + case 0x20: /* NSSR */ | ||
534 | + if (data == 0x4E564D65) { | ||
535 | + trace_nvme_ub_mmiowr_ssreset_unsupported(); | ||
536 | + } else { | ||
537 | + /* The spec says that writes of other values have no effect */ | ||
538 | + return; | ||
539 | } | ||
540 | break; | ||
541 | - case 0x24: | ||
542 | + case 0x24: /* AQA */ | ||
543 | n->bar.aqa = data & 0xffffffff; | ||
544 | + trace_nvme_mmio_aqattr(data & 0xffffffff); | ||
545 | break; | ||
546 | - case 0x28: | ||
547 | + case 0x28: /* ASQ */ | ||
548 | n->bar.asq = data; | ||
549 | + trace_nvme_mmio_asqaddr(data); | ||
550 | break; | ||
551 | - case 0x2c: | ||
552 | + case 0x2c: /* ASQ hi */ | ||
553 | n->bar.asq |= data << 32; | ||
554 | + trace_nvme_mmio_asqaddr_hi(data, n->bar.asq); | ||
555 | break; | ||
556 | - case 0x30: | ||
557 | + case 0x30: /* ACQ */ | ||
558 | + trace_nvme_mmio_acqaddr(data); | ||
559 | n->bar.acq = data; | ||
560 | break; | ||
561 | - case 0x34: | ||
562 | + case 0x34: /* ACQ hi */ | ||
563 | n->bar.acq |= data << 32; | ||
564 | + trace_nvme_mmio_acqaddr_hi(data, n->bar.acq); | ||
565 | break; | ||
566 | + case 0x38: /* CMBLOC */ | ||
567 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved, | ||
568 | + "invalid write to reserved CMBLOC" | ||
569 | + " when CMBSZ is zero, ignored"); | ||
570 | + return; | ||
571 | + case 0x3C: /* CMBSZ */ | ||
572 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, | ||
573 | + "invalid write to read only CMBSZ, ignored"); | ||
574 | + return; | ||
575 | default: | ||
576 | + NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, | ||
577 | + "invalid MMIO write," | ||
578 | + " offset=0x%"PRIx64", data=%"PRIx64"", | ||
579 | + offset, data); | ||
580 | break; | ||
581 | } | ||
582 | } | ||
583 | @@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) | ||
584 | uint8_t *ptr = (uint8_t *)&n->bar; | ||
585 | uint64_t val = 0; | ||
586 | |||
587 | + if (unlikely(addr & (sizeof(uint32_t) - 1))) { | ||
588 | + NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32, | ||
589 | + "MMIO read not 32-bit aligned," | ||
590 | + " offset=0x%"PRIx64"", addr); | ||
591 | + /* should RAZ, fall through for now */ | ||
592 | + } else if (unlikely(size < sizeof(uint32_t))) { | ||
593 | + NVME_GUEST_ERR(nvme_ub_mmiord_toosmall, | ||
594 | + "MMIO read smaller than 32-bits," | ||
595 | + " offset=0x%"PRIx64"", addr); | ||
596 | + /* should RAZ, fall through for now */ | ||
597 | + } | ||
598 | + | ||
599 | if (addr < sizeof(n->bar)) { | ||
600 | memcpy(&val, ptr + addr, size); | ||
601 | + } else { | ||
602 | + NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, | ||
603 | + "MMIO read beyond last register," | ||
604 | + " offset=0x%"PRIx64", returning 0", addr); | ||
605 | } | ||
606 | + | ||
607 | return val; | ||
608 | } | ||
609 | |||
610 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) | ||
611 | { | ||
612 | uint32_t qid; | ||
613 | |||
614 | - if (addr & ((1 << 2) - 1)) { | ||
615 | + if (unlikely(addr & ((1 << 2) - 1))) { | ||
616 | + NVME_GUEST_ERR(nvme_ub_db_wr_misaligned, | ||
617 | + "doorbell write not 32-bit aligned," | ||
618 | + " offset=0x%"PRIx64", ignoring", addr); | ||
619 | return; | 327 | return; |
620 | } | 328 | } |
621 | 329 | + | |
622 | if (((addr - 0x1000) >> 2) & 1) { | 330 | + if (!n->cmb_size_mb && n->pmrdev) { |
623 | + /* Completion queue doorbell write */ | 331 | + if (host_memory_backend_is_mapped(n->pmrdev)) { |
624 | + | 332 | + char *path = object_get_canonical_path_component(OBJECT(n->pmrdev)); |
625 | uint16_t new_head = val & 0xffff; | 333 | + error_setg(errp, "can't use already busy memdev: %s", path); |
626 | int start_sqs; | 334 | + g_free(path); |
627 | NvmeCQueue *cq; | 335 | + return; |
628 | 336 | + } | |
629 | qid = (addr - (0x1000 + (1 << 2))) >> 3; | 337 | + |
630 | - if (nvme_check_cqid(n, qid)) { | 338 | + if (!is_power_of_2(n->pmrdev->size)) { |
631 | + if (unlikely(nvme_check_cqid(n, qid))) { | 339 | + error_setg(errp, "pmr backend size needs to be power of 2 in size"); |
632 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq, | 340 | + return; |
633 | + "completion queue doorbell write" | 341 | + } |
634 | + " for nonexistent queue," | 342 | + |
635 | + " sqid=%"PRIu32", ignoring", qid); | 343 | + host_memory_backend_set_mapped(n->pmrdev, true); |
636 | return; | 344 | + } |
637 | } | 345 | + |
638 | 346 | blkconf_blocksizes(&n->conf); | |
639 | cq = n->cq[qid]; | 347 | if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), |
640 | - if (new_head >= cq->size) { | 348 | false, errp)) { |
641 | + if (unlikely(new_head >= cq->size)) { | 349 | @@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) |
642 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead, | 350 | PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | |
643 | + "completion queue doorbell write value" | 351 | PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); |
644 | + " beyond queue size, sqid=%"PRIu32"," | 352 | |
645 | + " new_head=%"PRIu16", ignoring", | 353 | + } else if (n->pmrdev) { |
646 | + qid, new_head); | 354 | + /* Controller Capabilities register */ |
647 | return; | 355 | + NVME_CAP_SET_PMRS(n->bar.cap, 1); |
648 | } | 356 | + |
649 | 357 | + /* PMR Capabilities register */ |
650 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) | 358 | + n->bar.pmrcap = 0; |
651 | nvme_isr_notify(n, cq); | 359 | + NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0); |
652 | } | 360 | + NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0); |
653 | } else { | 361 | + NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2); |
654 | + /* Submission queue doorbell write */ | 362 | + NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0); |
655 | + | 363 | + /* Turn on bit 1 support */ |
656 | uint16_t new_tail = val & 0xffff; | 364 | + NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); |
657 | NvmeSQueue *sq; | 365 | + NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0); |
658 | 366 | + NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0); | |
659 | qid = (addr - 0x1000) >> 3; | 367 | + |
660 | - if (nvme_check_sqid(n, qid)) { | 368 | + /* PMR Control register */ |
661 | + if (unlikely(nvme_check_sqid(n, qid))) { | 369 | + n->bar.pmrctl = 0; |
662 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq, | 370 | + NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0); |
663 | + "submission queue doorbell write" | 371 | + |
664 | + " for nonexistent queue," | 372 | + /* PMR Status register */ |
665 | + " sqid=%"PRIu32", ignoring", qid); | 373 | + n->bar.pmrsts = 0; |
666 | return; | 374 | + NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0); |
667 | } | 375 | + NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0); |
668 | 376 | + NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0); | |
669 | sq = n->sq[qid]; | 377 | + NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0); |
670 | - if (new_tail >= sq->size) { | 378 | + |
671 | + if (unlikely(new_tail >= sq->size)) { | 379 | + /* PMR Elasticity Buffer Size register */ |
672 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail, | 380 | + n->bar.pmrebs = 0; |
673 | + "submission queue doorbell write value" | 381 | + NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0); |
674 | + " beyond queue size, sqid=%"PRIu32"," | 382 | + NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0); |
675 | + " new_tail=%"PRIu16", ignoring", | 383 | + NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0); |
676 | + qid, new_tail); | 384 | + |
677 | return; | 385 | + /* PMR Sustained Write Throughput register */ |
678 | } | 386 | + n->bar.pmrswtp = 0; |
679 | 387 | + NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0); | |
388 | + NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0); | ||
389 | + | ||
390 | + /* PMR Memory Space Control register */ | ||
391 | + n->bar.pmrmsc = 0; | ||
392 | + NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0); | ||
393 | + NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0); | ||
394 | + | ||
395 | + pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), | ||
396 | + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | | ||
397 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr); | ||
398 | } | ||
399 | |||
400 | for (i = 0; i < n->num_namespaces; i++) { | ||
401 | @@ -XXX,XX +XXX,XX @@ static void nvme_exit(PCIDevice *pci_dev) | ||
402 | if (n->cmb_size_mb) { | ||
403 | g_free(n->cmbuf); | ||
404 | } | ||
405 | + | ||
406 | + if (n->pmrdev) { | ||
407 | + host_memory_backend_set_mapped(n->pmrdev, false); | ||
408 | + } | ||
409 | msix_uninit_exclusive_bar(pci_dev); | ||
410 | } | ||
411 | |||
412 | static Property nvme_props[] = { | ||
413 | DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), | ||
414 | + DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND, | ||
415 | + HostMemoryBackend *), | ||
416 | DEFINE_PROP_STRING("serial", NvmeCtrl, serial), | ||
417 | DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), | ||
418 | DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), | ||
419 | diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs | ||
420 | index XXXXXXX..XXXXXXX 100644 | ||
421 | --- a/hw/block/Makefile.objs | ||
422 | +++ b/hw/block/Makefile.objs | ||
423 | @@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o | ||
424 | common-obj-$(CONFIG_XEN) += xen-block.o | ||
425 | common-obj-$(CONFIG_ECC) += ecc.o | ||
426 | common-obj-$(CONFIG_ONENAND) += onenand.o | ||
427 | -common-obj-$(CONFIG_NVME_PCI) += nvme.o | ||
428 | common-obj-$(CONFIG_SWIM) += swim.o | ||
429 | |||
430 | common-obj-$(CONFIG_SH4) += tc58128.o | ||
431 | |||
432 | obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o | ||
433 | obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o | ||
434 | +obj-$(CONFIG_NVME_PCI) += nvme.o | ||
435 | |||
436 | obj-y += dataplane/ | ||
680 | diff --git a/hw/block/trace-events b/hw/block/trace-events | 437 | diff --git a/hw/block/trace-events b/hw/block/trace-events |
681 | index XXXXXXX..XXXXXXX 100644 | 438 | index XXXXXXX..XXXXXXX 100644 |
682 | --- a/hw/block/trace-events | 439 | --- a/hw/block/trace-events |
683 | +++ b/hw/block/trace-events | 440 | +++ b/hw/block/trace-events |
684 | @@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6 | 441 | @@ -XXX,XX +XXX,XX @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA |
685 | hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" | 442 | nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" |
686 | hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" | 443 | nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" |
687 | 444 | nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" | |
688 | +# hw/block/nvme.c | 445 | +nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" |
689 | +# nvme traces for successful events | 446 | +nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" |
690 | +nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" | 447 | +nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" |
691 | +nvme_irq_pin(void) "pulsing IRQ pin" | 448 | +nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" |
692 | +nvme_irq_masked(void) "IRQ is masked" | 449 | nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" |
693 | +nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" | 450 | nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" |
694 | +nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" | 451 | nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" |
695 | +nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" | ||
696 | +nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" | ||
697 | +nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" | ||
698 | +nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" | ||
699 | +nvme_identify_ctrl(void) "identify controller" | ||
700 | +nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" | ||
701 | +nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" | ||
702 | +nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s" | ||
703 | +nvme_getfeat_numq(int result) "get feature number of queues, result=%d" | ||
704 | +nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" | ||
705 | +nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" | ||
706 | +nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" | ||
707 | +nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" | ||
708 | +nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" | ||
709 | +nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" | ||
710 | +nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" | ||
711 | +nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" | ||
712 | +nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" | ||
713 | +nvme_mmio_start_success(void) "setting controller enable bit succeeded" | ||
714 | +nvme_mmio_stopped(void) "cleared controller enable bit" | ||
715 | +nvme_mmio_shutdown_set(void) "shutdown bit set" | ||
716 | +nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" | ||
717 | + | ||
718 | +# nvme traces for error conditions | ||
719 | +nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" | ||
720 | +nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" | ||
721 | +nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" | ||
722 | +nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" | ||
723 | +nvme_err_invalid_field(void) "invalid field" | ||
724 | +nvme_err_invalid_prp(void) "invalid PRP" | ||
725 | +nvme_err_invalid_sgl(void) "invalid SGL" | ||
726 | +nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" | ||
727 | +nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" | ||
728 | +nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" | ||
729 | +nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" | ||
730 | +nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" | ||
731 | +nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" | ||
732 | +nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" | ||
733 | +nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" | ||
734 | +nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" | ||
735 | +nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" | ||
736 | +nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" | ||
737 | +nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" | ||
738 | +nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" | ||
739 | +nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" | ||
740 | +nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" | ||
741 | +nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" | ||
742 | +nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" | ||
743 | +nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" | ||
744 | +nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" | ||
745 | +nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" | ||
746 | +nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" | ||
747 | +nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" | ||
748 | +nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" | ||
749 | +nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" | ||
750 | +nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" | ||
751 | +nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" | ||
752 | +nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" | ||
753 | +nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" | ||
754 | +nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" | ||
755 | +nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" | ||
756 | +nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" | ||
757 | +nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" | ||
758 | +nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" | ||
759 | +nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" | ||
760 | +nvme_err_startfail(void) "setting controller enable bit failed" | ||
761 | + | ||
762 | +# Traces for undefined behavior | ||
763 | +nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" | ||
764 | +nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" | ||
765 | +nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" | ||
766 | +nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" | ||
767 | +nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" | ||
768 | +nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" | ||
769 | +nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" | ||
770 | +nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" | ||
771 | +nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" | ||
772 | +nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" | ||
773 | +nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" | ||
774 | +nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" | ||
775 | +nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" | ||
776 | +nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" | ||
777 | +nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" | ||
778 | +nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" | ||
779 | +nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" | ||
780 | + | ||
781 | # hw/block/xen_disk.c | ||
782 | xen_disk_alloc(char *name) "%s" | ||
783 | xen_disk_init(char *name) "%s" | ||
784 | -- | 452 | -- |
785 | 2.13.6 | 453 | 2.25.3 |
786 | 454 | ||
787 | 455 | diff view generated by jsdifflib |
1 | From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com> | 1 | The QMP handler qmp_object_add() and the implementation of --object in |
---|---|---|---|
2 | qemu-storage-daemon can share most of the code. Currently, | ||
3 | qemu-storage-daemon calls qmp_object_add(), but this is not correct | ||
4 | because different visitors need to be used. | ||
2 | 5 | ||
3 | Convert nvme_init() to realize and rename it to nvme_realize(). | 6 | As a first step towards a fix, make qmp_object_add() a wrapper around a |
7 | new function user_creatable_add_dict() that can get an additional | ||
8 | parameter. The handling of "props" is only required for compatibility | ||
9 | and not required for the qemu-storage-daemon command line, so it stays | ||
10 | in qmp_object_add(). | ||
4 | 11 | ||
5 | Cc: John Snow <jsnow@redhat.com> | ||
6 | Cc: Keith Busch <keith.busch@intel.com> | ||
7 | Cc: Kevin Wolf <kwolf@redhat.com> | ||
8 | Cc: Max Reitz <mreitz@redhat.com> | ||
9 | Cc: Markus Armbruster <armbru@redhat.com> | ||
10 | |||
11 | Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | --- | 13 | --- |
14 | hw/block/nvme.c | 18 ++++++++++-------- | 14 | include/qom/object_interfaces.h | 12 ++++++++++++ |
15 | 1 file changed, 10 insertions(+), 8 deletions(-) | 15 | qom/object_interfaces.c | 27 +++++++++++++++++++++++++++ |
16 | qom/qom-qmp-cmds.c | 24 +----------------------- | ||
17 | 3 files changed, 40 insertions(+), 23 deletions(-) | ||
16 | 18 | ||
17 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c | 19 | diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h |
18 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/block/nvme.c | 21 | --- a/include/qom/object_interfaces.h |
20 | +++ b/hw/block/nvme.c | 22 | +++ b/include/qom/object_interfaces.h |
21 | @@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps nvme_cmb_ops = { | 23 | @@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id, |
22 | }, | 24 | const QDict *qdict, |
23 | }; | 25 | Visitor *v, Error **errp); |
24 | 26 | ||
25 | -static int nvme_init(PCIDevice *pci_dev) | 27 | +/** |
26 | +static void nvme_realize(PCIDevice *pci_dev, Error **errp) | 28 | + * user_creatable_add_dict: |
29 | + * @qdict: the object definition | ||
30 | + * @errp: if an error occurs, a pointer to an area to store the error | ||
31 | + * | ||
32 | + * Create an instance of the user creatable object that is defined by | ||
33 | + * @qdict. The object type is taken from the QDict key 'qom-type', its | ||
34 | + * ID from the key 'id'. The remaining entries in @qdict are used to | ||
35 | + * initialize the object properties. | ||
36 | + */ | ||
37 | +void user_creatable_add_dict(QDict *qdict, Error **errp); | ||
38 | + | ||
39 | /** | ||
40 | * user_creatable_add_opts: | ||
41 | * @opts: the object definition | ||
42 | diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/qom/object_interfaces.c | ||
45 | +++ b/qom/object_interfaces.c | ||
46 | @@ -XXX,XX +XXX,XX @@ | ||
47 | #include "qapi/qmp/qerror.h" | ||
48 | #include "qapi/qmp/qjson.h" | ||
49 | #include "qapi/qmp/qstring.h" | ||
50 | +#include "qapi/qobject-input-visitor.h" | ||
51 | #include "qom/object_interfaces.h" | ||
52 | #include "qemu/help_option.h" | ||
53 | #include "qemu/module.h" | ||
54 | @@ -XXX,XX +XXX,XX @@ out: | ||
55 | return obj; | ||
56 | } | ||
57 | |||
58 | +void user_creatable_add_dict(QDict *qdict, Error **errp) | ||
59 | +{ | ||
60 | + Visitor *v; | ||
61 | + Object *obj; | ||
62 | + g_autofree char *type = NULL; | ||
63 | + g_autofree char *id = NULL; | ||
64 | + | ||
65 | + type = g_strdup(qdict_get_try_str(qdict, "qom-type")); | ||
66 | + if (!type) { | ||
67 | + error_setg(errp, QERR_MISSING_PARAMETER, "qom-type"); | ||
68 | + return; | ||
69 | + } | ||
70 | + qdict_del(qdict, "qom-type"); | ||
71 | + | ||
72 | + id = g_strdup(qdict_get_try_str(qdict, "id")); | ||
73 | + if (!id) { | ||
74 | + error_setg(errp, QERR_MISSING_PARAMETER, "id"); | ||
75 | + return; | ||
76 | + } | ||
77 | + qdict_del(qdict, "id"); | ||
78 | + | ||
79 | + v = qobject_input_visitor_new(QOBJECT(qdict)); | ||
80 | + obj = user_creatable_add_type(type, id, qdict, v, errp); | ||
81 | + visit_free(v); | ||
82 | + object_unref(obj); | ||
83 | +} | ||
84 | |||
85 | Object *user_creatable_add_opts(QemuOpts *opts, Error **errp) | ||
27 | { | 86 | { |
28 | NvmeCtrl *n = NVME(pci_dev); | 87 | diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c |
29 | NvmeIdCtrl *id = &n->id_ctrl; | 88 | index XXXXXXX..XXXXXXX 100644 |
30 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(PCIDevice *pci_dev) | 89 | --- a/qom/qom-qmp-cmds.c |
31 | Error *local_err = NULL; | 90 | +++ b/qom/qom-qmp-cmds.c |
32 | 91 | @@ -XXX,XX +XXX,XX @@ | |
33 | if (!n->conf.blk) { | 92 | #include "qapi/qapi-commands-qom.h" |
34 | - return -1; | 93 | #include "qapi/qmp/qdict.h" |
35 | + error_setg(errp, "drive property not set"); | 94 | #include "qapi/qmp/qerror.h" |
36 | + return; | 95 | -#include "qapi/qobject-input-visitor.h" |
96 | #include "qemu/cutils.h" | ||
97 | #include "qom/object_interfaces.h" | ||
98 | #include "qom/qom-qobject.h" | ||
99 | @@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp) | ||
100 | { | ||
101 | QObject *props; | ||
102 | QDict *pdict; | ||
103 | - Visitor *v; | ||
104 | - Object *obj; | ||
105 | - g_autofree char *type = NULL; | ||
106 | - g_autofree char *id = NULL; | ||
107 | - | ||
108 | - type = g_strdup(qdict_get_try_str(qdict, "qom-type")); | ||
109 | - if (!type) { | ||
110 | - error_setg(errp, QERR_MISSING_PARAMETER, "qom-type"); | ||
111 | - return; | ||
112 | - } | ||
113 | - qdict_del(qdict, "qom-type"); | ||
114 | - | ||
115 | - id = g_strdup(qdict_get_try_str(qdict, "id")); | ||
116 | - if (!id) { | ||
117 | - error_setg(errp, QERR_MISSING_PARAMETER, "id"); | ||
118 | - return; | ||
119 | - } | ||
120 | - qdict_del(qdict, "id"); | ||
121 | |||
122 | props = qdict_get(qdict, "props"); | ||
123 | if (props) { | ||
124 | @@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp) | ||
125 | qobject_unref(pdict); | ||
37 | } | 126 | } |
38 | 127 | ||
39 | bs_size = blk_getlength(n->conf.blk); | 128 | - v = qobject_input_visitor_new(QOBJECT(qdict)); |
40 | if (bs_size < 0) { | 129 | - obj = user_creatable_add_type(type, id, qdict, v, errp); |
41 | - return -1; | 130 | - visit_free(v); |
42 | + error_setg(errp, "could not get backing file size"); | 131 | - object_unref(obj); |
43 | + return; | 132 | + user_creatable_add_dict(qdict, errp); |
44 | } | ||
45 | |||
46 | blkconf_serial(&n->conf, &n->serial); | ||
47 | if (!n->serial) { | ||
48 | - return -1; | ||
49 | + error_setg(errp, "serial property not set"); | ||
50 | + return; | ||
51 | } | ||
52 | blkconf_blocksizes(&n->conf); | ||
53 | blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), | ||
54 | false, &local_err); | ||
55 | if (local_err) { | ||
56 | - error_report_err(local_err); | ||
57 | - return -1; | ||
58 | + error_propagate(errp, local_err); | ||
59 | + return; | ||
60 | } | ||
61 | |||
62 | pci_conf = pci_dev->config; | ||
63 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(PCIDevice *pci_dev) | ||
64 | cpu_to_le64(n->ns_size >> | ||
65 | id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); | ||
66 | } | ||
67 | - return 0; | ||
68 | } | 133 | } |
69 | 134 | ||
70 | static void nvme_exit(PCIDevice *pci_dev) | 135 | void qmp_object_del(const char *id, Error **errp) |
71 | @@ -XXX,XX +XXX,XX @@ static void nvme_class_init(ObjectClass *oc, void *data) | ||
72 | DeviceClass *dc = DEVICE_CLASS(oc); | ||
73 | PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); | ||
74 | |||
75 | - pc->init = nvme_init; | ||
76 | + pc->realize = nvme_realize; | ||
77 | pc->exit = nvme_exit; | ||
78 | pc->class_id = PCI_CLASS_STORAGE_EXPRESS; | ||
79 | pc->vendor_id = PCI_VENDOR_ID_INTEL; | ||
80 | -- | 136 | -- |
81 | 2.13.6 | 137 | 2.25.3 |
82 | 138 | ||
83 | 139 | diff view generated by jsdifflib |
1 | This change separates bdrv_drain_invoke(), which calls the BlockDriver | 1 | After processing the option string with the keyval parser, we get a |
---|---|---|---|
2 | drain callbacks, from bdrv_drain_recurse(). Instead, the function | 2 | QDict that contains only strings. This QDict must be fed to a keyval |
3 | performs its own recursion now. | 3 | visitor which converts the strings into the right data types. |
4 | 4 | ||
5 | One reason for this is that bdrv_drain_recurse() can be called multiple | 5 | qmp_object_add(), however, uses the normal QObject input visitor, which |
6 | times by bdrv_drain_all_begin(), but the callbacks may only be called | 6 | expects a QDict where all properties already have the QType that matches |
7 | once. The separation is necessary to fix this bug. | 7 | the data type required by the QOM object type. |
8 | 8 | ||
9 | The other reason is that we intend to go to a model where we call all | 9 | Change the --object implementation in qemu-storage-daemon so that it |
10 | driver callbacks first, and only then start polling. This is not fully | 10 | doesn't call qmp_object_add(), but calls user_creatable_add_dict() |
11 | achieved yet with this patch, as bdrv_drain_invoke() contains a | 11 | directly instead and passes it a new keyval boolean that decides which |
12 | BDRV_POLL_WHILE() loop for the block driver callbacks, which can still | 12 | visitor must be used. |
13 | call callbacks for any unrelated event. It's a step in this direction | ||
14 | anyway. | ||
15 | 13 | ||
16 | Cc: qemu-stable@nongnu.org | 14 | Reported-by: Coiby Xu <coiby.xu@gmail.com> |
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
18 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | --- | 16 | --- |
20 | block/io.c | 14 +++++++++++--- | 17 | include/qom/object_interfaces.h | 6 +++++- |
21 | 1 file changed, 11 insertions(+), 3 deletions(-) | 18 | qemu-storage-daemon.c | 4 +--- |
19 | qom/object_interfaces.c | 8 ++++++-- | ||
20 | qom/qom-qmp-cmds.c | 2 +- | ||
21 | 4 files changed, 13 insertions(+), 7 deletions(-) | ||
22 | 22 | ||
23 | diff --git a/block/io.c b/block/io.c | 23 | diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h |
24 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/block/io.c | 25 | --- a/include/qom/object_interfaces.h |
26 | +++ b/block/io.c | 26 | +++ b/include/qom/object_interfaces.h |
27 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) | 27 | @@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id, |
28 | bdrv_wakeup(bs); | 28 | /** |
29 | * user_creatable_add_dict: | ||
30 | * @qdict: the object definition | ||
31 | + * @keyval: if true, use a keyval visitor for processing @qdict (i.e. | ||
32 | + * assume that all @qdict values are strings); otherwise, use | ||
33 | + * the normal QObject visitor (i.e. assume all @qdict values | ||
34 | + * have the QType expected by the QOM object type) | ||
35 | * @errp: if an error occurs, a pointer to an area to store the error | ||
36 | * | ||
37 | * Create an instance of the user creatable object that is defined by | ||
38 | @@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id, | ||
39 | * ID from the key 'id'. The remaining entries in @qdict are used to | ||
40 | * initialize the object properties. | ||
41 | */ | ||
42 | -void user_creatable_add_dict(QDict *qdict, Error **errp); | ||
43 | +void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp); | ||
44 | |||
45 | /** | ||
46 | * user_creatable_add_opts: | ||
47 | diff --git a/qemu-storage-daemon.c b/qemu-storage-daemon.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/qemu-storage-daemon.c | ||
50 | +++ b/qemu-storage-daemon.c | ||
51 | @@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[]) | ||
52 | QemuOpts *opts; | ||
53 | const char *type; | ||
54 | QDict *args; | ||
55 | - QObject *ret_data = NULL; | ||
56 | |||
57 | /* FIXME The keyval parser rejects 'help' arguments, so we must | ||
58 | * unconditionally try QemuOpts first. */ | ||
59 | @@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[]) | ||
60 | qemu_opts_del(opts); | ||
61 | |||
62 | args = keyval_parse(optarg, "qom-type", &error_fatal); | ||
63 | - qmp_object_add(args, &ret_data, &error_fatal); | ||
64 | + user_creatable_add_dict(args, true, &error_fatal); | ||
65 | qobject_unref(args); | ||
66 | - qobject_unref(ret_data); | ||
67 | break; | ||
68 | } | ||
69 | default: | ||
70 | diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c | ||
71 | index XXXXXXX..XXXXXXX 100644 | ||
72 | --- a/qom/object_interfaces.c | ||
73 | +++ b/qom/object_interfaces.c | ||
74 | @@ -XXX,XX +XXX,XX @@ out: | ||
75 | return obj; | ||
29 | } | 76 | } |
30 | 77 | ||
31 | +/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ | 78 | -void user_creatable_add_dict(QDict *qdict, Error **errp) |
32 | static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | 79 | +void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp) |
33 | { | 80 | { |
34 | + BdrvChild *child, *tmp; | 81 | Visitor *v; |
35 | BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; | 82 | Object *obj; |
36 | 83 | @@ -XXX,XX +XXX,XX @@ void user_creatable_add_dict(QDict *qdict, Error **errp) | |
37 | if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || | 84 | } |
38 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | 85 | qdict_del(qdict, "id"); |
39 | data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data); | 86 | |
40 | bdrv_coroutine_enter(bs, data.co); | 87 | - v = qobject_input_visitor_new(QOBJECT(qdict)); |
41 | BDRV_POLL_WHILE(bs, !data.done); | 88 | + if (keyval) { |
42 | + | 89 | + v = qobject_input_visitor_new_keyval(QOBJECT(qdict)); |
43 | + QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { | 90 | + } else { |
44 | + bdrv_drain_invoke(child->bs, begin); | 91 | + v = qobject_input_visitor_new(QOBJECT(qdict)); |
45 | + } | 92 | + } |
93 | obj = user_creatable_add_type(type, id, qdict, v, errp); | ||
94 | visit_free(v); | ||
95 | object_unref(obj); | ||
96 | diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c | ||
97 | index XXXXXXX..XXXXXXX 100644 | ||
98 | --- a/qom/qom-qmp-cmds.c | ||
99 | +++ b/qom/qom-qmp-cmds.c | ||
100 | @@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp) | ||
101 | qobject_unref(pdict); | ||
102 | } | ||
103 | |||
104 | - user_creatable_add_dict(qdict, errp); | ||
105 | + user_creatable_add_dict(qdict, false, errp); | ||
46 | } | 106 | } |
47 | 107 | ||
48 | static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) | 108 | void qmp_object_del(const char *id, Error **errp) |
49 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) | ||
50 | BdrvChild *child, *tmp; | ||
51 | bool waited; | ||
52 | |||
53 | - /* Ensure any pending metadata writes are submitted to bs->file. */ | ||
54 | - bdrv_drain_invoke(bs, begin); | ||
55 | - | ||
56 | /* Wait for drained requests to finish */ | ||
57 | waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0); | ||
58 | |||
59 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) | ||
60 | bdrv_parent_drained_begin(bs); | ||
61 | } | ||
62 | |||
63 | + bdrv_drain_invoke(bs, true); | ||
64 | bdrv_drain_recurse(bs, true); | ||
65 | } | ||
66 | |||
67 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | ||
68 | } | ||
69 | |||
70 | bdrv_parent_drained_end(bs); | ||
71 | + bdrv_drain_invoke(bs, false); | ||
72 | bdrv_drain_recurse(bs, false); | ||
73 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
74 | } | ||
75 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
76 | aio_context_acquire(aio_context); | ||
77 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
78 | if (aio_context == bdrv_get_aio_context(bs)) { | ||
79 | + /* FIXME Calling this multiple times is wrong */ | ||
80 | + bdrv_drain_invoke(bs, true); | ||
81 | waited |= bdrv_drain_recurse(bs, true); | ||
82 | } | ||
83 | } | ||
84 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
85 | aio_context_acquire(aio_context); | ||
86 | aio_enable_external(aio_context); | ||
87 | bdrv_parent_drained_end(bs); | ||
88 | + bdrv_drain_invoke(bs, false); | ||
89 | bdrv_drain_recurse(bs, false); | ||
90 | aio_context_release(aio_context); | ||
91 | } | ||
92 | -- | 109 | -- |
93 | 2.13.6 | 110 | 2.25.3 |
94 | 111 | ||
95 | 112 | diff view generated by jsdifflib |