1
The following changes since commit af352675efb7e92a1f5f6461a042a12015ab3d12:
1
The following changes since commit 16aaacb307ed607b9780c12702c44f0fe52edc7e:
2
2
3
Merge remote-tracking branch 'remotes/aurel/tags/pull-target-sh4-20171218' into staging (2017-12-19 19:11:11 +0000)
3
Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20200430' into staging (2020-04-30 14:00:36 +0100)
4
4
5
are available in the git repository at:
5
are available in the Git repository at:
6
6
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
8
8
9
for you to fetch changes up to b4d526c87b26aff6d8b353951aa175a1236ad887:
9
for you to fetch changes up to eaae29ef89d498d0eac553c77b554f310a47f809:
10
10
11
nvme: Add tracing (2017-12-20 11:05:49 +0100)
11
qemu-storage-daemon: Fix non-string --object properties (2020-04-30 17:51:07 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches
14
Block layer patches:
15
16
- Fix resize (extending) of short overlays
17
- nvme: introduce PMR support from NVMe 1.4 spec
18
- qemu-storage-daemon: Fix non-string --object properties
15
19
16
----------------------------------------------------------------
20
----------------------------------------------------------------
17
Doug Gale (1):
21
Alberto Garcia (1):
18
nvme: Add tracing
22
qcow2: Add incompatibility note between backing files and raw external data files
19
23
20
Edgar Kaziakhmedov (1):
24
Andrzej Jakowski (1):
21
qcow2: get rid of qcow2_backing_read1 routine
25
nvme: introduce PMR support from NVMe 1.4 spec
22
26
23
Fam Zheng (1):
27
Kevin Wolf (12):
24
qemu-img: Document --force-share / -U
28
block: Add flags to BlockDriver.bdrv_co_truncate()
29
block: Add flags to bdrv(_co)_truncate()
30
block-backend: Add flags to blk_truncate()
31
qcow2: Support BDRV_REQ_ZERO_WRITE for truncate
32
raw-format: Support BDRV_REQ_ZERO_WRITE for truncate
33
file-posix: Support BDRV_REQ_ZERO_WRITE for truncate
34
block: truncate: Don't make backing file data visible
35
iotests: Filter testfiles out in filter_img_info()
36
iotests: Test committing to short backing file
37
qcow2: Forward ZERO_WRITE flag for full preallocation
38
qom: Factor out user_creatable_add_dict()
39
qemu-storage-daemon: Fix non-string --object properties
25
40
26
John Snow (1):
41
Paolo Bonzini (1):
27
iotests: fix 197 for vpc
42
qemu-iotests: allow qcow2 external discarded clusters to contain stale data
28
43
29
Kevin Wolf (9):
44
docs/interop/qcow2.txt | 3 +
30
block: Formats don't need CONSISTENT_READ with NO_IO
45
hw/block/nvme.h | 2 +
31
block: Make bdrv_drain_invoke() recursive
46
include/block/block.h | 5 +-
32
block: Call .drain_begin only once in bdrv_drain_all_begin()
47
include/block/block_int.h | 10 +-
33
test-bdrv-drain: Test BlockDriver callbacks for drain
48
include/block/nvme.h | 172 ++++++++++++++++++++++++++
34
block: bdrv_drain_recurse(): Remove unused begin parameter
49
include/qom/object_interfaces.h | 16 +++
35
block: Don't wait for requests in bdrv_drain*_end()
50
include/sysemu/block-backend.h | 2 +-
36
block: Unify order in drain functions
51
block.c | 3 +-
37
block: Don't acquire AioContext in hmp_qemu_io()
52
block/block-backend.c | 4 +-
38
block: Document that x-blockdev-change breaks quorum children list
53
block/commit.c | 4 +-
54
block/crypto.c | 7 +-
55
block/file-posix.c | 6 +-
56
block/file-win32.c | 2 +-
57
block/gluster.c | 1 +
58
block/io.c | 43 ++++++-
59
block/iscsi.c | 2 +-
60
block/mirror.c | 2 +-
61
block/nfs.c | 3 +-
62
block/parallels.c | 6 +-
63
block/qcow.c | 4 +-
64
block/qcow2-cluster.c | 2 +-
65
block/qcow2-refcount.c | 2 +-
66
block/qcow2.c | 73 +++++++++--
67
block/qed.c | 3 +-
68
block/raw-format.c | 6 +-
69
block/rbd.c | 1 +
70
block/sheepdog.c | 4 +-
71
block/ssh.c | 2 +-
72
block/vdi.c | 2 +-
73
block/vhdx-log.c | 2 +-
74
block/vhdx.c | 6 +-
75
block/vmdk.c | 8 +-
76
block/vpc.c | 2 +-
77
blockdev.c | 2 +-
78
hw/block/nvme.c | 109 ++++++++++++++++
79
qemu-img.c | 2 +-
80
qemu-io-cmds.c | 2 +-
81
qemu-storage-daemon.c | 4 +-
82
qom/object_interfaces.c | 31 +++++
83
qom/qom-qmp-cmds.c | 24 +---
84
tests/test-block-iothread.c | 9 +-
85
tests/qemu-iotests/iotests.py | 5 +-
86
hw/block/Makefile.objs | 2 +-
87
hw/block/trace-events | 4 +
88
tests/qemu-iotests/244 | 10 +-
89
tests/qemu-iotests/244.out | 9 +-
90
tests/qemu-iotests/274 | 155 +++++++++++++++++++++++
91
tests/qemu-iotests/274.out | 268 ++++++++++++++++++++++++++++++++++++++++
92
tests/qemu-iotests/group | 1 +
93
49 files changed, 951 insertions(+), 96 deletions(-)
94
create mode 100755 tests/qemu-iotests/274
95
create mode 100644 tests/qemu-iotests/274.out
39
96
40
Mao Zhongyi (1):
41
hw/block/nvme: Convert to realize
42
97
43
qapi/block-core.json | 4 +
44
block/qcow2.h | 3 -
45
block.c | 6 +-
46
block/io.c | 31 ++--
47
block/qcow2.c | 51 +-----
48
hmp.c | 6 -
49
hw/block/nvme.c | 367 ++++++++++++++++++++++++++++++++-------
50
tests/test-bdrv-drain.c | 137 +++++++++++++++
51
hw/block/trace-events | 93 ++++++++++
52
qemu-img.texi | 9 +
53
tests/Makefile.include | 2 +
54
tests/qemu-iotests/197 | 4 +
55
tests/qemu-iotests/common.filter | 3 +-
56
13 files changed, 591 insertions(+), 125 deletions(-)
57
create mode 100644 tests/test-bdrv-drain.c
58
diff view generated by jsdifflib
New patch
1
From: Alberto Garcia <berto@igalia.com>
1
2
3
Backing files and raw external data files are mutually exclusive.
4
The documentation of the raw external data bit (in autoclear_features)
5
already indicates that, but we should also mention it on the other
6
side.
7
8
Suggested-by: Eric Blake <eblake@redhat.com>
9
Signed-off-by: Alberto Garcia <berto@igalia.com>
10
Message-Id: <20200410121816.8334-1-berto@igalia.com>
11
Reviewed-by: Eric Blake <eblake@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
14
docs/interop/qcow2.txt | 3 +++
15
1 file changed, 3 insertions(+)
16
17
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
18
index XXXXXXX..XXXXXXX 100644
19
--- a/docs/interop/qcow2.txt
20
+++ b/docs/interop/qcow2.txt
21
@@ -XXX,XX +XXX,XX @@ The first cluster of a qcow2 image contains the file header:
22
is stored (NB: The string is not null terminated). 0 if the
23
image doesn't have a backing file.
24
25
+ Note: backing files are incompatible with raw external data
26
+ files (auto-clear feature bit 1).
27
+
28
16 - 19: backing_file_size
29
Length of the backing file name in bytes. Must not be
30
longer than 1023 bytes. Undefined if the image doesn't have
31
--
32
2.25.3
33
34
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Signed-off-by: Fam Zheng <famz@redhat.com>
3
Test 244 checks the expected behavior of qcow2 external data files
4
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4
with respect to zero and discarded clusters. Filesystems however
5
Reviewed-by: Kashyap Chamarthy <kchamart@redhat.com>
5
are free to ignore discard requests, and this seems to be the
6
case for overlayfs. Relax the tests to skip checks on the
7
external data file for discarded areas, which implies not using
8
qemu-img compare in the data_file_raw=on case.
9
10
This fixes docker tests on RHEL8.
11
12
Cc: Kevin Wolf <kwolf@redhat.com>
13
Cc: qemu-block@nongnu.org
14
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
15
Message-Id: <20200409191006.24429-1-pbonzini@redhat.com>
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
16
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
---
17
---
8
qemu-img.texi | 9 +++++++++
18
tests/qemu-iotests/244 | 10 ++++++++--
9
1 file changed, 9 insertions(+)
19
tests/qemu-iotests/244.out | 9 ++++++---
20
2 files changed, 14 insertions(+), 5 deletions(-)
10
21
11
diff --git a/qemu-img.texi b/qemu-img.texi
22
diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244
23
index XXXXXXX..XXXXXXX 100755
24
--- a/tests/qemu-iotests/244
25
+++ b/tests/qemu-iotests/244
26
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \
27
echo
28
$QEMU_IO -c 'read -P 0 0 1M' \
29
-c 'read -P 0x11 1M 1M' \
30
- -c 'read -P 0 2M 2M' \
31
-c 'read -P 0x11 4M 1M' \
32
-c 'read -P 0 5M 1M' \
33
-f raw "$TEST_IMG.data" |
34
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \
35
-f $IMGFMT "$TEST_IMG" |
36
_filter_qemu_io
37
38
+# Discarded clusters are only marked as such in the qcow2 metadata, but
39
+# they can contain stale data in the external data file. Instead, zero
40
+# clusters must be zeroed in the external data file too.
41
echo
42
-$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.data"
43
+$QEMU_IO -c 'read -P 0 0 1M' \
44
+ -c 'read -P 0x11 1M 1M' \
45
+ -c 'read -P 0 3M 3M' \
46
+ -f raw "$TEST_IMG".data |
47
+ _filter_qemu_io
48
49
echo -n "qcow2 file size after I/O: "
50
du -b $TEST_IMG | cut -f1
51
diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out
12
index XXXXXXX..XXXXXXX 100644
52
index XXXXXXX..XXXXXXX 100644
13
--- a/qemu-img.texi
53
--- a/tests/qemu-iotests/244.out
14
+++ b/qemu-img.texi
54
+++ b/tests/qemu-iotests/244.out
15
@@ -XXX,XX +XXX,XX @@ exclusive with the @var{-O} parameters. It is currently required to also use
55
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0
16
the @var{-n} parameter to skip image creation. This restriction may be relaxed
56
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
17
in a future release.
57
read 1048576/1048576 bytes at offset 1048576
18
58
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
19
+@item --force-share (-U)
59
-read 2097152/2097152 bytes at offset 2097152
20
+
60
-2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
21
+If specified, @code{qemu-img} will open the image with shared permissions,
61
read 1048576/1048576 bytes at offset 4194304
22
+which makes it less likely to conflict with a running guest's permissions due
62
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
23
+to image locking. For example, this can be used to get the image information
63
read 1048576/1048576 bytes at offset 5242880
24
+(with 'info' subcommand) when the image is used by a running guest. Note that
64
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 1048576
25
+this could produce inconsistent results because of concurrent metadata changes,
65
read 4194304/4194304 bytes at offset 2097152
26
+etc. This option is only allowed when opening images in read-only mode.
66
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
27
+
67
28
@item fmt
68
-Images are identical.
29
is the disk image format. It is guessed automatically in most cases. See below
69
+read 1048576/1048576 bytes at offset 0
30
for a description of the supported disk formats.
70
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
71
+read 1048576/1048576 bytes at offset 1048576
72
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
73
+read 3145728/3145728 bytes at offset 3145728
74
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
75
qcow2 file size after I/O: 327680
76
77
=== bdrv_co_block_status test for file and offset=0 ===
31
--
78
--
32
2.13.6
79
2.25.3
33
80
34
81
diff view generated by jsdifflib
1
Drain requests are propagated to child nodes, parent nodes and directly
1
This adds a new BdrvRequestFlags parameter to the .bdrv_co_truncate()
2
to the AioContext. The order in which this happened was different
2
driver callbacks, and a supported_truncate_flags field in
3
between all combinations of drain/drain_all and begin/end.
3
BlockDriverState that allows drivers to advertise support for request
4
4
flags in the context of truncate.
5
The correct order is to keep children only drained when their parents
5
6
are also drained. This means that at the start of a drained section, the
6
For now, we always pass 0 and no drivers declare support for any flag.
7
AioContext needs to be drained first, the parents second and only then
8
the children. The correct order for the end of a drained section is the
9
opposite.
10
11
This patch changes the three other functions to follow the example of
12
bdrv_drained_begin(), which is the only one that got it right.
13
7
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Alberto Garcia <berto@igalia.com>
11
Reviewed-by: Max Reitz <mreitz@redhat.com>
12
Message-Id: <20200424125448.63318-2-kwolf@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
16
---
14
---
17
block/io.c | 12 ++++++++----
15
include/block/block_int.h | 10 +++++++++-
18
1 file changed, 8 insertions(+), 4 deletions(-)
16
block/crypto.c | 3 ++-
19
17
block/file-posix.c | 2 +-
18
block/file-win32.c | 2 +-
19
block/gluster.c | 1 +
20
block/io.c | 8 +++++++-
21
block/iscsi.c | 2 +-
22
block/nfs.c | 3 ++-
23
block/qcow2.c | 2 +-
24
block/qed.c | 1 +
25
block/raw-format.c | 2 +-
26
block/rbd.c | 1 +
27
block/sheepdog.c | 4 ++--
28
block/ssh.c | 2 +-
29
tests/test-block-iothread.c | 3 ++-
30
15 files changed, 33 insertions(+), 13 deletions(-)
31
32
diff --git a/include/block/block_int.h b/include/block/block_int.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/include/block/block_int.h
35
+++ b/include/block/block_int.h
36
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
37
*/
38
int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
39
bool exact, PreallocMode prealloc,
40
- Error **errp);
41
+ BdrvRequestFlags flags, Error **errp);
42
43
int64_t (*bdrv_getlength)(BlockDriverState *bs);
44
bool has_variable_length;
45
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
46
/* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
47
* BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
48
unsigned int supported_zero_flags;
49
+ /*
50
+ * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
51
+ *
52
+ * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
53
+ * that any added space reads as all zeros. If this can't be guaranteed,
54
+ * the operation must fail.
55
+ */
56
+ unsigned int supported_truncate_flags;
57
58
/* the following member gives a name to every node on the bs graph. */
59
char node_name[32];
60
diff --git a/block/crypto.c b/block/crypto.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/block/crypto.c
63
+++ b/block/crypto.c
64
@@ -XXX,XX +XXX,XX @@ static int block_crypto_co_create_generic(BlockDriverState *bs,
65
66
static int coroutine_fn
67
block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
68
- PreallocMode prealloc, Error **errp)
69
+ PreallocMode prealloc, BdrvRequestFlags flags,
70
+ Error **errp)
71
{
72
BlockCrypto *crypto = bs->opaque;
73
uint64_t payload_offset =
74
diff --git a/block/file-posix.c b/block/file-posix.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/block/file-posix.c
77
+++ b/block/file-posix.c
78
@@ -XXX,XX +XXX,XX @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset,
79
80
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
81
bool exact, PreallocMode prealloc,
82
- Error **errp)
83
+ BdrvRequestFlags flags, Error **errp)
84
{
85
BDRVRawState *s = bs->opaque;
86
struct stat st;
87
diff --git a/block/file-win32.c b/block/file-win32.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/block/file-win32.c
90
+++ b/block/file-win32.c
91
@@ -XXX,XX +XXX,XX @@ static void raw_close(BlockDriverState *bs)
92
93
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
94
bool exact, PreallocMode prealloc,
95
- Error **errp)
96
+ BdrvRequestFlags flags, Error **errp)
97
{
98
BDRVRawState *s = bs->opaque;
99
LONG low, high;
100
diff --git a/block/gluster.c b/block/gluster.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/block/gluster.c
103
+++ b/block/gluster.c
104
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs,
105
int64_t offset,
106
bool exact,
107
PreallocMode prealloc,
108
+ BdrvRequestFlags flags,
109
Error **errp)
110
{
111
BDRVGlusterState *s = bs->opaque;
20
diff --git a/block/io.c b/block/io.c
112
diff --git a/block/io.c b/block/io.c
21
index XXXXXXX..XXXXXXX 100644
113
index XXXXXXX..XXXXXXX 100644
22
--- a/block/io.c
114
--- a/block/io.c
23
+++ b/block/io.c
115
+++ b/block/io.c
24
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
116
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
25
return;
117
BlockDriverState *bs = child->bs;
118
BlockDriver *drv = bs->drv;
119
BdrvTrackedRequest req;
120
+ BdrvRequestFlags flags = 0;
121
int64_t old_size, new_bytes;
122
int ret;
123
124
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
26
}
125
}
27
126
28
+ /* Stop things in parent-to-child order */
127
if (drv->bdrv_co_truncate) {
29
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
128
- ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp);
30
aio_disable_external(bdrv_get_aio_context(bs));
129
+ if (flags & ~bs->supported_truncate_flags) {
31
bdrv_parent_drained_begin(bs);
130
+ error_setg(errp, "Block driver does not support requested flags");
32
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
131
+ ret = -ENOTSUP;
33
return;
132
+ goto out;
34
}
133
+ }
35
134
+ ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
36
- bdrv_parent_drained_end(bs);
135
} else if (bs->file && drv->is_filter) {
37
+ /* Re-enable things in child-to-parent order */
136
ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
38
bdrv_drain_invoke(bs, false);
137
} else {
39
+ bdrv_parent_drained_end(bs);
138
diff --git a/block/iscsi.c b/block/iscsi.c
40
aio_enable_external(bdrv_get_aio_context(bs));
139
index XXXXXXX..XXXXXXX 100644
140
--- a/block/iscsi.c
141
+++ b/block/iscsi.c
142
@@ -XXX,XX +XXX,XX @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
143
144
static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
145
bool exact, PreallocMode prealloc,
146
- Error **errp)
147
+ BdrvRequestFlags flags, Error **errp)
148
{
149
IscsiLun *iscsilun = bs->opaque;
150
int64_t cur_length;
151
diff --git a/block/nfs.c b/block/nfs.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/block/nfs.c
154
+++ b/block/nfs.c
155
@@ -XXX,XX +XXX,XX @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
156
157
static int coroutine_fn
158
nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
159
- PreallocMode prealloc, Error **errp)
160
+ PreallocMode prealloc, BdrvRequestFlags flags,
161
+ Error **errp)
162
{
163
NFSClient *client = bs->opaque;
164
int ret;
165
diff --git a/block/qcow2.c b/block/qcow2.c
166
index XXXXXXX..XXXXXXX 100644
167
--- a/block/qcow2.c
168
+++ b/block/qcow2.c
169
@@ -XXX,XX +XXX,XX @@ fail:
170
171
static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
172
bool exact, PreallocMode prealloc,
173
- Error **errp)
174
+ BdrvRequestFlags flags, Error **errp)
175
{
176
BDRVQcow2State *s = bs->opaque;
177
uint64_t old_length;
178
diff --git a/block/qed.c b/block/qed.c
179
index XXXXXXX..XXXXXXX 100644
180
--- a/block/qed.c
181
+++ b/block/qed.c
182
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs,
183
int64_t offset,
184
bool exact,
185
PreallocMode prealloc,
186
+ BdrvRequestFlags flags,
187
Error **errp)
188
{
189
BDRVQEDState *s = bs->opaque;
190
diff --git a/block/raw-format.c b/block/raw-format.c
191
index XXXXXXX..XXXXXXX 100644
192
--- a/block/raw-format.c
193
+++ b/block/raw-format.c
194
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
195
196
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
197
bool exact, PreallocMode prealloc,
198
- Error **errp)
199
+ BdrvRequestFlags flags, Error **errp)
200
{
201
BDRVRawState *s = bs->opaque;
202
203
diff --git a/block/rbd.c b/block/rbd.c
204
index XXXXXXX..XXXXXXX 100644
205
--- a/block/rbd.c
206
+++ b/block/rbd.c
207
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
208
int64_t offset,
209
bool exact,
210
PreallocMode prealloc,
211
+ BdrvRequestFlags flags,
212
Error **errp)
213
{
214
int r;
215
diff --git a/block/sheepdog.c b/block/sheepdog.c
216
index XXXXXXX..XXXXXXX 100644
217
--- a/block/sheepdog.c
218
+++ b/block/sheepdog.c
219
@@ -XXX,XX +XXX,XX @@ static int64_t sd_getlength(BlockDriverState *bs)
220
221
static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset,
222
bool exact, PreallocMode prealloc,
223
- Error **errp)
224
+ BdrvRequestFlags flags, Error **errp)
225
{
226
BDRVSheepdogState *s = bs->opaque;
227
int ret, fd;
228
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
229
230
assert(!flags);
231
if (offset > s->inode.vdi_size) {
232
- ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL);
233
+ ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL);
234
if (ret < 0) {
235
return ret;
236
}
237
diff --git a/block/ssh.c b/block/ssh.c
238
index XXXXXXX..XXXXXXX 100644
239
--- a/block/ssh.c
240
+++ b/block/ssh.c
241
@@ -XXX,XX +XXX,XX @@ static int64_t ssh_getlength(BlockDriverState *bs)
242
243
static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
244
bool exact, PreallocMode prealloc,
245
- Error **errp)
246
+ BdrvRequestFlags flags, Error **errp)
247
{
248
BDRVSSHState *s = bs->opaque;
249
250
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
251
index XXXXXXX..XXXXXXX 100644
252
--- a/tests/test-block-iothread.c
253
+++ b/tests/test-block-iothread.c
254
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs,
255
256
static int coroutine_fn
257
bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
258
- PreallocMode prealloc, Error **errp)
259
+ PreallocMode prealloc, BdrvRequestFlags flags,
260
+ Error **errp)
261
{
262
return 0;
41
}
263
}
42
43
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
44
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
45
AioContext *aio_context = bdrv_get_aio_context(bs);
46
47
+ /* Stop things in parent-to-child order */
48
aio_context_acquire(aio_context);
49
- bdrv_parent_drained_begin(bs);
50
aio_disable_external(aio_context);
51
+ bdrv_parent_drained_begin(bs);
52
bdrv_drain_invoke(bs, true);
53
aio_context_release(aio_context);
54
55
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
56
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
57
AioContext *aio_context = bdrv_get_aio_context(bs);
58
59
+ /* Re-enable things in child-to-parent order */
60
aio_context_acquire(aio_context);
61
- aio_enable_external(aio_context);
62
- bdrv_parent_drained_end(bs);
63
bdrv_drain_invoke(bs, false);
64
+ bdrv_parent_drained_end(bs);
65
+ aio_enable_external(aio_context);
66
aio_context_release(aio_context);
67
}
68
69
--
264
--
70
2.13.6
265
2.25.3
71
266
72
267
diff view generated by jsdifflib
1
bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver
1
Now that block drivers can support flags for .bdrv_co_truncate, expose
2
callback inside its polling loop. This means that how many times it got
2
the parameter in the node level interfaces bdrv_co_truncate() and
3
called for each node depended on how long it had to poll the event loop.
3
bdrv_truncate().
4
4
5
This is obviously not right and results in nodes that stay drained even
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
after bdrv_drain_all_end(), which calls .bdrv_co_drain_end() once per
6
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
node.
7
Reviewed-by: Alberto Garcia <berto@igalia.com>
8
Reviewed-by: Max Reitz <mreitz@redhat.com>
9
Message-Id: <20200424125448.63318-3-kwolf@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
12
include/block/block.h | 5 +++--
13
block/block-backend.c | 2 +-
14
block/crypto.c | 2 +-
15
block/io.c | 12 +++++++-----
16
block/parallels.c | 6 +++---
17
block/qcow.c | 4 ++--
18
block/qcow2-refcount.c | 2 +-
19
block/qcow2.c | 15 +++++++++------
20
block/raw-format.c | 2 +-
21
block/vhdx-log.c | 2 +-
22
block/vhdx.c | 2 +-
23
block/vmdk.c | 2 +-
24
tests/test-block-iothread.c | 6 +++---
25
13 files changed, 34 insertions(+), 28 deletions(-)
8
26
9
Fix bdrv_drain_all_begin() to call the callback only once, too.
27
diff --git a/include/block/block.h b/include/block/block.h
10
28
index XXXXXXX..XXXXXXX 100644
11
Cc: qemu-stable@nongnu.org
29
--- a/include/block/block.h
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
30
+++ b/include/block/block.h
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
31
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
14
---
32
void bdrv_refresh_filename(BlockDriverState *bs);
15
block/io.c | 3 +--
33
16
1 file changed, 1 insertion(+), 2 deletions(-)
34
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
17
35
- PreallocMode prealloc, Error **errp);
36
+ PreallocMode prealloc, BdrvRequestFlags flags,
37
+ Error **errp);
38
int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
39
- PreallocMode prealloc, Error **errp);
40
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
41
42
int64_t bdrv_nb_sectors(BlockDriverState *bs);
43
int64_t bdrv_getlength(BlockDriverState *bs);
44
diff --git a/block/block-backend.c b/block/block-backend.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/block/block-backend.c
47
+++ b/block/block-backend.c
48
@@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
49
return -ENOMEDIUM;
50
}
51
52
- return bdrv_truncate(blk->root, offset, exact, prealloc, errp);
53
+ return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp);
54
}
55
56
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
57
diff --git a/block/crypto.c b/block/crypto.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/block/crypto.c
60
+++ b/block/crypto.c
61
@@ -XXX,XX +XXX,XX @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
62
63
offset += payload_offset;
64
65
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
66
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
67
}
68
69
static void block_crypto_close(BlockDriverState *bs)
18
diff --git a/block/io.c b/block/io.c
70
diff --git a/block/io.c b/block/io.c
19
index XXXXXXX..XXXXXXX 100644
71
index XXXXXXX..XXXXXXX 100644
20
--- a/block/io.c
72
--- a/block/io.c
21
+++ b/block/io.c
73
+++ b/block/io.c
22
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
74
@@ -XXX,XX +XXX,XX @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
23
aio_context_acquire(aio_context);
75
* 'offset' bytes in length.
24
bdrv_parent_drained_begin(bs);
76
*/
25
aio_disable_external(aio_context);
77
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
26
+ bdrv_drain_invoke(bs, true);
78
- PreallocMode prealloc, Error **errp)
27
aio_context_release(aio_context);
79
+ PreallocMode prealloc, BdrvRequestFlags flags,
28
80
+ Error **errp)
29
if (!g_slist_find(aio_ctxs, aio_context)) {
81
{
30
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
82
BlockDriverState *bs = child->bs;
31
aio_context_acquire(aio_context);
83
BlockDriver *drv = bs->drv;
32
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
84
BdrvTrackedRequest req;
33
if (aio_context == bdrv_get_aio_context(bs)) {
85
- BdrvRequestFlags flags = 0;
34
- /* FIXME Calling this multiple times is wrong */
86
int64_t old_size, new_bytes;
35
- bdrv_drain_invoke(bs, true);
87
int ret;
36
waited |= bdrv_drain_recurse(bs, true);
88
37
}
89
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
90
}
91
ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
92
} else if (bs->file && drv->is_filter) {
93
- ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
94
+ ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
95
} else {
96
error_setg(errp, "Image format driver does not support resize");
97
ret = -ENOTSUP;
98
@@ -XXX,XX +XXX,XX @@ typedef struct TruncateCo {
99
int64_t offset;
100
bool exact;
101
PreallocMode prealloc;
102
+ BdrvRequestFlags flags;
103
Error **errp;
104
int ret;
105
} TruncateCo;
106
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
107
{
108
TruncateCo *tco = opaque;
109
tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact,
110
- tco->prealloc, tco->errp);
111
+ tco->prealloc, tco->flags, tco->errp);
112
aio_wait_kick();
113
}
114
115
int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
116
- PreallocMode prealloc, Error **errp)
117
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
118
{
119
Coroutine *co;
120
TruncateCo tco = {
121
@@ -XXX,XX +XXX,XX @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
122
.offset = offset,
123
.exact = exact,
124
.prealloc = prealloc,
125
+ .flags = flags,
126
.errp = errp,
127
.ret = NOT_DONE,
128
};
129
diff --git a/block/parallels.c b/block/parallels.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/block/parallels.c
132
+++ b/block/parallels.c
133
@@ -XXX,XX +XXX,XX @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
134
} else {
135
ret = bdrv_truncate(bs->file,
136
(s->data_end + space) << BDRV_SECTOR_BITS,
137
- false, PREALLOC_MODE_OFF, NULL);
138
+ false, PREALLOC_MODE_OFF, 0, NULL);
139
}
140
if (ret < 0) {
141
return ret;
142
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
143
* That means we have to pass exact=true.
144
*/
145
ret = bdrv_truncate(bs->file, res->image_end_offset, true,
146
- PREALLOC_MODE_OFF, &local_err);
147
+ PREALLOC_MODE_OFF, 0, &local_err);
148
if (ret < 0) {
149
error_report_err(local_err);
150
res->check_errors++;
151
@@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs)
152
153
/* errors are ignored, so we might as well pass exact=true */
154
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true,
155
- PREALLOC_MODE_OFF, NULL);
156
+ PREALLOC_MODE_OFF, 0, NULL);
157
}
158
159
g_free(s->bat_dirty_bmap);
160
diff --git a/block/qcow.c b/block/qcow.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/block/qcow.c
163
+++ b/block/qcow.c
164
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
165
return -E2BIG;
166
}
167
ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
168
- false, PREALLOC_MODE_OFF, NULL);
169
+ false, PREALLOC_MODE_OFF, 0, NULL);
170
if (ret < 0) {
171
return ret;
172
}
173
@@ -XXX,XX +XXX,XX @@ static int qcow_make_empty(BlockDriverState *bs)
174
l1_length) < 0)
175
return -1;
176
ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false,
177
- PREALLOC_MODE_OFF, NULL);
178
+ PREALLOC_MODE_OFF, 0, NULL);
179
if (ret < 0)
180
return ret;
181
182
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/block/qcow2-refcount.c
185
+++ b/block/qcow2-refcount.c
186
@@ -XXX,XX +XXX,XX @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
187
}
188
189
ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
190
- PREALLOC_MODE_OFF, &local_err);
191
+ PREALLOC_MODE_OFF, 0, &local_err);
192
if (ret < 0) {
193
error_report_err(local_err);
194
goto resize_fail;
195
diff --git a/block/qcow2.c b/block/qcow2.c
196
index XXXXXXX..XXXXXXX 100644
197
--- a/block/qcow2.c
198
+++ b/block/qcow2.c
199
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
200
mode = PREALLOC_MODE_OFF;
201
}
202
ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
203
- mode, errp);
204
+ mode, 0, errp);
205
if (ret < 0) {
206
return ret;
207
}
208
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
209
* always fulfilled, so there is no need to pass it on.)
210
*/
211
bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
212
- false, PREALLOC_MODE_OFF, &local_err);
213
+ false, PREALLOC_MODE_OFF, 0, &local_err);
214
if (local_err) {
215
warn_reportf_err(local_err,
216
"Failed to truncate the tail of the image: ");
217
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
218
* file should be resized to the exact target size, too,
219
* so we pass @exact here.
220
*/
221
- ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp);
222
+ ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0,
223
+ errp);
224
if (ret < 0) {
225
goto fail;
38
}
226
}
227
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
228
new_file_size = allocation_start +
229
nb_new_data_clusters * s->cluster_size;
230
/* Image file grows, so @exact does not matter */
231
- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp);
232
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
233
+ errp);
234
if (ret < 0) {
235
error_prepend(errp, "Failed to resize underlying file: ");
236
qcow2_free_clusters(bs, allocation_start,
237
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
238
if (len < 0) {
239
return len;
240
}
241
- return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
242
+ return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0,
243
+ NULL);
244
}
245
246
if (offset_into_cluster(s, offset)) {
247
@@ -XXX,XX +XXX,XX @@ static int make_completely_empty(BlockDriverState *bs)
248
}
249
250
ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
251
- PREALLOC_MODE_OFF, &local_err);
252
+ PREALLOC_MODE_OFF, 0, &local_err);
253
if (ret < 0) {
254
error_report_err(local_err);
255
goto fail;
256
diff --git a/block/raw-format.c b/block/raw-format.c
257
index XXXXXXX..XXXXXXX 100644
258
--- a/block/raw-format.c
259
+++ b/block/raw-format.c
260
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
261
262
s->size = offset;
263
offset += s->offset;
264
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
265
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
266
}
267
268
static void raw_eject(BlockDriverState *bs, bool eject_flag)
269
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
270
index XXXXXXX..XXXXXXX 100644
271
--- a/block/vhdx-log.c
272
+++ b/block/vhdx-log.c
273
@@ -XXX,XX +XXX,XX @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
274
goto exit;
275
}
276
ret = bdrv_truncate(bs->file, new_file_size, false,
277
- PREALLOC_MODE_OFF, NULL);
278
+ PREALLOC_MODE_OFF, 0, NULL);
279
if (ret < 0) {
280
goto exit;
281
}
282
diff --git a/block/vhdx.c b/block/vhdx.c
283
index XXXXXXX..XXXXXXX 100644
284
--- a/block/vhdx.c
285
+++ b/block/vhdx.c
286
@@ -XXX,XX +XXX,XX @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
287
}
288
289
return bdrv_truncate(bs->file, *new_offset + s->block_size, false,
290
- PREALLOC_MODE_OFF, NULL);
291
+ PREALLOC_MODE_OFF, 0, NULL);
292
}
293
294
/*
295
diff --git a/block/vmdk.c b/block/vmdk.c
296
index XXXXXXX..XXXXXXX 100644
297
--- a/block/vmdk.c
298
+++ b/block/vmdk.c
299
@@ -XXX,XX +XXX,XX @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
300
}
301
length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
302
ret = bdrv_truncate(s->extents[i].file, length, false,
303
- PREALLOC_MODE_OFF, NULL);
304
+ PREALLOC_MODE_OFF, 0, NULL);
305
if (ret < 0) {
306
return ret;
307
}
308
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
309
index XXXXXXX..XXXXXXX 100644
310
--- a/tests/test-block-iothread.c
311
+++ b/tests/test-block-iothread.c
312
@@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c)
313
int ret;
314
315
/* Normal success path */
316
- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL);
317
+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
318
g_assert_cmpint(ret, ==, 0);
319
320
/* Early error: Negative offset */
321
- ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL);
322
+ ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, 0, NULL);
323
g_assert_cmpint(ret, ==, -EINVAL);
324
325
/* Error: Read-only image */
326
c->bs->read_only = true;
327
c->bs->open_flags &= ~BDRV_O_RDWR;
328
329
- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL);
330
+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
331
g_assert_cmpint(ret, ==, -EACCES);
332
333
c->bs->read_only = false;
39
--
334
--
40
2.13.6
335
2.25.3
41
336
42
337
diff view generated by jsdifflib
1
Commit 1f4ad7d fixed 'qemu-img info' for raw images that are currently
1
Now that node level interface bdrv_truncate() supports passing request
2
in use as a mirror target. It is not enough for image formats, though,
2
flags to the block driver, expose this on the BlockBackend level, too.
3
as these still unconditionally request BLK_PERM_CONSISTENT_READ.
3
4
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
As this permission is geared towards whether the guest-visible data is
5
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
consistent, and has no impact on whether the metadata is sane, and
6
Reviewed-by: Alberto Garcia <berto@igalia.com>
7
'qemu-img info' does not read guest-visible data (except for the raw
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
format), it makes sense to not require BLK_PERM_CONSISTENT_READ if there
8
Message-Id: <20200424125448.63318-4-kwolf@redhat.com>
9
is not going to be any guest I/O performed, regardless of image format.
10
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
10
---
13
block.c | 6 +++++-
11
include/sysemu/block-backend.h | 2 +-
14
1 file changed, 5 insertions(+), 1 deletion(-)
12
block.c | 3 ++-
15
13
block/block-backend.c | 4 ++--
14
block/commit.c | 4 ++--
15
block/crypto.c | 2 +-
16
block/mirror.c | 2 +-
17
block/qcow2.c | 4 ++--
18
block/qed.c | 2 +-
19
block/vdi.c | 2 +-
20
block/vhdx.c | 4 ++--
21
block/vmdk.c | 6 +++---
22
block/vpc.c | 2 +-
23
blockdev.c | 2 +-
24
qemu-img.c | 2 +-
25
qemu-io-cmds.c | 2 +-
26
15 files changed, 22 insertions(+), 21 deletions(-)
27
28
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/include/sysemu/block-backend.h
31
+++ b/include/sysemu/block-backend.h
32
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
33
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
34
int bytes);
35
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
36
- PreallocMode prealloc, Error **errp);
37
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
38
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes);
39
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
40
int64_t pos, int size);
16
diff --git a/block.c b/block.c
41
diff --git a/block.c b/block.c
17
index XXXXXXX..XXXXXXX 100644
42
index XXXXXXX..XXXXXXX 100644
18
--- a/block.c
43
--- a/block.c
19
+++ b/block.c
44
+++ b/block.c
20
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
45
@@ -XXX,XX +XXX,XX @@ static int64_t create_file_fallback_truncate(BlockBackend *blk,
21
assert(role == &child_backing || role == &child_file);
46
int64_t size;
22
47
int ret;
23
if (!backing) {
48
24
+ int flags = bdrv_reopen_get_flags(reopen_queue, bs);
49
- ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err);
25
+
50
+ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
26
/* Apart from the modifications below, the same permissions are
51
+ &local_err);
27
* forwarded and left alone as for filters */
52
if (ret < 0 && ret != -ENOTSUP) {
28
bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
53
error_propagate(errp, local_err);
29
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
54
return ret;
30
55
diff --git a/block/block-backend.c b/block/block-backend.c
31
/* bs->file always needs to be consistent because of the metadata. We
56
index XXXXXXX..XXXXXXX 100644
32
* can never allow other users to resize or write to it. */
57
--- a/block/block-backend.c
33
- perm |= BLK_PERM_CONSISTENT_READ;
58
+++ b/block/block-backend.c
34
+ if (!(flags & BDRV_O_NO_IO)) {
59
@@ -XXX,XX +XXX,XX @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
35
+ perm |= BLK_PERM_CONSISTENT_READ;
60
}
36
+ }
61
37
shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
62
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
63
- PreallocMode prealloc, Error **errp)
64
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
65
{
66
if (!blk_is_available(blk)) {
67
error_setg(errp, "No medium inserted");
68
return -ENOMEDIUM;
69
}
70
71
- return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp);
72
+ return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
73
}
74
75
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
76
diff --git a/block/commit.c b/block/commit.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/block/commit.c
79
+++ b/block/commit.c
80
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn commit_run(Job *job, Error **errp)
81
}
82
83
if (base_len < len) {
84
- ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL);
85
+ ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
86
if (ret) {
87
goto out;
88
}
89
@@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs)
90
* grow the backing file image if possible. If not possible,
91
* we must return an error */
92
if (length > backing_length) {
93
- ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF,
94
+ ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0,
95
&local_err);
96
if (ret < 0) {
97
error_report_err(local_err);
98
diff --git a/block/crypto.c b/block/crypto.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/block/crypto.c
101
+++ b/block/crypto.c
102
@@ -XXX,XX +XXX,XX @@ static ssize_t block_crypto_init_func(QCryptoBlock *block,
103
* which will be used by the crypto header
104
*/
105
return blk_truncate(data->blk, data->size + headerlen, false,
106
- data->prealloc, errp);
107
+ data->prealloc, 0, errp);
108
}
109
110
111
diff --git a/block/mirror.c b/block/mirror.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/block/mirror.c
114
+++ b/block/mirror.c
115
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
116
117
if (s->bdev_length > base_length) {
118
ret = blk_truncate(s->target, s->bdev_length, false,
119
- PREALLOC_MODE_OFF, NULL);
120
+ PREALLOC_MODE_OFF, 0, NULL);
121
if (ret < 0) {
122
goto immediate_exit;
123
}
124
diff --git a/block/qcow2.c b/block/qcow2.c
125
index XXXXXXX..XXXXXXX 100644
126
--- a/block/qcow2.c
127
+++ b/block/qcow2.c
128
@@ -XXX,XX +XXX,XX @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
129
130
/* Okay, now that we have a valid image, let's give it the right size */
131
ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
132
- errp);
133
+ 0, errp);
134
if (ret < 0) {
135
error_prepend(errp, "Could not resize image: ");
136
goto out;
137
@@ -XXX,XX +XXX,XX @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
138
* Amending image options should ensure that the image has
139
* exactly the given new values, so pass exact=true here.
140
*/
141
- ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp);
142
+ ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp);
143
blk_unref(blk);
144
if (ret < 0) {
145
return ret;
146
diff --git a/block/qed.c b/block/qed.c
147
index XXXXXXX..XXXXXXX 100644
148
--- a/block/qed.c
149
+++ b/block/qed.c
150
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
151
* The QED format associates file length with allocation status,
152
* so a new file (which is empty) must have a length of 0.
153
*/
154
- ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp);
155
+ ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp);
156
if (ret < 0) {
157
goto out;
158
}
159
diff --git a/block/vdi.c b/block/vdi.c
160
index XXXXXXX..XXXXXXX 100644
161
--- a/block/vdi.c
162
+++ b/block/vdi.c
163
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
164
165
if (image_type == VDI_TYPE_STATIC) {
166
ret = blk_truncate(blk, offset + blocks * block_size, false,
167
- PREALLOC_MODE_OFF, errp);
168
+ PREALLOC_MODE_OFF, 0, errp);
169
if (ret < 0) {
170
error_prepend(errp, "Failed to statically allocate file");
171
goto exit;
172
diff --git a/block/vhdx.c b/block/vhdx.c
173
index XXXXXXX..XXXXXXX 100644
174
--- a/block/vhdx.c
175
+++ b/block/vhdx.c
176
@@ -XXX,XX +XXX,XX @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
177
/* All zeroes, so we can just extend the file - the end of the BAT
178
* is the furthest thing we have written yet */
179
ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF,
180
- errp);
181
+ 0, errp);
182
if (ret < 0) {
183
goto exit;
184
}
185
} else if (type == VHDX_TYPE_FIXED) {
186
ret = blk_truncate(blk, data_file_offset + image_size, false,
187
- PREALLOC_MODE_OFF, errp);
188
+ PREALLOC_MODE_OFF, 0, errp);
189
if (ret < 0) {
190
goto exit;
191
}
192
diff --git a/block/vmdk.c b/block/vmdk.c
193
index XXXXXXX..XXXXXXX 100644
194
--- a/block/vmdk.c
195
+++ b/block/vmdk.c
196
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk,
197
int gd_buf_size;
198
199
if (flat) {
200
- ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp);
201
+ ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
202
goto exit;
203
}
204
magic = cpu_to_be32(VMDK4_MAGIC);
205
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk,
206
}
207
208
ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
209
- PREALLOC_MODE_OFF, errp);
210
+ PREALLOC_MODE_OFF, 0, errp);
211
if (ret < 0) {
212
goto exit;
213
}
214
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
215
/* bdrv_pwrite write padding zeros to align to sector, we don't need that
216
* for description file */
217
if (desc_offset == 0) {
218
- ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp);
219
+ ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
220
if (ret < 0) {
221
goto exit;
222
}
223
diff --git a/block/vpc.c b/block/vpc.c
224
index XXXXXXX..XXXXXXX 100644
225
--- a/block/vpc.c
226
+++ b/block/vpc.c
227
@@ -XXX,XX +XXX,XX @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
228
/* Add footer to total size */
229
total_size += HEADER_SIZE;
230
231
- ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp);
232
+ ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
233
if (ret < 0) {
234
return ret;
235
}
236
diff --git a/blockdev.c b/blockdev.c
237
index XXXXXXX..XXXXXXX 100644
238
--- a/blockdev.c
239
+++ b/blockdev.c
240
@@ -XXX,XX +XXX,XX @@ void qmp_block_resize(bool has_device, const char *device,
241
}
242
243
bdrv_drained_begin(bs);
244
- ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp);
245
+ ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
246
bdrv_drained_end(bs);
247
248
out:
249
diff --git a/qemu-img.c b/qemu-img.c
250
index XXXXXXX..XXXXXXX 100644
251
--- a/qemu-img.c
252
+++ b/qemu-img.c
253
@@ -XXX,XX +XXX,XX @@ static int img_resize(int argc, char **argv)
254
* resizing, so pass @exact=true. It is of no use to report
255
* success when the image has not actually been resized.
256
*/
257
- ret = blk_truncate(blk, total_size, true, prealloc, &err);
258
+ ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
259
if (!ret) {
260
qprintf(quiet, "Image resized.\n");
38
} else {
261
} else {
39
/* We want consistent read from backing files if the parent needs it.
262
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
263
index XXXXXXX..XXXXXXX 100644
264
--- a/qemu-io-cmds.c
265
+++ b/qemu-io-cmds.c
266
@@ -XXX,XX +XXX,XX @@ static int truncate_f(BlockBackend *blk, int argc, char **argv)
267
* exact=true. It is better to err on the "emit more errors" side
268
* than to be overly permissive.
269
*/
270
- ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err);
271
+ ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err);
272
if (ret < 0) {
273
error_report_err(local_err);
274
return ret;
40
--
275
--
41
2.13.6
276
2.25.3
42
277
43
278
diff view generated by jsdifflib
1
Commit 15afd94a047 added code to acquire and release the AioContext in
1
If BDRV_REQ_ZERO_WRITE is set and we're extending the image, calling
2
qemuio_command(). This means that the lock is taken twice now in the
2
qcow2_cluster_zeroize() with flags=0 does the right thing: It doesn't
3
call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for
3
undo any previous preallocation, but just adds the zero flag to all
4
any requests issued to nodes in a non-mainloop AioContext.
4
relevant L2 entries. If an external data file is in use, a write_zeroes
5
5
request to the data file is made instead.
6
Dropping the first locking from hmp_qemu_io() fixes the problem.
7
6
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-Id: <20200424125448.63318-5-kwolf@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
12
---
11
hmp.c | 6 ------
13
block/qcow2-cluster.c | 2 +-
12
1 file changed, 6 deletions(-)
14
block/qcow2.c | 34 ++++++++++++++++++++++++++++++++++
15
2 files changed, 35 insertions(+), 1 deletion(-)
13
16
14
diff --git a/hmp.c b/hmp.c
17
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
15
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
16
--- a/hmp.c
19
--- a/block/qcow2-cluster.c
17
+++ b/hmp.c
20
+++ b/block/qcow2-cluster.c
18
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
21
@@ -XXX,XX +XXX,XX @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
19
{
22
/* Caller must pass aligned values, except at image end */
20
BlockBackend *blk;
23
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
21
BlockBackend *local_blk = NULL;
24
assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
22
- AioContext *aio_context;
25
- end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
23
const char* device = qdict_get_str(qdict, "device");
26
+ end_offset >= bs->total_sectors << BDRV_SECTOR_BITS);
24
const char* command = qdict_get_str(qdict, "command");
27
25
Error *err = NULL;
28
/* The zero flag is only supported by version 3 and newer */
26
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
29
if (s->qcow_version < 3) {
27
}
30
diff --git a/block/qcow2.c b/block/qcow2.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/block/qcow2.c
33
+++ b/block/qcow2.c
34
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
35
36
bs->supported_zero_flags = header.version >= 3 ?
37
BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
38
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
39
40
/* Repair image if dirty */
41
if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
42
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
43
g_assert_not_reached();
28
}
44
}
29
45
30
- aio_context = blk_get_aio_context(blk);
46
+ if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) {
31
- aio_context_acquire(aio_context);
47
+ uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size);
32
-
48
+
33
/*
49
+ /*
34
* Notably absent: Proper permission management. This is sad, but it seems
50
+ * Use zero clusters as much as we can. qcow2_cluster_zeroize()
35
* almost impossible to achieve without changing the semantics and thereby
51
+ * requires a cluster-aligned start. The end may be unaligned if it is
36
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
52
+ * at the end of the image (which it is here).
37
*/
53
+ */
38
qemuio_command(blk, command);
54
+ ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0);
39
55
+ if (ret < 0) {
40
- aio_context_release(aio_context);
56
+ error_setg_errno(errp, -ret, "Failed to zero out new clusters");
41
-
57
+ goto fail;
42
fail:
58
+ }
43
blk_unref(local_blk);
59
+
44
hmp_handle_error(mon, &err);
60
+ /* Write explicit zeros for the unaligned head */
61
+ if (zero_start > old_length) {
62
+ uint64_t len = zero_start - old_length;
63
+ uint8_t *buf = qemu_blockalign0(bs, len);
64
+ QEMUIOVector qiov;
65
+ qemu_iovec_init_buf(&qiov, buf, len);
66
+
67
+ qemu_co_mutex_unlock(&s->lock);
68
+ ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0);
69
+ qemu_co_mutex_lock(&s->lock);
70
+
71
+ qemu_vfree(buf);
72
+ if (ret < 0) {
73
+ error_setg_errno(errp, -ret, "Failed to zero out the new area");
74
+ goto fail;
75
+ }
76
+ }
77
+ }
78
+
79
if (prealloc != PREALLOC_MODE_OFF) {
80
/* Flush metadata before actually changing the image size */
81
ret = qcow2_write_caches(bs);
45
--
82
--
46
2.13.6
83
2.25.3
47
84
48
85
diff view generated by jsdifflib
1
Now that the bdrv_drain_invoke() calls are pulled up to the callers of
1
The raw format driver can simply forward the flag and let its bs->file
2
bdrv_drain_recurse(), the 'begin' parameter isn't needed any more.
2
child take care of actually providing the zeros.
3
3
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Reviewed-by: Max Reitz <mreitz@redhat.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Message-Id: <20200424125448.63318-6-kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
---
10
---
7
block/io.c | 12 ++++++------
11
block/raw-format.c | 4 +++-
8
1 file changed, 6 insertions(+), 6 deletions(-)
12
1 file changed, 3 insertions(+), 1 deletion(-)
9
13
10
diff --git a/block/io.c b/block/io.c
14
diff --git a/block/raw-format.c b/block/raw-format.c
11
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
12
--- a/block/io.c
16
--- a/block/raw-format.c
13
+++ b/block/io.c
17
+++ b/block/raw-format.c
14
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
18
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
15
}
19
20
s->size = offset;
21
offset += s->offset;
22
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
23
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
16
}
24
}
17
25
18
-static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
26
static void raw_eject(BlockDriverState *bs, bool eject_flag)
19
+static bool bdrv_drain_recurse(BlockDriverState *bs)
27
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
20
{
28
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
21
BdrvChild *child, *tmp;
29
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
22
bool waited;
30
bs->file->bs->supported_zero_flags);
23
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
31
+ bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags &
24
*/
32
+ BDRV_REQ_ZERO_WRITE;
25
bdrv_ref(bs);
33
26
}
34
if (bs->probed && !bdrv_is_read_only(bs)) {
27
- waited |= bdrv_drain_recurse(bs, begin);
35
bdrv_refresh_filename(bs->file->bs);
28
+ waited |= bdrv_drain_recurse(bs);
29
if (in_main_loop) {
30
bdrv_unref(bs);
31
}
32
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
33
}
34
35
bdrv_drain_invoke(bs, true);
36
- bdrv_drain_recurse(bs, true);
37
+ bdrv_drain_recurse(bs);
38
}
39
40
void bdrv_drained_end(BlockDriverState *bs)
41
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
42
43
bdrv_parent_drained_end(bs);
44
bdrv_drain_invoke(bs, false);
45
- bdrv_drain_recurse(bs, false);
46
+ bdrv_drain_recurse(bs);
47
aio_enable_external(bdrv_get_aio_context(bs));
48
}
49
50
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
51
aio_context_acquire(aio_context);
52
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
53
if (aio_context == bdrv_get_aio_context(bs)) {
54
- waited |= bdrv_drain_recurse(bs, true);
55
+ waited |= bdrv_drain_recurse(bs);
56
}
57
}
58
aio_context_release(aio_context);
59
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
60
aio_enable_external(aio_context);
61
bdrv_parent_drained_end(bs);
62
bdrv_drain_invoke(bs, false);
63
- bdrv_drain_recurse(bs, false);
64
+ bdrv_drain_recurse(bs);
65
aio_context_release(aio_context);
66
}
67
68
--
36
--
69
2.13.6
37
2.25.3
70
38
71
39
diff view generated by jsdifflib
1
Removing a quorum child node with x-blockdev-change results in a quorum
1
For regular files, we always get BDRV_REQ_ZERO_WRITE behaviour from the
2
driver state that cannot be recreated with create options because it
2
OS, so we can advertise the flag and just ignore it.
3
would require a list with gaps. This causes trouble in at least
4
.bdrv_refresh_filename().
5
6
Document this problem so that we won't accidentally mark the command
7
stable without having addressed it.
8
3
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Alberto Garcia <berto@igalia.com>
6
Reviewed-by: Alberto Garcia <berto@igalia.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-Id: <20200424125448.63318-7-kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
10
---
12
qapi/block-core.json | 4 ++++
11
block/file-posix.c | 4 ++++
13
1 file changed, 4 insertions(+)
12
1 file changed, 4 insertions(+)
14
13
15
diff --git a/qapi/block-core.json b/qapi/block-core.json
14
diff --git a/block/file-posix.c b/block/file-posix.c
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/qapi/block-core.json
16
--- a/block/file-posix.c
18
+++ b/qapi/block-core.json
17
+++ b/block/file-posix.c
19
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
20
# does not support all kinds of operations, all kinds of children, nor
19
#endif
21
# all block drivers.
20
22
#
21
bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
23
+# FIXME Removing children from a quorum node means introducing gaps in the
22
+ if (S_ISREG(st.st_mode)) {
24
+# child indices. This cannot be represented in the 'children' list of
23
+ /* When extending regular files, we get zeros from the OS */
25
+# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename().
24
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
26
+#
25
+ }
27
# Warning: The data in a new quorum child MUST be consistent with that of
26
ret = 0;
28
# the rest of the array.
27
fail:
29
#
28
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
30
--
29
--
31
2.13.6
30
2.25.3
32
31
33
32
diff view generated by jsdifflib
1
The device is drained, so there is no point in waiting for requests at
1
When extending the size of an image that has a backing file larger than
2
the end of the drained section. Remove the bdrv_drain_recurse() calls
2
its old size, make sure that the backing file data doesn't become
3
there.
3
visible in the guest, but the added area is properly zeroed out.
4
4
5
The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e
5
Consider the following scenario where the overlay is shorter than its
6
in order to call the .bdrv_co_drain_end() driver callback. This is now
6
backing file:
7
done by a separate bdrv_drain_invoke() call.
7
8
base.qcow2: AAAAAAAA
9
overlay.qcow2: BBBB
10
11
When resizing (extending) overlay.qcow2, the new blocks should not stay
12
unallocated and make the additional As from base.qcow2 visible like
13
before this patch, but zeros should be read.
14
15
A similar case happens with the various variants of a commit job when an
16
intermediate file is short (- for unallocated):
17
18
base.qcow2: A-A-AAAA
19
mid.qcow2: BB-B
20
top.qcow2: C--C--C-
21
22
After commit top.qcow2 to mid.qcow2, the following happens:
23
24
mid.qcow2: CB-C00C0 (correct result)
25
mid.qcow2: CB-C--C- (before this fix)
26
27
Without the fix, blocks that previously read as zeros on top.qcow2
28
suddenly turn into A.
8
29
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
30
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
31
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
32
Message-Id: <20200424125448.63318-8-kwolf@redhat.com>
33
Reviewed-by: Max Reitz <mreitz@redhat.com>
34
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
35
---
13
block/io.c | 2 --
36
block/io.c | 25 +++++++++++++++++++++++++
14
1 file changed, 2 deletions(-)
37
1 file changed, 25 insertions(+)
15
38
16
diff --git a/block/io.c b/block/io.c
39
diff --git a/block/io.c b/block/io.c
17
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
18
--- a/block/io.c
41
--- a/block/io.c
19
+++ b/block/io.c
42
+++ b/block/io.c
20
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
43
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
21
44
goto out;
22
bdrv_parent_drained_end(bs);
23
bdrv_drain_invoke(bs, false);
24
- bdrv_drain_recurse(bs);
25
aio_enable_external(bdrv_get_aio_context(bs));
26
}
27
28
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
29
aio_enable_external(aio_context);
30
bdrv_parent_drained_end(bs);
31
bdrv_drain_invoke(bs, false);
32
- bdrv_drain_recurse(bs);
33
aio_context_release(aio_context);
34
}
45
}
35
46
47
+ /*
48
+ * If the image has a backing file that is large enough that it would
49
+ * provide data for the new area, we cannot leave it unallocated because
50
+ * then the backing file content would become visible. Instead, zero-fill
51
+ * the new area.
52
+ *
53
+ * Note that if the image has a backing file, but was opened without the
54
+ * backing file, taking care of keeping things consistent with that backing
55
+ * file is the user's responsibility.
56
+ */
57
+ if (new_bytes && bs->backing) {
58
+ int64_t backing_len;
59
+
60
+ backing_len = bdrv_getlength(backing_bs(bs));
61
+ if (backing_len < 0) {
62
+ ret = backing_len;
63
+ error_setg_errno(errp, -ret, "Could not get backing file size");
64
+ goto out;
65
+ }
66
+
67
+ if (backing_len > old_size) {
68
+ flags |= BDRV_REQ_ZERO_WRITE;
69
+ }
70
+ }
71
+
72
if (drv->bdrv_co_truncate) {
73
if (flags & ~bs->supported_truncate_flags) {
74
error_setg(errp, "Block driver does not support requested flags");
36
--
75
--
37
2.13.6
76
2.25.3
38
77
39
78
diff view generated by jsdifflib
1
From: John Snow <jsnow@redhat.com>
1
We want to keep TEST_IMG for the full path of the main test image, but
2
filter_testfiles() must be called for other test images before replacing
3
other things like the image format because the test directory path could
4
contain the format as a substring.
2
5
3
VPC has some difficulty creating geometries of particular size.
6
Insert a filter_testfiles() call between both.
4
However, we can indeed force it to use a literal one, so let's
5
do that for the sake of test 197, which is testing some specific
6
offsets.
7
7
8
Signed-off-by: John Snow <jsnow@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Reviewed-by: Lukáš Doktor <ldoktor@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Message-Id: <20200424125448.63318-9-kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
13
---
14
tests/qemu-iotests/197 | 4 ++++
14
tests/qemu-iotests/iotests.py | 5 +++--
15
tests/qemu-iotests/common.filter | 3 ++-
15
1 file changed, 3 insertions(+), 2 deletions(-)
16
2 files changed, 6 insertions(+), 1 deletion(-)
17
16
18
diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
17
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
19
index XXXXXXX..XXXXXXX 100755
20
--- a/tests/qemu-iotests/197
21
+++ b/tests/qemu-iotests/197
22
@@ -XXX,XX +XXX,XX @@ echo '=== Copy-on-read ==='
23
echo
24
25
# Prep the images
26
+# VPC rounds image sizes to a specific geometry, force a specific size.
27
+if [ "$IMGFMT" = "vpc" ]; then
28
+ IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
29
+fi
30
_make_test_img 4G
31
$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
32
IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
33
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
34
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
35
--- a/tests/qemu-iotests/common.filter
19
--- a/tests/qemu-iotests/iotests.py
36
+++ b/tests/qemu-iotests/common.filter
20
+++ b/tests/qemu-iotests/iotests.py
37
@@ -XXX,XX +XXX,XX @@ _filter_img_create()
21
@@ -XXX,XX +XXX,XX @@ def filter_img_info(output, filename):
38
-e "s# log_size=[0-9]\\+##g" \
22
for line in output.split('\n'):
39
-e "s# refcount_bits=[0-9]\\+##g" \
23
if 'disk size' in line or 'actual-size' in line:
40
-e "s# key-secret=[a-zA-Z0-9]\\+##g" \
24
continue
41
- -e "s# iter-time=[0-9]\\+##g"
25
- line = line.replace(filename, 'TEST_IMG') \
42
+ -e "s# iter-time=[0-9]\\+##g" \
26
- .replace(imgfmt, 'IMGFMT')
43
+ -e "s# force_size=\\(on\\|off\\)##g"
27
+ line = line.replace(filename, 'TEST_IMG')
44
}
28
+ line = filter_testfiles(line)
45
29
+ line = line.replace(imgfmt, 'IMGFMT')
46
_filter_img_info()
30
line = re.sub('iters: [0-9]+', 'iters: XXX', line)
31
line = re.sub('uuid: [-a-f0-9]+', 'uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', line)
32
line = re.sub('cid: [0-9]+', 'cid: XXXXXXXXXX', line)
47
--
33
--
48
2.13.6
34
2.25.3
49
35
50
36
diff view generated by jsdifflib
1
This adds a test case that the BlockDriver callbacks for drain are
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
called in bdrv_drained_all_begin/end(), and that both of them are called
2
Message-Id: <20200424125448.63318-10-kwolf@redhat.com>
3
exactly once.
3
Reviewed-by: Max Reitz <mreitz@redhat.com>
4
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
---
7
tests/qemu-iotests/274 | 155 +++++++++++++++++++++
8
tests/qemu-iotests/274.out | 268 +++++++++++++++++++++++++++++++++++++
9
tests/qemu-iotests/group | 1 +
10
3 files changed, 424 insertions(+)
11
create mode 100755 tests/qemu-iotests/274
12
create mode 100644 tests/qemu-iotests/274.out
4
13
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
15
new file mode 100755
7
Reviewed-by: Eric Blake <eblake@redhat.com>
16
index XXXXXXX..XXXXXXX
8
---
17
--- /dev/null
9
tests/test-bdrv-drain.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++
18
+++ b/tests/qemu-iotests/274
10
tests/Makefile.include | 2 +
19
@@ -XXX,XX +XXX,XX @@
11
2 files changed, 139 insertions(+)
20
+#!/usr/bin/env python3
12
create mode 100644 tests/test-bdrv-drain.c
21
+#
13
22
+# Copyright (C) 2019 Red Hat, Inc.
14
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
23
+#
24
+# This program is free software; you can redistribute it and/or modify
25
+# it under the terms of the GNU General Public License as published by
26
+# the Free Software Foundation; either version 2 of the License, or
27
+# (at your option) any later version.
28
+#
29
+# This program is distributed in the hope that it will be useful,
30
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
31
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32
+# GNU General Public License for more details.
33
+#
34
+# You should have received a copy of the GNU General Public License
35
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
36
+#
37
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
38
+#
39
+# Some tests for short backing files and short overlays
40
+
41
+import iotests
42
+
43
+iotests.verify_image_format(supported_fmts=['qcow2'])
44
+iotests.verify_platform(['linux'])
45
+
46
+size_short = 1 * 1024 * 1024
47
+size_long = 2 * 1024 * 1024
48
+size_diff = size_long - size_short
49
+
50
+def create_chain() -> None:
51
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base,
52
+ str(size_long))
53
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, mid,
54
+ str(size_short))
55
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', mid, top,
56
+ str(size_long))
57
+
58
+ iotests.qemu_io_log('-c', 'write -P 1 0 %d' % size_long, base)
59
+
60
+def create_vm() -> iotests.VM:
61
+ vm = iotests.VM()
62
+ vm.add_blockdev('file,filename=%s,node-name=base-file' % base)
63
+ vm.add_blockdev('%s,file=base-file,node-name=base' % iotests.imgfmt)
64
+ vm.add_blockdev('file,filename=%s,node-name=mid-file' % mid)
65
+ vm.add_blockdev('%s,file=mid-file,node-name=mid,backing=base'
66
+ % iotests.imgfmt)
67
+ vm.add_drive(top, 'backing=mid,node-name=top')
68
+ return vm
69
+
70
+with iotests.FilePath('base') as base, \
71
+ iotests.FilePath('mid') as mid, \
72
+ iotests.FilePath('top') as top:
73
+
74
+ iotests.log('== Commit tests ==')
75
+
76
+ create_chain()
77
+
78
+ iotests.log('=== Check visible data ===')
79
+
80
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, top)
81
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), top)
82
+
83
+ iotests.log('=== Checking allocation status ===')
84
+
85
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
86
+ '-c', 'alloc %d %d' % (size_short, size_diff),
87
+ base)
88
+
89
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
90
+ '-c', 'alloc %d %d' % (size_short, size_diff),
91
+ mid)
92
+
93
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
94
+ '-c', 'alloc %d %d' % (size_short, size_diff),
95
+ top)
96
+
97
+ iotests.log('=== Checking map ===')
98
+
99
+ iotests.qemu_img_log('map', '--output=json', base)
100
+ iotests.qemu_img_log('map', '--output=human', base)
101
+ iotests.qemu_img_log('map', '--output=json', mid)
102
+ iotests.qemu_img_log('map', '--output=human', mid)
103
+ iotests.qemu_img_log('map', '--output=json', top)
104
+ iotests.qemu_img_log('map', '--output=human', top)
105
+
106
+ iotests.log('=== Testing qemu-img commit (top -> mid) ===')
107
+
108
+ iotests.qemu_img_log('commit', top)
109
+ iotests.img_info_log(mid)
110
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
111
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
112
+
113
+ iotests.log('=== Testing HMP commit (top -> mid) ===')
114
+
115
+ create_chain()
116
+ with create_vm() as vm:
117
+ vm.launch()
118
+ vm.qmp_log('human-monitor-command', command_line='commit drive0')
119
+
120
+ iotests.img_info_log(mid)
121
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
122
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
123
+
124
+ iotests.log('=== Testing QMP active commit (top -> mid) ===')
125
+
126
+ create_chain()
127
+ with create_vm() as vm:
128
+ vm.launch()
129
+ vm.qmp_log('block-commit', device='top', base_node='mid',
130
+ job_id='job0', auto_dismiss=False)
131
+ vm.run_job('job0', wait=5)
132
+
133
+ iotests.img_info_log(mid)
134
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
135
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
136
+
137
+
138
+ iotests.log('== Resize tests ==')
139
+
140
+ # Use different sizes for different allocation modes:
141
+ #
142
+ # We want to have at least one test where 32 bit truncation in the size of
143
+ # the overlapping area becomes visible. This is covered by the
144
+ # prealloc='off' case (1G to 6G is an overlap of 5G).
145
+ #
146
+ # However, we can only do this for modes that don't preallocate data
147
+ # because otherwise we might run out of space on the test host.
148
+ #
149
+ # We also want to test some unaligned combinations.
150
+ for (prealloc, base_size, top_size_old, top_size_new, off) in [
151
+ ('off', '6G', '1G', '8G', '5G'),
152
+ ('metadata', '32G', '30G', '33G', '31G'),
153
+ ('falloc', '10M', '5M', '15M', '9M'),
154
+ ('full', '16M', '8M', '12M', '11M'),
155
+ ('off', '384k', '253k', '512k', '253k'),
156
+ ('off', '400k', '256k', '512k', '336k'),
157
+ ('off', '512k', '256k', '500k', '436k')]:
158
+
159
+ iotests.log('=== preallocation=%s ===' % prealloc)
160
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, base_size)
161
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, top,
162
+ top_size_old)
163
+ iotests.qemu_io_log('-c', 'write -P 1 %s 64k' % off, base)
164
+
165
+ # After this, top_size_old to base_size should be allocated/zeroed.
166
+ #
167
+ # In theory, leaving base_size to top_size_new unallocated would be
168
+ # correct, but in practice, if we zero out anything, we zero out
169
+ # everything up to top_size_new.
170
+ iotests.qemu_img_log('resize', '-f', iotests.imgfmt,
171
+ '--preallocation', prealloc, top, top_size_new)
172
+ iotests.qemu_io_log('-c', 'read -P 0 %s 64k' % off, top)
173
+ iotests.qemu_io_log('-c', 'map', top)
174
+ iotests.qemu_img_log('map', '--output=json', top)
175
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
15
new file mode 100644
176
new file mode 100644
16
index XXXXXXX..XXXXXXX
177
index XXXXXXX..XXXXXXX
17
--- /dev/null
178
--- /dev/null
18
+++ b/tests/test-bdrv-drain.c
179
+++ b/tests/qemu-iotests/274.out
19
@@ -XXX,XX +XXX,XX @@
180
@@ -XXX,XX +XXX,XX @@
20
+/*
181
+== Commit tests ==
21
+ * Block node draining tests
182
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
22
+ *
183
+
23
+ * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
184
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
24
+ *
185
+
25
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
186
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
26
+ * of this software and associated documentation files (the "Software"), to deal
187
+
27
+ * in the Software without restriction, including without limitation the rights
188
+wrote 2097152/2097152 bytes at offset 0
28
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
189
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
29
+ * copies of the Software, and to permit persons to whom the Software is
190
+
30
+ * furnished to do so, subject to the following conditions:
191
+=== Check visible data ===
31
+ *
192
+read 1048576/1048576 bytes at offset 0
32
+ * The above copyright notice and this permission notice shall be included in
193
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
33
+ * all copies or substantial portions of the Software.
194
+
34
+ *
195
+read 1048576/1048576 bytes at offset 1048576
35
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
196
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
36
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
197
+
37
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
198
+=== Checking allocation status ===
38
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
199
+1048576/1048576 bytes allocated at offset 0 bytes
39
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
200
+1048576/1048576 bytes allocated at offset 1 MiB
40
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
201
+
41
+ * THE SOFTWARE.
202
+0/1048576 bytes allocated at offset 0 bytes
42
+ */
203
+0/0 bytes allocated at offset 1 MiB
43
+
204
+
44
+#include "qemu/osdep.h"
205
+0/1048576 bytes allocated at offset 0 bytes
45
+#include "block/block.h"
206
+0/1048576 bytes allocated at offset 1 MiB
46
+#include "sysemu/block-backend.h"
207
+
47
+#include "qapi/error.h"
208
+=== Checking map ===
48
+
209
+[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680}]
49
+typedef struct BDRVTestState {
210
+
50
+ int drain_count;
211
+Offset Length Mapped to File
51
+} BDRVTestState;
212
+0 0x200000 0x50000 TEST_DIR/PID-base
52
+
213
+
53
+static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
214
+[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680}]
54
+{
215
+
55
+ BDRVTestState *s = bs->opaque;
216
+Offset Length Mapped to File
56
+ s->drain_count++;
217
+0 0x100000 0x50000 TEST_DIR/PID-base
57
+}
218
+
58
+
219
+[{ "start": 0, "length": 1048576, "depth": 2, "zero": false, "data": true, "offset": 327680},
59
+static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
220
+{ "start": 1048576, "length": 1048576, "depth": 0, "zero": true, "data": false}]
60
+{
221
+
61
+ BDRVTestState *s = bs->opaque;
222
+Offset Length Mapped to File
62
+ s->drain_count--;
223
+0 0x100000 0x50000 TEST_DIR/PID-base
63
+}
224
+
64
+
225
+=== Testing qemu-img commit (top -> mid) ===
65
+static void bdrv_test_close(BlockDriverState *bs)
226
+Image committed.
66
+{
227
+
67
+ BDRVTestState *s = bs->opaque;
228
+image: TEST_IMG
68
+ g_assert_cmpint(s->drain_count, >, 0);
229
+file format: IMGFMT
69
+}
230
+virtual size: 2 MiB (2097152 bytes)
70
+
231
+cluster_size: 65536
71
+static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
232
+backing file: TEST_DIR/PID-base
72
+ uint64_t offset, uint64_t bytes,
233
+Format specific information:
73
+ QEMUIOVector *qiov, int flags)
234
+ compat: 1.1
74
+{
235
+ lazy refcounts: false
75
+ /* We want this request to stay until the polling loop in drain waits for
236
+ refcount bits: 16
76
+ * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
237
+ corrupt: false
77
+ * first and polls its result, too, but it shouldn't accidentally complete
238
+
78
+ * this request yet. */
239
+read 1048576/1048576 bytes at offset 0
79
+ co_aio_sleep_ns(qemu_get_aio_context(), QEMU_CLOCK_REALTIME, 100000);
240
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
80
+
241
+
81
+ return 0;
242
+read 1048576/1048576 bytes at offset 1048576
82
+}
243
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
83
+
244
+
84
+static BlockDriver bdrv_test = {
245
+=== Testing HMP commit (top -> mid) ===
85
+ .format_name = "test",
246
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
86
+ .instance_size = sizeof(BDRVTestState),
247
+
87
+
248
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
88
+ .bdrv_close = bdrv_test_close,
249
+
89
+ .bdrv_co_preadv = bdrv_test_co_preadv,
250
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
90
+
251
+
91
+ .bdrv_co_drain_begin = bdrv_test_co_drain_begin,
252
+wrote 2097152/2097152 bytes at offset 0
92
+ .bdrv_co_drain_end = bdrv_test_co_drain_end,
253
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
93
+};
254
+
94
+
255
+{"execute": "human-monitor-command", "arguments": {"command-line": "commit drive0"}}
95
+static void aio_ret_cb(void *opaque, int ret)
256
+{"return": ""}
96
+{
257
+image: TEST_IMG
97
+ int *aio_ret = opaque;
258
+file format: IMGFMT
98
+ *aio_ret = ret;
259
+virtual size: 2 MiB (2097152 bytes)
99
+}
260
+cluster_size: 65536
100
+
261
+backing file: TEST_DIR/PID-base
101
+static void test_drv_cb_drain_all(void)
262
+Format specific information:
102
+{
263
+ compat: 1.1
103
+ BlockBackend *blk;
264
+ lazy refcounts: false
104
+ BlockDriverState *bs;
265
+ refcount bits: 16
105
+ BDRVTestState *s;
266
+ corrupt: false
106
+ BlockAIOCB *acb;
267
+
107
+ int aio_ret;
268
+read 1048576/1048576 bytes at offset 0
108
+
269
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
109
+ QEMUIOVector qiov;
270
+
110
+ struct iovec iov = {
271
+read 1048576/1048576 bytes at offset 1048576
111
+ .iov_base = NULL,
272
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
112
+ .iov_len = 0,
273
+
113
+ };
274
+=== Testing QMP active commit (top -> mid) ===
114
+ qemu_iovec_init_external(&qiov, &iov, 1);
275
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
115
+
276
+
116
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
277
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
117
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
278
+
118
+ &error_abort);
279
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
119
+ s = bs->opaque;
280
+
120
+ blk_insert_bs(blk, bs, &error_abort);
281
+wrote 2097152/2097152 bytes at offset 0
121
+
282
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
122
+ /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
283
+
123
+ g_assert_cmpint(s->drain_count, ==, 0);
284
+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "mid", "device": "top", "job-id": "job0"}}
124
+ bdrv_drain_all_begin();
285
+{"return": {}}
125
+ g_assert_cmpint(s->drain_count, ==, 1);
286
+{"execute": "job-complete", "arguments": {"id": "job0"}}
126
+ bdrv_drain_all_end();
287
+{"return": {}}
127
+ g_assert_cmpint(s->drain_count, ==, 0);
288
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
128
+
289
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
129
+ /* Now do the same while a request is pending */
290
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
130
+ aio_ret = -EINPROGRESS;
291
+{"return": {}}
131
+ acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
292
+image: TEST_IMG
132
+ g_assert(acb != NULL);
293
+file format: IMGFMT
133
+ g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
294
+virtual size: 2 MiB (2097152 bytes)
134
+
295
+cluster_size: 65536
135
+ g_assert_cmpint(s->drain_count, ==, 0);
296
+backing file: TEST_DIR/PID-base
136
+ bdrv_drain_all_begin();
297
+Format specific information:
137
+ g_assert_cmpint(aio_ret, ==, 0);
298
+ compat: 1.1
138
+ g_assert_cmpint(s->drain_count, ==, 1);
299
+ lazy refcounts: false
139
+ bdrv_drain_all_end();
300
+ refcount bits: 16
140
+ g_assert_cmpint(s->drain_count, ==, 0);
301
+ corrupt: false
141
+
302
+
142
+ bdrv_unref(bs);
303
+read 1048576/1048576 bytes at offset 0
143
+ blk_unref(blk);
304
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
144
+}
305
+
145
+
306
+read 1048576/1048576 bytes at offset 1048576
146
+int main(int argc, char **argv)
307
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
147
+{
308
+
148
+ bdrv_init();
309
+== Resize tests ==
149
+ qemu_init_main_loop(&error_abort);
310
+=== preallocation=off ===
150
+
311
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=6442450944 cluster_size=65536 lazy_refcounts=off refcount_bits=16
151
+ g_test_init(&argc, &argv, NULL);
312
+
152
+
313
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=1073741824 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
153
+ g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
314
+
154
+
315
+wrote 65536/65536 bytes at offset 5368709120
155
+ return g_test_run();
316
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
156
+}
317
+
157
diff --git a/tests/Makefile.include b/tests/Makefile.include
318
+Image resized.
319
+
320
+read 65536/65536 bytes at offset 5368709120
321
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
322
+
323
+1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0)
324
+7 GiB (0x1c0000000) bytes allocated at offset 1 GiB (0x40000000)
325
+
326
+[{ "start": 0, "length": 1073741824, "depth": 1, "zero": true, "data": false},
327
+{ "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}]
328
+
329
+=== preallocation=metadata ===
330
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=34359738368 cluster_size=65536 lazy_refcounts=off refcount_bits=16
331
+
332
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=32212254720 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
333
+
334
+wrote 65536/65536 bytes at offset 33285996544
335
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
336
+
337
+Image resized.
338
+
339
+read 65536/65536 bytes at offset 33285996544
340
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
341
+
342
+30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0)
343
+3 GiB (0xc0000000) bytes allocated at offset 30 GiB (0x780000000)
344
+
345
+[{ "start": 0, "length": 32212254720, "depth": 1, "zero": true, "data": false},
346
+{ "start": 32212254720, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 327680},
347
+{ "start": 32749125632, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 537264128},
348
+{ "start": 33285996544, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1074200576},
349
+{ "start": 33822867456, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1611137024},
350
+{ "start": 34359738368, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2148139008},
351
+{ "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}]
352
+
353
+=== preallocation=falloc ===
354
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=10485760 cluster_size=65536 lazy_refcounts=off refcount_bits=16
355
+
356
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=5242880 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
357
+
358
+wrote 65536/65536 bytes at offset 9437184
359
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
360
+
361
+Image resized.
362
+
363
+read 65536/65536 bytes at offset 9437184
364
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
365
+
366
+5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0)
367
+10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000)
368
+
369
+[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false},
370
+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}]
371
+
372
+=== preallocation=full ===
373
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
374
+
375
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=8388608 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
376
+
377
+wrote 65536/65536 bytes at offset 11534336
378
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
379
+
380
+Image resized.
381
+
382
+read 65536/65536 bytes at offset 11534336
383
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
384
+
385
+8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0)
386
+4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000)
387
+
388
+[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false},
389
+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}]
390
+
391
+=== preallocation=off ===
392
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
393
+
394
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=259072 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
395
+
396
+wrote 65536/65536 bytes at offset 259072
397
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
398
+
399
+Image resized.
400
+
401
+read 65536/65536 bytes at offset 259072
402
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
403
+
404
+192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0)
405
+320 KiB (0x50000) bytes allocated at offset 192 KiB (0x30000)
406
+
407
+[{ "start": 0, "length": 196608, "depth": 1, "zero": true, "data": false},
408
+{ "start": 196608, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": 327680},
409
+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}]
410
+
411
+=== preallocation=off ===
412
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=409600 cluster_size=65536 lazy_refcounts=off refcount_bits=16
413
+
414
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
415
+
416
+wrote 65536/65536 bytes at offset 344064
417
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
418
+
419
+Image resized.
420
+
421
+read 65536/65536 bytes at offset 344064
422
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
423
+
424
+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
425
+256 KiB (0x40000) bytes allocated at offset 256 KiB (0x40000)
426
+
427
+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false},
428
+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}]
429
+
430
+=== preallocation=off ===
431
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=524288 cluster_size=65536 lazy_refcounts=off refcount_bits=16
432
+
433
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
434
+
435
+wrote 65536/65536 bytes at offset 446464
436
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
437
+
438
+Image resized.
439
+
440
+read 65536/65536 bytes at offset 446464
441
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
442
+
443
+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
444
+244 KiB (0x3d000) bytes allocated at offset 256 KiB (0x40000)
445
+
446
+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false},
447
+{ "start": 262144, "length": 249856, "depth": 0, "zero": true, "data": false}]
448
+
449
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
158
index XXXXXXX..XXXXXXX 100644
450
index XXXXXXX..XXXXXXX 100644
159
--- a/tests/Makefile.include
451
--- a/tests/qemu-iotests/group
160
+++ b/tests/Makefile.include
452
+++ b/tests/qemu-iotests/group
161
@@ -XXX,XX +XXX,XX @@ gcov-files-test-thread-pool-y = thread-pool.c
453
@@ -XXX,XX +XXX,XX @@
162
gcov-files-test-hbitmap-y = util/hbitmap.c
454
270 rw backing quick
163
check-unit-y += tests/test-hbitmap$(EXESUF)
455
272 rw
164
gcov-files-test-hbitmap-y = blockjob.c
456
273 backing quick
165
+check-unit-y += tests/test-bdrv-drain$(EXESUF)
457
+274 rw backing
166
check-unit-y += tests/test-blockjob$(EXESUF)
458
277 rw quick
167
check-unit-y += tests/test-blockjob-txn$(EXESUF)
459
279 rw backing quick
168
check-unit-y += tests/test-x86-cpuid$(EXESUF)
460
280 rw migration quick
169
@@ -XXX,XX +XXX,XX @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
170
tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
171
tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
172
tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
173
+tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y)
174
tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
175
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
176
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
177
--
461
--
178
2.13.6
462
2.25.3
179
463
180
464
diff view generated by jsdifflib
1
From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
1
The BDRV_REQ_ZERO_WRITE is currently implemented in a way that first the
2
image is possibly preallocated and then the zero flag is added to all
3
clusters. This means that a copy-on-write operation may be needed when
4
writing to these clusters, despite having used preallocation, negating
5
one of the major benefits of preallocation.
2
6
3
Since bdrv_co_preadv does all necessary checks including
7
Instead, try to forward the BDRV_REQ_ZERO_WRITE to the protocol driver,
4
reading after the end of the backing file, avoid duplication
8
and if the protocol driver can ensure that the new area reads as zeros,
5
of verification before bdrv_co_preadv call.
9
we can skip setting the zero flag in the qcow2 layer.
6
10
7
Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
11
Unfortunately, the same approach doesn't work for metadata
12
preallocation, so we'll still set the zero flag there.
13
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
Message-Id: <20200424142701.67053-1-kwolf@redhat.com>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
17
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
19
---
12
block/qcow2.h | 3 ---
20
block/qcow2.c | 22 +++++++++++++++++++---
13
block/qcow2.c | 51 ++++++++-------------------------------------------
21
tests/qemu-iotests/274.out | 4 ++--
14
2 files changed, 8 insertions(+), 46 deletions(-)
22
2 files changed, 21 insertions(+), 5 deletions(-)
15
23
16
diff --git a/block/qcow2.h b/block/qcow2.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/qcow2.h
19
+++ b/block/qcow2.h
20
@@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset)
21
}
22
23
/* qcow2.c functions */
24
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
25
- int64_t sector_num, int nb_sectors);
26
-
27
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
28
int refcount_order, bool generous_increase,
29
uint64_t *refblock_count);
30
diff --git a/block/qcow2.c b/block/qcow2.c
24
diff --git a/block/qcow2.c b/block/qcow2.c
31
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
32
--- a/block/qcow2.c
26
--- a/block/qcow2.c
33
+++ b/block/qcow2.c
27
+++ b/block/qcow2.c
34
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
28
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
35
return status;
29
/* Allocate the data area */
36
}
30
new_file_size = allocation_start +
37
31
nb_new_data_clusters * s->cluster_size;
38
-/* handle reading after the end of the backing file */
32
- /* Image file grows, so @exact does not matter */
39
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
33
- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
40
- int64_t offset, int bytes)
34
- errp);
41
-{
35
+ /*
42
- uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
36
+ * Image file grows, so @exact does not matter.
43
- int n1;
37
+ *
44
-
38
+ * If we need to zero out the new area, try first whether the protocol
45
- if ((offset + bytes) <= bs_size) {
39
+ * driver can already take care of this.
46
- return bytes;
40
+ */
47
- }
41
+ if (flags & BDRV_REQ_ZERO_WRITE) {
48
-
42
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc,
49
- if (offset >= bs_size) {
43
+ BDRV_REQ_ZERO_WRITE, NULL);
50
- n1 = 0;
44
+ if (ret >= 0) {
51
- } else {
45
+ flags &= ~BDRV_REQ_ZERO_WRITE;
52
- n1 = bs_size - offset;
46
+ }
53
- }
47
+ } else {
54
-
48
+ ret = -1;
55
- qemu_iovec_memset(qiov, n1, 0, bytes - n1);
49
+ }
56
-
50
+ if (ret < 0) {
57
- return n1;
51
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
58
-}
52
+ errp);
59
-
53
+ }
60
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
54
if (ret < 0) {
61
uint64_t bytes, QEMUIOVector *qiov,
55
error_prepend(errp, "Failed to resize underlying file: ");
62
int flags)
56
qcow2_free_clusters(bs, allocation_start,
63
{
57
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
64
BDRVQcow2State *s = bs->opaque;
58
index XXXXXXX..XXXXXXX 100644
65
- int offset_in_cluster, n1;
59
--- a/tests/qemu-iotests/274.out
66
+ int offset_in_cluster;
60
+++ b/tests/qemu-iotests/274.out
67
int ret;
61
@@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 9437184
68
unsigned int cur_bytes; /* number of bytes in current iteration */
62
10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000)
69
uint64_t cluster_offset = 0;
63
70
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
64
[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false},
71
case QCOW2_CLUSTER_UNALLOCATED:
65
-{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}]
72
66
+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}]
73
if (bs->backing) {
67
74
- /* read from the base image */
68
=== preallocation=full ===
75
- n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
69
Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
76
- offset, cur_bytes);
70
@@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 11534336
77
- if (n1 > 0) {
71
4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000)
78
- QEMUIOVector local_qiov;
72
79
-
73
[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false},
80
- qemu_iovec_init(&local_qiov, hd_qiov.niov);
74
-{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}]
81
- qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
75
+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}]
82
-
76
83
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
77
=== preallocation=off ===
84
- qemu_co_mutex_unlock(&s->lock);
78
Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
85
- ret = bdrv_co_preadv(bs->backing, offset, n1,
86
- &local_qiov, 0);
87
- qemu_co_mutex_lock(&s->lock);
88
-
89
- qemu_iovec_destroy(&local_qiov);
90
-
91
- if (ret < 0) {
92
- goto fail;
93
- }
94
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
95
+ qemu_co_mutex_unlock(&s->lock);
96
+ ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
97
+ &hd_qiov, 0);
98
+ qemu_co_mutex_lock(&s->lock);
99
+ if (ret < 0) {
100
+ goto fail;
101
}
102
} else {
103
/* Note: in this case, no need to wait */
104
--
79
--
105
2.13.6
80
2.25.3
106
81
107
82
diff view generated by jsdifflib
1
From: Doug Gale <doug16k@gmail.com>
1
From: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
2
2
3
Add trace output for commands, errors, and undefined behavior.
3
This patch introduces support for PMR that has been defined as part of NVMe 1.4
4
Add guest error log output for undefined behavior.
4
spec. User can now specify a pmrdev option that should point to HostMemoryBackend.
5
Report invalid undefined accesses to MMIO.
5
pmrdev memory region will subsequently be exposed as PCI BAR 2 in emulated NVMe
6
Annotate unlikely error checks with unlikely.
6
device. The guest OS can perform MMIO reads and writes to the PMR region that will stay
7
persistent across system reboot.
7
8
8
Signed-off-by: Doug Gale <doug16k@gmail.com>
9
Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com>
13
Reviewed-by: Keith Busch <kbusch@kernel.org>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
15
---
13
hw/block/nvme.c | 349 ++++++++++++++++++++++++++++++++++++++++++--------
16
hw/block/nvme.h | 2 +
14
hw/block/trace-events | 93 ++++++++++++++
17
include/block/nvme.h | 172 +++++++++++++++++++++++++++++++++++++++++
15
2 files changed, 390 insertions(+), 52 deletions(-)
18
hw/block/nvme.c | 109 ++++++++++++++++++++++++++
19
hw/block/Makefile.objs | 2 +-
20
hw/block/trace-events | 4 +
21
5 files changed, 288 insertions(+), 1 deletion(-)
16
22
23
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
24
index XXXXXXX..XXXXXXX 100644
25
--- a/hw/block/nvme.h
26
+++ b/hw/block/nvme.h
27
@@ -XXX,XX +XXX,XX @@ typedef struct NvmeCtrl {
28
uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
29
30
char *serial;
31
+ HostMemoryBackend *pmrdev;
32
+
33
NvmeNamespace *namespaces;
34
NvmeSQueue **sq;
35
NvmeCQueue **cq;
36
diff --git a/include/block/nvme.h b/include/block/nvme.h
37
index XXXXXXX..XXXXXXX 100644
38
--- a/include/block/nvme.h
39
+++ b/include/block/nvme.h
40
@@ -XXX,XX +XXX,XX @@ typedef struct NvmeBar {
41
uint64_t acq;
42
uint32_t cmbloc;
43
uint32_t cmbsz;
44
+ uint8_t padding[3520]; /* not used by QEMU */
45
+ uint32_t pmrcap;
46
+ uint32_t pmrctl;
47
+ uint32_t pmrsts;
48
+ uint32_t pmrebs;
49
+ uint32_t pmrswtp;
50
+ uint32_t pmrmsc;
51
} NvmeBar;
52
53
enum NvmeCapShift {
54
@@ -XXX,XX +XXX,XX @@ enum NvmeCapShift {
55
CAP_CSS_SHIFT = 37,
56
CAP_MPSMIN_SHIFT = 48,
57
CAP_MPSMAX_SHIFT = 52,
58
+ CAP_PMR_SHIFT = 56,
59
};
60
61
enum NvmeCapMask {
62
@@ -XXX,XX +XXX,XX @@ enum NvmeCapMask {
63
CAP_CSS_MASK = 0xff,
64
CAP_MPSMIN_MASK = 0xf,
65
CAP_MPSMAX_MASK = 0xf,
66
+ CAP_PMR_MASK = 0x1,
67
};
68
69
#define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK)
70
@@ -XXX,XX +XXX,XX @@ enum NvmeCapMask {
71
<< CAP_MPSMIN_SHIFT)
72
#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\
73
<< CAP_MPSMAX_SHIFT)
74
+#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\
75
+ << CAP_PMR_SHIFT)
76
77
enum NvmeCcShift {
78
CC_EN_SHIFT = 0,
79
@@ -XXX,XX +XXX,XX @@ enum NvmeCmbszMask {
80
#define NVME_CMBSZ_GETSIZE(cmbsz) \
81
(NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz))))
82
83
+enum NvmePmrcapShift {
84
+ PMRCAP_RDS_SHIFT = 3,
85
+ PMRCAP_WDS_SHIFT = 4,
86
+ PMRCAP_BIR_SHIFT = 5,
87
+ PMRCAP_PMRTU_SHIFT = 8,
88
+ PMRCAP_PMRWBM_SHIFT = 10,
89
+ PMRCAP_PMRTO_SHIFT = 16,
90
+ PMRCAP_CMSS_SHIFT = 24,
91
+};
92
+
93
+enum NvmePmrcapMask {
94
+ PMRCAP_RDS_MASK = 0x1,
95
+ PMRCAP_WDS_MASK = 0x1,
96
+ PMRCAP_BIR_MASK = 0x7,
97
+ PMRCAP_PMRTU_MASK = 0x3,
98
+ PMRCAP_PMRWBM_MASK = 0xf,
99
+ PMRCAP_PMRTO_MASK = 0xff,
100
+ PMRCAP_CMSS_MASK = 0x1,
101
+};
102
+
103
+#define NVME_PMRCAP_RDS(pmrcap) \
104
+ ((pmrcap >> PMRCAP_RDS_SHIFT) & PMRCAP_RDS_MASK)
105
+#define NVME_PMRCAP_WDS(pmrcap) \
106
+ ((pmrcap >> PMRCAP_WDS_SHIFT) & PMRCAP_WDS_MASK)
107
+#define NVME_PMRCAP_BIR(pmrcap) \
108
+ ((pmrcap >> PMRCAP_BIR_SHIFT) & PMRCAP_BIR_MASK)
109
+#define NVME_PMRCAP_PMRTU(pmrcap) \
110
+ ((pmrcap >> PMRCAP_PMRTU_SHIFT) & PMRCAP_PMRTU_MASK)
111
+#define NVME_PMRCAP_PMRWBM(pmrcap) \
112
+ ((pmrcap >> PMRCAP_PMRWBM_SHIFT) & PMRCAP_PMRWBM_MASK)
113
+#define NVME_PMRCAP_PMRTO(pmrcap) \
114
+ ((pmrcap >> PMRCAP_PMRTO_SHIFT) & PMRCAP_PMRTO_MASK)
115
+#define NVME_PMRCAP_CMSS(pmrcap) \
116
+ ((pmrcap >> PMRCAP_CMSS_SHIFT) & PMRCAP_CMSS_MASK)
117
+
118
+#define NVME_PMRCAP_SET_RDS(pmrcap, val) \
119
+ (pmrcap |= (uint64_t)(val & PMRCAP_RDS_MASK) << PMRCAP_RDS_SHIFT)
120
+#define NVME_PMRCAP_SET_WDS(pmrcap, val) \
121
+ (pmrcap |= (uint64_t)(val & PMRCAP_WDS_MASK) << PMRCAP_WDS_SHIFT)
122
+#define NVME_PMRCAP_SET_BIR(pmrcap, val) \
123
+ (pmrcap |= (uint64_t)(val & PMRCAP_BIR_MASK) << PMRCAP_BIR_SHIFT)
124
+#define NVME_PMRCAP_SET_PMRTU(pmrcap, val) \
125
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRTU_MASK) << PMRCAP_PMRTU_SHIFT)
126
+#define NVME_PMRCAP_SET_PMRWBM(pmrcap, val) \
127
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRWBM_MASK) << PMRCAP_PMRWBM_SHIFT)
128
+#define NVME_PMRCAP_SET_PMRTO(pmrcap, val) \
129
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRTO_MASK) << PMRCAP_PMRTO_SHIFT)
130
+#define NVME_PMRCAP_SET_CMSS(pmrcap, val) \
131
+ (pmrcap |= (uint64_t)(val & PMRCAP_CMSS_MASK) << PMRCAP_CMSS_SHIFT)
132
+
133
+enum NvmePmrctlShift {
134
+ PMRCTL_EN_SHIFT = 0,
135
+};
136
+
137
+enum NvmePmrctlMask {
138
+ PMRCTL_EN_MASK = 0x1,
139
+};
140
+
141
+#define NVME_PMRCTL_EN(pmrctl) ((pmrctl >> PMRCTL_EN_SHIFT) & PMRCTL_EN_MASK)
142
+
143
+#define NVME_PMRCTL_SET_EN(pmrctl, val) \
144
+ (pmrctl |= (uint64_t)(val & PMRCTL_EN_MASK) << PMRCTL_EN_SHIFT)
145
+
146
+enum NvmePmrstsShift {
147
+ PMRSTS_ERR_SHIFT = 0,
148
+ PMRSTS_NRDY_SHIFT = 8,
149
+ PMRSTS_HSTS_SHIFT = 9,
150
+ PMRSTS_CBAI_SHIFT = 12,
151
+};
152
+
153
+enum NvmePmrstsMask {
154
+ PMRSTS_ERR_MASK = 0xff,
155
+ PMRSTS_NRDY_MASK = 0x1,
156
+ PMRSTS_HSTS_MASK = 0x7,
157
+ PMRSTS_CBAI_MASK = 0x1,
158
+};
159
+
160
+#define NVME_PMRSTS_ERR(pmrsts) \
161
+ ((pmrsts >> PMRSTS_ERR_SHIFT) & PMRSTS_ERR_MASK)
162
+#define NVME_PMRSTS_NRDY(pmrsts) \
163
+ ((pmrsts >> PMRSTS_NRDY_SHIFT) & PMRSTS_NRDY_MASK)
164
+#define NVME_PMRSTS_HSTS(pmrsts) \
165
+ ((pmrsts >> PMRSTS_HSTS_SHIFT) & PMRSTS_HSTS_MASK)
166
+#define NVME_PMRSTS_CBAI(pmrsts) \
167
+ ((pmrsts >> PMRSTS_CBAI_SHIFT) & PMRSTS_CBAI_MASK)
168
+
169
+#define NVME_PMRSTS_SET_ERR(pmrsts, val) \
170
+ (pmrsts |= (uint64_t)(val & PMRSTS_ERR_MASK) << PMRSTS_ERR_SHIFT)
171
+#define NVME_PMRSTS_SET_NRDY(pmrsts, val) \
172
+ (pmrsts |= (uint64_t)(val & PMRSTS_NRDY_MASK) << PMRSTS_NRDY_SHIFT)
173
+#define NVME_PMRSTS_SET_HSTS(pmrsts, val) \
174
+ (pmrsts |= (uint64_t)(val & PMRSTS_HSTS_MASK) << PMRSTS_HSTS_SHIFT)
175
+#define NVME_PMRSTS_SET_CBAI(pmrsts, val) \
176
+ (pmrsts |= (uint64_t)(val & PMRSTS_CBAI_MASK) << PMRSTS_CBAI_SHIFT)
177
+
178
+enum NvmePmrebsShift {
179
+ PMREBS_PMRSZU_SHIFT = 0,
180
+ PMREBS_RBB_SHIFT = 4,
181
+ PMREBS_PMRWBZ_SHIFT = 8,
182
+};
183
+
184
+enum NvmePmrebsMask {
185
+ PMREBS_PMRSZU_MASK = 0xf,
186
+ PMREBS_RBB_MASK = 0x1,
187
+ PMREBS_PMRWBZ_MASK = 0xffffff,
188
+};
189
+
190
+#define NVME_PMREBS_PMRSZU(pmrebs) \
191
+ ((pmrebs >> PMREBS_PMRSZU_SHIFT) & PMREBS_PMRSZU_MASK)
192
+#define NVME_PMREBS_RBB(pmrebs) \
193
+ ((pmrebs >> PMREBS_RBB_SHIFT) & PMREBS_RBB_MASK)
194
+#define NVME_PMREBS_PMRWBZ(pmrebs) \
195
+ ((pmrebs >> PMREBS_PMRWBZ_SHIFT) & PMREBS_PMRWBZ_MASK)
196
+
197
+#define NVME_PMREBS_SET_PMRSZU(pmrebs, val) \
198
+ (pmrebs |= (uint64_t)(val & PMREBS_PMRSZU_MASK) << PMREBS_PMRSZU_SHIFT)
199
+#define NVME_PMREBS_SET_RBB(pmrebs, val) \
200
+ (pmrebs |= (uint64_t)(val & PMREBS_RBB_MASK) << PMREBS_RBB_SHIFT)
201
+#define NVME_PMREBS_SET_PMRWBZ(pmrebs, val) \
202
+ (pmrebs |= (uint64_t)(val & PMREBS_PMRWBZ_MASK) << PMREBS_PMRWBZ_SHIFT)
203
+
204
+enum NvmePmrswtpShift {
205
+ PMRSWTP_PMRSWTU_SHIFT = 0,
206
+ PMRSWTP_PMRSWTV_SHIFT = 8,
207
+};
208
+
209
+enum NvmePmrswtpMask {
210
+ PMRSWTP_PMRSWTU_MASK = 0xf,
211
+ PMRSWTP_PMRSWTV_MASK = 0xffffff,
212
+};
213
+
214
+#define NVME_PMRSWTP_PMRSWTU(pmrswtp) \
215
+ ((pmrswtp >> PMRSWTP_PMRSWTU_SHIFT) & PMRSWTP_PMRSWTU_MASK)
216
+#define NVME_PMRSWTP_PMRSWTV(pmrswtp) \
217
+ ((pmrswtp >> PMRSWTP_PMRSWTV_SHIFT) & PMRSWTP_PMRSWTV_MASK)
218
+
219
+#define NVME_PMRSWTP_SET_PMRSWTU(pmrswtp, val) \
220
+ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTU_MASK) << PMRSWTP_PMRSWTU_SHIFT)
221
+#define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \
222
+ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT)
223
+
224
+enum NvmePmrmscShift {
225
+ PMRMSC_CMSE_SHIFT = 1,
226
+ PMRMSC_CBA_SHIFT = 12,
227
+};
228
+
229
+enum NvmePmrmscMask {
230
+ PMRMSC_CMSE_MASK = 0x1,
231
+ PMRMSC_CBA_MASK = 0xfffffffffffff,
232
+};
233
+
234
+#define NVME_PMRMSC_CMSE(pmrmsc) \
235
+ ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK)
236
+#define NVME_PMRMSC_CBA(pmrmsc) \
237
+ ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK)
238
+
239
+#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \
240
+ (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT)
241
+#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \
242
+ (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
243
+
244
typedef struct NvmeCmd {
245
uint8_t opcode;
246
uint8_t fuse;
17
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
247
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
18
index XXXXXXX..XXXXXXX 100644
248
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/block/nvme.c
249
--- a/hw/block/nvme.c
20
+++ b/hw/block/nvme.c
250
+++ b/hw/block/nvme.c
21
@@ -XXX,XX +XXX,XX @@
251
@@ -XXX,XX +XXX,XX @@
252
* -drive file=<file>,if=none,id=<drive_id>
253
* -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
254
* cmb_size_mb=<cmb_size_mb[optional]>, \
255
+ * [pmrdev=<mem_backend_file_id>,] \
256
* num_queues=<N[optional]>
257
*
258
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
259
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
260
+ *
261
+ * cmb_size_mb= and pmrdev= options are mutually exclusive due to a limitation
262
+ * in available BARs. cmb_size_mb= will take precedence over pmrdev= when
263
+ * both are provided.
264
+ * Enabling pmr emulation can be achieved by pointing to memory-backend-file.
265
+ * For example:
266
+ * -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \
267
+ * size=<size> .... -device nvme,...,pmrdev=<mem_id>
268
*/
269
270
#include "qemu/osdep.h"
271
@@ -XXX,XX +XXX,XX @@
272
#include "sysemu/sysemu.h"
273
#include "qapi/error.h"
22
#include "qapi/visitor.h"
274
#include "qapi/visitor.h"
275
+#include "sysemu/hostmem.h"
23
#include "sysemu/block-backend.h"
276
#include "sysemu/block-backend.h"
24
277
+#include "exec/ram_addr.h"
25
+#include "qemu/log.h"
278
26
+#include "trace.h"
279
#include "qemu/log.h"
27
#include "nvme.h"
280
#include "qemu/module.h"
28
281
@@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
29
+#define NVME_GUEST_ERR(trace, fmt, ...) \
282
NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
30
+ do { \
283
"invalid write to read only CMBSZ, ignored");
31
+ (trace_##trace)(__VA_ARGS__); \
284
return;
32
+ qemu_log_mask(LOG_GUEST_ERROR, #trace \
285
+ case 0xE00: /* PMRCAP */
33
+ " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
286
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly,
34
+ } while (0)
287
+ "invalid write to PMRCAP register, ignored");
35
+
288
+ return;
36
static void nvme_process_sq(void *opaque);
289
+ case 0xE04: /* TODO PMRCTL */
37
290
+ break;
38
static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
291
+ case 0xE08: /* PMRSTS */
39
@@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
292
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly,
40
{
293
+ "invalid write to PMRSTS register, ignored");
41
if (cq->irq_enabled) {
294
+ return;
42
if (msix_enabled(&(n->parent_obj))) {
295
+ case 0xE0C: /* PMREBS */
43
+ trace_nvme_irq_msix(cq->vector);
296
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly,
44
msix_notify(&(n->parent_obj), cq->vector);
297
+ "invalid write to PMREBS register, ignored");
45
} else {
298
+ return;
46
+ trace_nvme_irq_pin();
299
+ case 0xE10: /* PMRSWTP */
47
pci_irq_pulse(&n->parent_obj);
300
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly,
48
}
301
+ "invalid write to PMRSWTP register, ignored");
49
+ } else {
302
+ return;
50
+ trace_nvme_irq_masked();
303
+ case 0xE14: /* TODO PMRMSC */
304
+ break;
305
default:
306
NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
307
"invalid MMIO write,"
308
@@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
51
}
309
}
52
}
310
53
311
if (addr < sizeof(n->bar)) {
54
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
312
+ /*
55
trans_len = MIN(len, trans_len);
313
+ * When PMRWBM bit 1 is set then read from
56
int num_prps = (len >> n->page_bits) + 1;
314
+ * from PMRSTS should ensure prior writes
57
315
+ * made it to persistent media
58
- if (!prp1) {
316
+ */
59
+ if (unlikely(!prp1)) {
317
+ if (addr == 0xE08 &&
60
+ trace_nvme_err_invalid_prp();
318
+ (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
61
return NVME_INVALID_FIELD | NVME_DNR;
319
+ qemu_ram_writeback(n->pmrdev->mr.ram_block,
62
} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
320
+ 0, n->pmrdev->size);
63
prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
321
+ }
64
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
322
memcpy(&val, ptr + addr, size);
65
}
66
len -= trans_len;
67
if (len) {
68
- if (!prp2) {
69
+ if (unlikely(!prp2)) {
70
+ trace_nvme_err_invalid_prp2_missing();
71
goto unmap;
72
}
73
if (len > n->page_size) {
74
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
75
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
76
77
if (i == n->max_prp_ents - 1 && len > n->page_size) {
78
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
79
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
80
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
81
goto unmap;
82
}
83
84
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
85
prp_ent = le64_to_cpu(prp_list[i]);
86
}
87
88
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
89
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
90
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
91
goto unmap;
92
}
93
94
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
95
i++;
96
}
97
} else {
98
- if (prp2 & (n->page_size - 1)) {
99
+ if (unlikely(prp2 & (n->page_size - 1))) {
100
+ trace_nvme_err_invalid_prp2_align(prp2);
101
goto unmap;
102
}
103
if (qsg->nsg) {
104
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
105
QEMUIOVector iov;
106
uint16_t status = NVME_SUCCESS;
107
108
+ trace_nvme_dma_read(prp1, prp2);
109
+
110
if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
111
return NVME_INVALID_FIELD | NVME_DNR;
112
}
113
if (qsg.nsg > 0) {
114
- if (dma_buf_read(ptr, len, &qsg)) {
115
+ if (unlikely(dma_buf_read(ptr, len, &qsg))) {
116
+ trace_nvme_err_invalid_dma();
117
status = NVME_INVALID_FIELD | NVME_DNR;
118
}
119
qemu_sglist_destroy(&qsg);
120
} else {
323
} else {
121
- if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
324
NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
122
+ if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
325
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
123
+ trace_nvme_err_invalid_dma();
326
error_setg(errp, "serial property not set");
124
status = NVME_INVALID_FIELD | NVME_DNR;
125
}
126
qemu_iovec_destroy(&iov);
127
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
128
uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
129
uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);
130
131
- if (slba + nlb > ns->id_ns.nsze) {
132
+ if (unlikely(slba + nlb > ns->id_ns.nsze)) {
133
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
134
return NVME_LBA_RANGE | NVME_DNR;
135
}
136
137
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
138
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
139
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
140
141
- if ((slba + nlb) > ns->id_ns.nsze) {
142
+ trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
143
+
144
+ if (unlikely((slba + nlb) > ns->id_ns.nsze)) {
145
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
146
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
147
return NVME_LBA_RANGE | NVME_DNR;
148
}
149
150
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
151
NvmeNamespace *ns;
152
uint32_t nsid = le32_to_cpu(cmd->nsid);
153
154
- if (nsid == 0 || nsid > n->num_namespaces) {
155
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
156
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
157
return NVME_INVALID_NSID | NVME_DNR;
158
}
159
160
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
161
case NVME_CMD_READ:
162
return nvme_rw(n, ns, cmd, req);
163
default:
164
+ trace_nvme_err_invalid_opc(cmd->opcode);
165
return NVME_INVALID_OPCODE | NVME_DNR;
166
}
167
}
168
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
169
NvmeCQueue *cq;
170
uint16_t qid = le16_to_cpu(c->qid);
171
172
- if (!qid || nvme_check_sqid(n, qid)) {
173
+ if (unlikely(!qid || nvme_check_sqid(n, qid))) {
174
+ trace_nvme_err_invalid_del_sq(qid);
175
return NVME_INVALID_QID | NVME_DNR;
176
}
177
178
+ trace_nvme_del_sq(qid);
179
+
180
sq = n->sq[qid];
181
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
182
req = QTAILQ_FIRST(&sq->out_req_list);
183
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
184
uint16_t qflags = le16_to_cpu(c->sq_flags);
185
uint64_t prp1 = le64_to_cpu(c->prp1);
186
187
- if (!cqid || nvme_check_cqid(n, cqid)) {
188
+ trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);
189
+
190
+ if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
191
+ trace_nvme_err_invalid_create_sq_cqid(cqid);
192
return NVME_INVALID_CQID | NVME_DNR;
193
}
194
- if (!sqid || !nvme_check_sqid(n, sqid)) {
195
+ if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) {
196
+ trace_nvme_err_invalid_create_sq_sqid(sqid);
197
return NVME_INVALID_QID | NVME_DNR;
198
}
199
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
200
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
201
+ trace_nvme_err_invalid_create_sq_size(qsize);
202
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
203
}
204
- if (!prp1 || prp1 & (n->page_size - 1)) {
205
+ if (unlikely(!prp1 || prp1 & (n->page_size - 1))) {
206
+ trace_nvme_err_invalid_create_sq_addr(prp1);
207
return NVME_INVALID_FIELD | NVME_DNR;
208
}
209
- if (!(NVME_SQ_FLAGS_PC(qflags))) {
210
+ if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
211
+ trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
212
return NVME_INVALID_FIELD | NVME_DNR;
213
}
214
sq = g_malloc0(sizeof(*sq));
215
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
216
NvmeCQueue *cq;
217
uint16_t qid = le16_to_cpu(c->qid);
218
219
- if (!qid || nvme_check_cqid(n, qid)) {
220
+ if (unlikely(!qid || nvme_check_cqid(n, qid))) {
221
+ trace_nvme_err_invalid_del_cq_cqid(qid);
222
return NVME_INVALID_CQID | NVME_DNR;
223
}
224
225
cq = n->cq[qid];
226
- if (!QTAILQ_EMPTY(&cq->sq_list)) {
227
+ if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
228
+ trace_nvme_err_invalid_del_cq_notempty(qid);
229
return NVME_INVALID_QUEUE_DEL;
230
}
231
+ trace_nvme_del_cq(qid);
232
nvme_free_cq(cq, n);
233
return NVME_SUCCESS;
234
}
235
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
236
uint16_t qflags = le16_to_cpu(c->cq_flags);
237
uint64_t prp1 = le64_to_cpu(c->prp1);
238
239
- if (!cqid || !nvme_check_cqid(n, cqid)) {
240
+ trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
241
+ NVME_CQ_FLAGS_IEN(qflags) != 0);
242
+
243
+ if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) {
244
+ trace_nvme_err_invalid_create_cq_cqid(cqid);
245
return NVME_INVALID_CQID | NVME_DNR;
246
}
247
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
248
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
249
+ trace_nvme_err_invalid_create_cq_size(qsize);
250
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
251
}
252
- if (!prp1) {
253
+ if (unlikely(!prp1)) {
254
+ trace_nvme_err_invalid_create_cq_addr(prp1);
255
return NVME_INVALID_FIELD | NVME_DNR;
256
}
257
- if (vector > n->num_queues) {
258
+ if (unlikely(vector > n->num_queues)) {
259
+ trace_nvme_err_invalid_create_cq_vector(vector);
260
return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
261
}
262
- if (!(NVME_CQ_FLAGS_PC(qflags))) {
263
+ if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
264
+ trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
265
return NVME_INVALID_FIELD | NVME_DNR;
266
}
267
268
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
269
uint64_t prp1 = le64_to_cpu(c->prp1);
270
uint64_t prp2 = le64_to_cpu(c->prp2);
271
272
+ trace_nvme_identify_ctrl();
273
+
274
return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
275
prp1, prp2);
276
}
277
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
278
uint64_t prp1 = le64_to_cpu(c->prp1);
279
uint64_t prp2 = le64_to_cpu(c->prp2);
280
281
- if (nsid == 0 || nsid > n->num_namespaces) {
282
+ trace_nvme_identify_ns(nsid);
283
+
284
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
285
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
286
return NVME_INVALID_NSID | NVME_DNR;
287
}
288
289
ns = &n->namespaces[nsid - 1];
290
+
291
return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
292
prp1, prp2);
293
}
294
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
295
uint16_t ret;
296
int i, j = 0;
297
298
+ trace_nvme_identify_nslist(min_nsid);
299
+
300
list = g_malloc0(data_len);
301
for (i = 0; i < n->num_namespaces; i++) {
302
if (i < min_nsid) {
303
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
304
case 0x02:
305
return nvme_identify_nslist(n, c);
306
default:
307
+ trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
308
return NVME_INVALID_FIELD | NVME_DNR;
309
}
310
}
311
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
312
switch (dw10) {
313
case NVME_VOLATILE_WRITE_CACHE:
314
result = blk_enable_write_cache(n->conf.blk);
315
+ trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
316
break;
317
case NVME_NUMBER_OF_QUEUES:
318
result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
319
+ trace_nvme_getfeat_numq(result);
320
break;
321
default:
322
+ trace_nvme_err_invalid_getfeat(dw10);
323
return NVME_INVALID_FIELD | NVME_DNR;
324
}
325
326
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
327
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
328
break;
329
case NVME_NUMBER_OF_QUEUES:
330
+ trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
331
+ ((dw11 >> 16) & 0xFFFF) + 1,
332
+ n->num_queues - 1, n->num_queues - 1);
333
req->cqe.result =
334
cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
335
break;
336
default:
337
+ trace_nvme_err_invalid_setfeat(dw10);
338
return NVME_INVALID_FIELD | NVME_DNR;
339
}
340
return NVME_SUCCESS;
341
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
342
case NVME_ADM_CMD_GET_FEATURES:
343
return nvme_get_feature(n, cmd, req);
344
default:
345
+ trace_nvme_err_invalid_admin_opc(cmd->opcode);
346
return NVME_INVALID_OPCODE | NVME_DNR;
347
}
348
}
349
@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
350
uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
351
uint32_t page_size = 1 << page_bits;
352
353
- if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq ||
354
- n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) ||
355
- NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
356
- NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) ||
357
- NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
358
- NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
359
- NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
360
- NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
361
- !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) {
362
+ if (unlikely(n->cq[0])) {
363
+ trace_nvme_err_startfail_cq();
364
+ return -1;
365
+ }
366
+ if (unlikely(n->sq[0])) {
367
+ trace_nvme_err_startfail_sq();
368
+ return -1;
369
+ }
370
+ if (unlikely(!n->bar.asq)) {
371
+ trace_nvme_err_startfail_nbarasq();
372
+ return -1;
373
+ }
374
+ if (unlikely(!n->bar.acq)) {
375
+ trace_nvme_err_startfail_nbaracq();
376
+ return -1;
377
+ }
378
+ if (unlikely(n->bar.asq & (page_size - 1))) {
379
+ trace_nvme_err_startfail_asq_misaligned(n->bar.asq);
380
+ return -1;
381
+ }
382
+ if (unlikely(n->bar.acq & (page_size - 1))) {
383
+ trace_nvme_err_startfail_acq_misaligned(n->bar.acq);
384
+ return -1;
385
+ }
386
+ if (unlikely(NVME_CC_MPS(n->bar.cc) <
387
+ NVME_CAP_MPSMIN(n->bar.cap))) {
388
+ trace_nvme_err_startfail_page_too_small(
389
+ NVME_CC_MPS(n->bar.cc),
390
+ NVME_CAP_MPSMIN(n->bar.cap));
391
+ return -1;
392
+ }
393
+ if (unlikely(NVME_CC_MPS(n->bar.cc) >
394
+ NVME_CAP_MPSMAX(n->bar.cap))) {
395
+ trace_nvme_err_startfail_page_too_large(
396
+ NVME_CC_MPS(n->bar.cc),
397
+ NVME_CAP_MPSMAX(n->bar.cap));
398
+ return -1;
399
+ }
400
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) <
401
+ NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
402
+ trace_nvme_err_startfail_cqent_too_small(
403
+ NVME_CC_IOCQES(n->bar.cc),
404
+ NVME_CTRL_CQES_MIN(n->bar.cap));
405
+ return -1;
406
+ }
407
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) >
408
+ NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
409
+ trace_nvme_err_startfail_cqent_too_large(
410
+ NVME_CC_IOCQES(n->bar.cc),
411
+ NVME_CTRL_CQES_MAX(n->bar.cap));
412
+ return -1;
413
+ }
414
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) <
415
+ NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
416
+ trace_nvme_err_startfail_sqent_too_small(
417
+ NVME_CC_IOSQES(n->bar.cc),
418
+ NVME_CTRL_SQES_MIN(n->bar.cap));
419
+ return -1;
420
+ }
421
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) >
422
+ NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
423
+ trace_nvme_err_startfail_sqent_too_large(
424
+ NVME_CC_IOSQES(n->bar.cc),
425
+ NVME_CTRL_SQES_MAX(n->bar.cap));
426
+ return -1;
427
+ }
428
+ if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) {
429
+ trace_nvme_err_startfail_asqent_sz_zero();
430
+ return -1;
431
+ }
432
+ if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) {
433
+ trace_nvme_err_startfail_acqent_sz_zero();
434
return -1;
435
}
436
437
@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
438
static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
439
unsigned size)
440
{
441
+ if (unlikely(offset & (sizeof(uint32_t) - 1))) {
442
+ NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32,
443
+ "MMIO write not 32-bit aligned,"
444
+ " offset=0x%"PRIx64"", offset);
445
+ /* should be ignored, fall through for now */
446
+ }
447
+
448
+ if (unlikely(size < sizeof(uint32_t))) {
449
+ NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall,
450
+ "MMIO write smaller than 32-bits,"
451
+ " offset=0x%"PRIx64", size=%u",
452
+ offset, size);
453
+ /* should be ignored, fall through for now */
454
+ }
455
+
456
switch (offset) {
457
- case 0xc:
458
+ case 0xc: /* INTMS */
459
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
460
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
461
+ "undefined access to interrupt mask set"
462
+ " when MSI-X is enabled");
463
+ /* should be ignored, fall through for now */
464
+ }
465
n->bar.intms |= data & 0xffffffff;
466
n->bar.intmc = n->bar.intms;
467
+ trace_nvme_mmio_intm_set(data & 0xffffffff,
468
+ n->bar.intmc);
469
break;
470
- case 0x10:
471
+ case 0x10: /* INTMC */
472
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
473
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
474
+ "undefined access to interrupt mask clr"
475
+ " when MSI-X is enabled");
476
+ /* should be ignored, fall through for now */
477
+ }
478
n->bar.intms &= ~(data & 0xffffffff);
479
n->bar.intmc = n->bar.intms;
480
+ trace_nvme_mmio_intm_clr(data & 0xffffffff,
481
+ n->bar.intmc);
482
break;
483
- case 0x14:
484
+ case 0x14: /* CC */
485
+ trace_nvme_mmio_cfg(data & 0xffffffff);
486
/* Windows first sends data, then sends enable bit */
487
if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
488
!NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc))
489
@@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
490
491
if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
492
n->bar.cc = data;
493
- if (nvme_start_ctrl(n)) {
494
+ if (unlikely(nvme_start_ctrl(n))) {
495
+ trace_nvme_err_startfail();
496
n->bar.csts = NVME_CSTS_FAILED;
497
} else {
498
+ trace_nvme_mmio_start_success();
499
n->bar.csts = NVME_CSTS_READY;
500
}
501
} else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
502
+ trace_nvme_mmio_stopped();
503
nvme_clear_ctrl(n);
504
n->bar.csts &= ~NVME_CSTS_READY;
505
}
506
if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
507
- nvme_clear_ctrl(n);
508
- n->bar.cc = data;
509
- n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
510
+ trace_nvme_mmio_shutdown_set();
511
+ nvme_clear_ctrl(n);
512
+ n->bar.cc = data;
513
+ n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
514
} else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
515
- n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
516
- n->bar.cc = data;
517
+ trace_nvme_mmio_shutdown_cleared();
518
+ n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
519
+ n->bar.cc = data;
520
+ }
521
+ break;
522
+ case 0x1C: /* CSTS */
523
+ if (data & (1 << 4)) {
524
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported,
525
+ "attempted to W1C CSTS.NSSRO"
526
+ " but CAP.NSSRS is zero (not supported)");
527
+ } else if (data != 0) {
528
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts,
529
+ "attempted to set a read only bit"
530
+ " of controller status");
531
+ }
532
+ break;
533
+ case 0x20: /* NSSR */
534
+ if (data == 0x4E564D65) {
535
+ trace_nvme_ub_mmiowr_ssreset_unsupported();
536
+ } else {
537
+ /* The spec says that writes of other values have no effect */
538
+ return;
539
}
540
break;
541
- case 0x24:
542
+ case 0x24: /* AQA */
543
n->bar.aqa = data & 0xffffffff;
544
+ trace_nvme_mmio_aqattr(data & 0xffffffff);
545
break;
546
- case 0x28:
547
+ case 0x28: /* ASQ */
548
n->bar.asq = data;
549
+ trace_nvme_mmio_asqaddr(data);
550
break;
551
- case 0x2c:
552
+ case 0x2c: /* ASQ hi */
553
n->bar.asq |= data << 32;
554
+ trace_nvme_mmio_asqaddr_hi(data, n->bar.asq);
555
break;
556
- case 0x30:
557
+ case 0x30: /* ACQ */
558
+ trace_nvme_mmio_acqaddr(data);
559
n->bar.acq = data;
560
break;
561
- case 0x34:
562
+ case 0x34: /* ACQ hi */
563
n->bar.acq |= data << 32;
564
+ trace_nvme_mmio_acqaddr_hi(data, n->bar.acq);
565
break;
566
+ case 0x38: /* CMBLOC */
567
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved,
568
+ "invalid write to reserved CMBLOC"
569
+ " when CMBSZ is zero, ignored");
570
+ return;
571
+ case 0x3C: /* CMBSZ */
572
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
573
+ "invalid write to read only CMBSZ, ignored");
574
+ return;
575
default:
576
+ NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
577
+ "invalid MMIO write,"
578
+ " offset=0x%"PRIx64", data=%"PRIx64"",
579
+ offset, data);
580
break;
581
}
582
}
583
@@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
584
uint8_t *ptr = (uint8_t *)&n->bar;
585
uint64_t val = 0;
586
587
+ if (unlikely(addr & (sizeof(uint32_t) - 1))) {
588
+ NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32,
589
+ "MMIO read not 32-bit aligned,"
590
+ " offset=0x%"PRIx64"", addr);
591
+ /* should RAZ, fall through for now */
592
+ } else if (unlikely(size < sizeof(uint32_t))) {
593
+ NVME_GUEST_ERR(nvme_ub_mmiord_toosmall,
594
+ "MMIO read smaller than 32-bits,"
595
+ " offset=0x%"PRIx64"", addr);
596
+ /* should RAZ, fall through for now */
597
+ }
598
+
599
if (addr < sizeof(n->bar)) {
600
memcpy(&val, ptr + addr, size);
601
+ } else {
602
+ NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
603
+ "MMIO read beyond last register,"
604
+ " offset=0x%"PRIx64", returning 0", addr);
605
}
606
+
607
return val;
608
}
609
610
@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
611
{
612
uint32_t qid;
613
614
- if (addr & ((1 << 2) - 1)) {
615
+ if (unlikely(addr & ((1 << 2) - 1))) {
616
+ NVME_GUEST_ERR(nvme_ub_db_wr_misaligned,
617
+ "doorbell write not 32-bit aligned,"
618
+ " offset=0x%"PRIx64", ignoring", addr);
619
return;
327
return;
620
}
328
}
621
329
+
622
if (((addr - 0x1000) >> 2) & 1) {
330
+ if (!n->cmb_size_mb && n->pmrdev) {
623
+ /* Completion queue doorbell write */
331
+ if (host_memory_backend_is_mapped(n->pmrdev)) {
624
+
332
+ char *path = object_get_canonical_path_component(OBJECT(n->pmrdev));
625
uint16_t new_head = val & 0xffff;
333
+ error_setg(errp, "can't use already busy memdev: %s", path);
626
int start_sqs;
334
+ g_free(path);
627
NvmeCQueue *cq;
335
+ return;
628
336
+ }
629
qid = (addr - (0x1000 + (1 << 2))) >> 3;
337
+
630
- if (nvme_check_cqid(n, qid)) {
338
+ if (!is_power_of_2(n->pmrdev->size)) {
631
+ if (unlikely(nvme_check_cqid(n, qid))) {
339
+ error_setg(errp, "pmr backend size needs to be power of 2 in size");
632
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq,
340
+ return;
633
+ "completion queue doorbell write"
341
+ }
634
+ " for nonexistent queue,"
342
+
635
+ " sqid=%"PRIu32", ignoring", qid);
343
+ host_memory_backend_set_mapped(n->pmrdev, true);
636
return;
344
+ }
637
}
345
+
638
346
blkconf_blocksizes(&n->conf);
639
cq = n->cq[qid];
347
if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
640
- if (new_head >= cq->size) {
348
false, errp)) {
641
+ if (unlikely(new_head >= cq->size)) {
349
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
642
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead,
350
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
643
+ "completion queue doorbell write value"
351
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
644
+ " beyond queue size, sqid=%"PRIu32","
352
645
+ " new_head=%"PRIu16", ignoring",
353
+ } else if (n->pmrdev) {
646
+ qid, new_head);
354
+ /* Controller Capabilities register */
647
return;
355
+ NVME_CAP_SET_PMRS(n->bar.cap, 1);
648
}
356
+
649
357
+ /* PMR Capabilities register */
650
@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
358
+ n->bar.pmrcap = 0;
651
nvme_isr_notify(n, cq);
359
+ NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
652
}
360
+ NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
653
} else {
361
+ NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2);
654
+ /* Submission queue doorbell write */
362
+ NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
655
+
363
+ /* Turn on bit 1 support */
656
uint16_t new_tail = val & 0xffff;
364
+ NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);
657
NvmeSQueue *sq;
365
+ NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
658
366
+ NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);
659
qid = (addr - 0x1000) >> 3;
367
+
660
- if (nvme_check_sqid(n, qid)) {
368
+ /* PMR Control register */
661
+ if (unlikely(nvme_check_sqid(n, qid))) {
369
+ n->bar.pmrctl = 0;
662
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq,
370
+ NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);
663
+ "submission queue doorbell write"
371
+
664
+ " for nonexistent queue,"
372
+ /* PMR Status register */
665
+ " sqid=%"PRIu32", ignoring", qid);
373
+ n->bar.pmrsts = 0;
666
return;
374
+ NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
667
}
375
+ NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
668
376
+ NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
669
sq = n->sq[qid];
377
+ NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);
670
- if (new_tail >= sq->size) {
378
+
671
+ if (unlikely(new_tail >= sq->size)) {
379
+ /* PMR Elasticity Buffer Size register */
672
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail,
380
+ n->bar.pmrebs = 0;
673
+ "submission queue doorbell write value"
381
+ NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
674
+ " beyond queue size, sqid=%"PRIu32","
382
+ NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
675
+ " new_tail=%"PRIu16", ignoring",
383
+ NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);
676
+ qid, new_tail);
384
+
677
return;
385
+ /* PMR Sustained Write Throughput register */
678
}
386
+ n->bar.pmrswtp = 0;
679
387
+ NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
388
+ NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);
389
+
390
+ /* PMR Memory Space Control register */
391
+ n->bar.pmrmsc = 0;
392
+ NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
393
+ NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);
394
+
395
+ pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
396
+ PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
397
+ PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr);
398
}
399
400
for (i = 0; i < n->num_namespaces; i++) {
401
@@ -XXX,XX +XXX,XX @@ static void nvme_exit(PCIDevice *pci_dev)
402
if (n->cmb_size_mb) {
403
g_free(n->cmbuf);
404
}
405
+
406
+ if (n->pmrdev) {
407
+ host_memory_backend_set_mapped(n->pmrdev, false);
408
+ }
409
msix_uninit_exclusive_bar(pci_dev);
410
}
411
412
static Property nvme_props[] = {
413
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
414
+ DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND,
415
+ HostMemoryBackend *),
416
DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
417
DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
418
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
419
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
420
index XXXXXXX..XXXXXXX 100644
421
--- a/hw/block/Makefile.objs
422
+++ b/hw/block/Makefile.objs
423
@@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
424
common-obj-$(CONFIG_XEN) += xen-block.o
425
common-obj-$(CONFIG_ECC) += ecc.o
426
common-obj-$(CONFIG_ONENAND) += onenand.o
427
-common-obj-$(CONFIG_NVME_PCI) += nvme.o
428
common-obj-$(CONFIG_SWIM) += swim.o
429
430
common-obj-$(CONFIG_SH4) += tc58128.o
431
432
obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o
433
obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
434
+obj-$(CONFIG_NVME_PCI) += nvme.o
435
436
obj-y += dataplane/
680
diff --git a/hw/block/trace-events b/hw/block/trace-events
437
diff --git a/hw/block/trace-events b/hw/block/trace-events
681
index XXXXXXX..XXXXXXX 100644
438
index XXXXXXX..XXXXXXX 100644
682
--- a/hw/block/trace-events
439
--- a/hw/block/trace-events
683
+++ b/hw/block/trace-events
440
+++ b/hw/block/trace-events
684
@@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6
441
@@ -XXX,XX +XXX,XX @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA
685
hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d"
442
nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
686
hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d"
443
nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
687
444
nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
688
+# hw/block/nvme.c
445
+nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored"
689
+# nvme traces for successful events
446
+nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored"
690
+nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
447
+nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored"
691
+nvme_irq_pin(void) "pulsing IRQ pin"
448
+nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored"
692
+nvme_irq_masked(void) "IRQ is masked"
449
nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
693
+nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
450
nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
694
+nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
451
nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
695
+nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
696
+nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
697
+nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
698
+nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
699
+nvme_identify_ctrl(void) "identify controller"
700
+nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
701
+nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
702
+nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s"
703
+nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
704
+nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
705
+nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
706
+nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
707
+nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
708
+nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64""
709
+nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64""
710
+nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64""
711
+nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
712
+nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
713
+nvme_mmio_start_success(void) "setting controller enable bit succeeded"
714
+nvme_mmio_stopped(void) "cleared controller enable bit"
715
+nvme_mmio_shutdown_set(void) "shutdown bit set"
716
+nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
717
+
718
+# nvme traces for error conditions
719
+nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
720
+nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
721
+nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
722
+nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred"
723
+nvme_err_invalid_field(void) "invalid field"
724
+nvme_err_invalid_prp(void) "invalid PRP"
725
+nvme_err_invalid_sgl(void) "invalid SGL"
726
+nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u"
727
+nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
728
+nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
729
+nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
730
+nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
731
+nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
732
+nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16""
733
+nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16""
734
+nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64""
735
+nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16""
736
+nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16""
737
+nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16""
738
+nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16""
739
+nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16""
740
+nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64""
741
+nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16""
742
+nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16""
743
+nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
744
+nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
745
+nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
746
+nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
747
+nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
748
+nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
749
+nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null"
750
+nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64""
751
+nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64""
752
+nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u"
753
+nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u"
754
+nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u"
755
+nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u"
756
+nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u"
757
+nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u"
758
+nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
759
+nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
760
+nvme_err_startfail(void) "setting controller enable bit failed"
761
+
762
+# Traces for undefined behavior
763
+nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
764
+nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u"
765
+nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled"
766
+nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status"
767
+nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)"
768
+nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
769
+nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
770
+nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
771
+nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
772
+nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
773
+nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
774
+nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0"
775
+nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring"
776
+nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring"
777
+nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring"
778
+nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring"
779
+nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring"
780
+
781
# hw/block/xen_disk.c
782
xen_disk_alloc(char *name) "%s"
783
xen_disk_init(char *name) "%s"
784
--
452
--
785
2.13.6
453
2.25.3
786
454
787
455
diff view generated by jsdifflib
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
The QMP handler qmp_object_add() and the implementation of --object in
2
qemu-storage-daemon can share most of the code. Currently,
3
qemu-storage-daemon calls qmp_object_add(), but this is not correct
4
because different visitors need to be used.
2
5
3
Convert nvme_init() to realize and rename it to nvme_realize().
6
As a first step towards a fix, make qmp_object_add() a wrapper around a
7
new function user_creatable_add_dict() that can get an additional
8
parameter. The handling of "props" is only required for compatibility
9
and not required for the qemu-storage-daemon command line, so it stays
10
in qmp_object_add().
4
11
5
Cc: John Snow <jsnow@redhat.com>
6
Cc: Keith Busch <keith.busch@intel.com>
7
Cc: Kevin Wolf <kwolf@redhat.com>
8
Cc: Max Reitz <mreitz@redhat.com>
9
Cc: Markus Armbruster <armbru@redhat.com>
10
11
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
13
---
14
hw/block/nvme.c | 18 ++++++++++--------
14
include/qom/object_interfaces.h | 12 ++++++++++++
15
1 file changed, 10 insertions(+), 8 deletions(-)
15
qom/object_interfaces.c | 27 +++++++++++++++++++++++++++
16
qom/qom-qmp-cmds.c | 24 +-----------------------
17
3 files changed, 40 insertions(+), 23 deletions(-)
16
18
17
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
19
diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h
18
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/block/nvme.c
21
--- a/include/qom/object_interfaces.h
20
+++ b/hw/block/nvme.c
22
+++ b/include/qom/object_interfaces.h
21
@@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps nvme_cmb_ops = {
23
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
22
},
24
const QDict *qdict,
23
};
25
Visitor *v, Error **errp);
24
26
25
-static int nvme_init(PCIDevice *pci_dev)
27
+/**
26
+static void nvme_realize(PCIDevice *pci_dev, Error **errp)
28
+ * user_creatable_add_dict:
29
+ * @qdict: the object definition
30
+ * @errp: if an error occurs, a pointer to an area to store the error
31
+ *
32
+ * Create an instance of the user creatable object that is defined by
33
+ * @qdict. The object type is taken from the QDict key 'qom-type', its
34
+ * ID from the key 'id'. The remaining entries in @qdict are used to
35
+ * initialize the object properties.
36
+ */
37
+void user_creatable_add_dict(QDict *qdict, Error **errp);
38
+
39
/**
40
* user_creatable_add_opts:
41
* @opts: the object definition
42
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/qom/object_interfaces.c
45
+++ b/qom/object_interfaces.c
46
@@ -XXX,XX +XXX,XX @@
47
#include "qapi/qmp/qerror.h"
48
#include "qapi/qmp/qjson.h"
49
#include "qapi/qmp/qstring.h"
50
+#include "qapi/qobject-input-visitor.h"
51
#include "qom/object_interfaces.h"
52
#include "qemu/help_option.h"
53
#include "qemu/module.h"
54
@@ -XXX,XX +XXX,XX @@ out:
55
return obj;
56
}
57
58
+void user_creatable_add_dict(QDict *qdict, Error **errp)
59
+{
60
+ Visitor *v;
61
+ Object *obj;
62
+ g_autofree char *type = NULL;
63
+ g_autofree char *id = NULL;
64
+
65
+ type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
66
+ if (!type) {
67
+ error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
68
+ return;
69
+ }
70
+ qdict_del(qdict, "qom-type");
71
+
72
+ id = g_strdup(qdict_get_try_str(qdict, "id"));
73
+ if (!id) {
74
+ error_setg(errp, QERR_MISSING_PARAMETER, "id");
75
+ return;
76
+ }
77
+ qdict_del(qdict, "id");
78
+
79
+ v = qobject_input_visitor_new(QOBJECT(qdict));
80
+ obj = user_creatable_add_type(type, id, qdict, v, errp);
81
+ visit_free(v);
82
+ object_unref(obj);
83
+}
84
85
Object *user_creatable_add_opts(QemuOpts *opts, Error **errp)
27
{
86
{
28
NvmeCtrl *n = NVME(pci_dev);
87
diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
29
NvmeIdCtrl *id = &n->id_ctrl;
88
index XXXXXXX..XXXXXXX 100644
30
@@ -XXX,XX +XXX,XX @@ static int nvme_init(PCIDevice *pci_dev)
89
--- a/qom/qom-qmp-cmds.c
31
Error *local_err = NULL;
90
+++ b/qom/qom-qmp-cmds.c
32
91
@@ -XXX,XX +XXX,XX @@
33
if (!n->conf.blk) {
92
#include "qapi/qapi-commands-qom.h"
34
- return -1;
93
#include "qapi/qmp/qdict.h"
35
+ error_setg(errp, "drive property not set");
94
#include "qapi/qmp/qerror.h"
36
+ return;
95
-#include "qapi/qobject-input-visitor.h"
96
#include "qemu/cutils.h"
97
#include "qom/object_interfaces.h"
98
#include "qom/qom-qobject.h"
99
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
100
{
101
QObject *props;
102
QDict *pdict;
103
- Visitor *v;
104
- Object *obj;
105
- g_autofree char *type = NULL;
106
- g_autofree char *id = NULL;
107
-
108
- type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
109
- if (!type) {
110
- error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
111
- return;
112
- }
113
- qdict_del(qdict, "qom-type");
114
-
115
- id = g_strdup(qdict_get_try_str(qdict, "id"));
116
- if (!id) {
117
- error_setg(errp, QERR_MISSING_PARAMETER, "id");
118
- return;
119
- }
120
- qdict_del(qdict, "id");
121
122
props = qdict_get(qdict, "props");
123
if (props) {
124
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
125
qobject_unref(pdict);
37
}
126
}
38
127
39
bs_size = blk_getlength(n->conf.blk);
128
- v = qobject_input_visitor_new(QOBJECT(qdict));
40
if (bs_size < 0) {
129
- obj = user_creatable_add_type(type, id, qdict, v, errp);
41
- return -1;
130
- visit_free(v);
42
+ error_setg(errp, "could not get backing file size");
131
- object_unref(obj);
43
+ return;
132
+ user_creatable_add_dict(qdict, errp);
44
}
45
46
blkconf_serial(&n->conf, &n->serial);
47
if (!n->serial) {
48
- return -1;
49
+ error_setg(errp, "serial property not set");
50
+ return;
51
}
52
blkconf_blocksizes(&n->conf);
53
blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
54
false, &local_err);
55
if (local_err) {
56
- error_report_err(local_err);
57
- return -1;
58
+ error_propagate(errp, local_err);
59
+ return;
60
}
61
62
pci_conf = pci_dev->config;
63
@@ -XXX,XX +XXX,XX @@ static int nvme_init(PCIDevice *pci_dev)
64
cpu_to_le64(n->ns_size >>
65
id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
66
}
67
- return 0;
68
}
133
}
69
134
70
static void nvme_exit(PCIDevice *pci_dev)
135
void qmp_object_del(const char *id, Error **errp)
71
@@ -XXX,XX +XXX,XX @@ static void nvme_class_init(ObjectClass *oc, void *data)
72
DeviceClass *dc = DEVICE_CLASS(oc);
73
PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
74
75
- pc->init = nvme_init;
76
+ pc->realize = nvme_realize;
77
pc->exit = nvme_exit;
78
pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
79
pc->vendor_id = PCI_VENDOR_ID_INTEL;
80
--
136
--
81
2.13.6
137
2.25.3
82
138
83
139
diff view generated by jsdifflib
1
This change separates bdrv_drain_invoke(), which calls the BlockDriver
1
After processing the option string with the keyval parser, we get a
2
drain callbacks, from bdrv_drain_recurse(). Instead, the function
2
QDict that contains only strings. This QDict must be fed to a keyval
3
performs its own recursion now.
3
visitor which converts the strings into the right data types.
4
4
5
One reason for this is that bdrv_drain_recurse() can be called multiple
5
qmp_object_add(), however, uses the normal QObject input visitor, which
6
times by bdrv_drain_all_begin(), but the callbacks may only be called
6
expects a QDict where all properties already have the QType that matches
7
once. The separation is necessary to fix this bug.
7
the data type required by the QOM object type.
8
8
9
The other reason is that we intend to go to a model where we call all
9
Change the --object implementation in qemu-storage-daemon so that it
10
driver callbacks first, and only then start polling. This is not fully
10
doesn't call qmp_object_add(), but calls user_creatable_add_dict()
11
achieved yet with this patch, as bdrv_drain_invoke() contains a
11
directly instead and passes it a new keyval boolean that decides which
12
BDRV_POLL_WHILE() loop for the block driver callbacks, which can still
12
visitor must be used.
13
call callbacks for any unrelated event. It's a step in this direction
14
anyway.
15
13
16
Cc: qemu-stable@nongnu.org
14
Reported-by: Coiby Xu <coiby.xu@gmail.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
19
---
16
---
20
block/io.c | 14 +++++++++++---
17
include/qom/object_interfaces.h | 6 +++++-
21
1 file changed, 11 insertions(+), 3 deletions(-)
18
qemu-storage-daemon.c | 4 +---
19
qom/object_interfaces.c | 8 ++++++--
20
qom/qom-qmp-cmds.c | 2 +-
21
4 files changed, 13 insertions(+), 7 deletions(-)
22
22
23
diff --git a/block/io.c b/block/io.c
23
diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h
24
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
25
--- a/block/io.c
25
--- a/include/qom/object_interfaces.h
26
+++ b/block/io.c
26
+++ b/include/qom/object_interfaces.h
27
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
27
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
28
bdrv_wakeup(bs);
28
/**
29
* user_creatable_add_dict:
30
* @qdict: the object definition
31
+ * @keyval: if true, use a keyval visitor for processing @qdict (i.e.
32
+ * assume that all @qdict values are strings); otherwise, use
33
+ * the normal QObject visitor (i.e. assume all @qdict values
34
+ * have the QType expected by the QOM object type)
35
* @errp: if an error occurs, a pointer to an area to store the error
36
*
37
* Create an instance of the user creatable object that is defined by
38
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
39
* ID from the key 'id'. The remaining entries in @qdict are used to
40
* initialize the object properties.
41
*/
42
-void user_creatable_add_dict(QDict *qdict, Error **errp);
43
+void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp);
44
45
/**
46
* user_creatable_add_opts:
47
diff --git a/qemu-storage-daemon.c b/qemu-storage-daemon.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/qemu-storage-daemon.c
50
+++ b/qemu-storage-daemon.c
51
@@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[])
52
QemuOpts *opts;
53
const char *type;
54
QDict *args;
55
- QObject *ret_data = NULL;
56
57
/* FIXME The keyval parser rejects 'help' arguments, so we must
58
* unconditionally try QemuOpts first. */
59
@@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[])
60
qemu_opts_del(opts);
61
62
args = keyval_parse(optarg, "qom-type", &error_fatal);
63
- qmp_object_add(args, &ret_data, &error_fatal);
64
+ user_creatable_add_dict(args, true, &error_fatal);
65
qobject_unref(args);
66
- qobject_unref(ret_data);
67
break;
68
}
69
default:
70
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/qom/object_interfaces.c
73
+++ b/qom/object_interfaces.c
74
@@ -XXX,XX +XXX,XX @@ out:
75
return obj;
29
}
76
}
30
77
31
+/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
78
-void user_creatable_add_dict(QDict *qdict, Error **errp)
32
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
79
+void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp)
33
{
80
{
34
+ BdrvChild *child, *tmp;
81
Visitor *v;
35
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
82
Object *obj;
36
83
@@ -XXX,XX +XXX,XX @@ void user_creatable_add_dict(QDict *qdict, Error **errp)
37
if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
84
}
38
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
85
qdict_del(qdict, "id");
39
data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
86
40
bdrv_coroutine_enter(bs, data.co);
87
- v = qobject_input_visitor_new(QOBJECT(qdict));
41
BDRV_POLL_WHILE(bs, !data.done);
88
+ if (keyval) {
42
+
89
+ v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
43
+ QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
90
+ } else {
44
+ bdrv_drain_invoke(child->bs, begin);
91
+ v = qobject_input_visitor_new(QOBJECT(qdict));
45
+ }
92
+ }
93
obj = user_creatable_add_type(type, id, qdict, v, errp);
94
visit_free(v);
95
object_unref(obj);
96
diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/qom/qom-qmp-cmds.c
99
+++ b/qom/qom-qmp-cmds.c
100
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
101
qobject_unref(pdict);
102
}
103
104
- user_creatable_add_dict(qdict, errp);
105
+ user_creatable_add_dict(qdict, false, errp);
46
}
106
}
47
107
48
static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
108
void qmp_object_del(const char *id, Error **errp)
49
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
50
BdrvChild *child, *tmp;
51
bool waited;
52
53
- /* Ensure any pending metadata writes are submitted to bs->file. */
54
- bdrv_drain_invoke(bs, begin);
55
-
56
/* Wait for drained requests to finish */
57
waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
58
59
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
60
bdrv_parent_drained_begin(bs);
61
}
62
63
+ bdrv_drain_invoke(bs, true);
64
bdrv_drain_recurse(bs, true);
65
}
66
67
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
68
}
69
70
bdrv_parent_drained_end(bs);
71
+ bdrv_drain_invoke(bs, false);
72
bdrv_drain_recurse(bs, false);
73
aio_enable_external(bdrv_get_aio_context(bs));
74
}
75
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
76
aio_context_acquire(aio_context);
77
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
78
if (aio_context == bdrv_get_aio_context(bs)) {
79
+ /* FIXME Calling this multiple times is wrong */
80
+ bdrv_drain_invoke(bs, true);
81
waited |= bdrv_drain_recurse(bs, true);
82
}
83
}
84
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
85
aio_context_acquire(aio_context);
86
aio_enable_external(aio_context);
87
bdrv_parent_drained_end(bs);
88
+ bdrv_drain_invoke(bs, false);
89
bdrv_drain_recurse(bs, false);
90
aio_context_release(aio_context);
91
}
92
--
109
--
93
2.13.6
110
2.25.3
94
111
95
112
diff view generated by jsdifflib