1
The following changes since commit 9e06029aea3b2eca1d5261352e695edc1e7d7b8b:
1
The following changes since commit 16aaacb307ed607b9780c12702c44f0fe52edc7e:
2
2
3
Update version for v4.1.0 release (2019-08-15 13:03:37 +0100)
3
Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20200430' into staging (2020-04-30 14:00:36 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
8
8
9
for you to fetch changes up to a6b257a08e3d72219f03e461a52152672fec0612:
9
for you to fetch changes up to eaae29ef89d498d0eac553c77b554f310a47f809:
10
10
11
file-posix: Handle undetectable alignment (2019-08-16 11:29:11 +0200)
11
qemu-storage-daemon: Fix non-string --object properties (2020-04-30 17:51:07 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches:
14
Block layer patches:
15
15
16
- file-posix: Fix O_DIRECT alignment detection
16
- Fix resize (extending) of short overlays
17
- Fixes for concurrent block jobs
17
- nvme: introduce PMR support from NVMe 1.4 spec
18
- block-backend: Queue requests while drained (fix IDE vs. job crashes)
18
- qemu-storage-daemon: Fix non-string --object properties
19
- qemu-img convert: Deprecate using -n and -o together
20
- iotests: Migration tests with filter nodes
21
- iotests: More media change tests
22
19
23
----------------------------------------------------------------
20
----------------------------------------------------------------
24
Kevin Wolf (10):
21
Alberto Garcia (1):
25
iotests/118: Test media change for scsi-cd
22
qcow2: Add incompatibility note between backing files and raw external data files
26
iotests/118: Create test classes dynamically
27
iotests/118: Add -blockdev based tests
28
iotests: Move migration helpers to iotests.py
29
iotests: Test migration with all kinds of filter nodes
30
block: Simplify bdrv_filter_default_perms()
31
block: Remove blk_pread_unthrottled()
32
mirror: Keep mirror_top_bs drained after dropping permissions
33
block-backend: Queue requests while drained
34
qemu-img convert: Deprecate using -n and -o together
35
23
36
Max Reitz (5):
24
Andrzej Jakowski (1):
37
block: Keep subtree drained in drop_intermediate
25
nvme: introduce PMR support from NVMe 1.4 spec
38
block: Reduce (un)drains when replacing a child
39
tests: Test polling in bdrv_drop_intermediate()
40
tests: Test mid-drain bdrv_replace_child_noperm()
41
iotests: Add test for concurrent stream/commit
42
26
43
Nir Soffer (1):
27
Kevin Wolf (12):
44
file-posix: Handle undetectable alignment
28
block: Add flags to BlockDriver.bdrv_co_truncate()
29
block: Add flags to bdrv(_co)_truncate()
30
block-backend: Add flags to blk_truncate()
31
qcow2: Support BDRV_REQ_ZERO_WRITE for truncate
32
raw-format: Support BDRV_REQ_ZERO_WRITE for truncate
33
file-posix: Support BDRV_REQ_ZERO_WRITE for truncate
34
block: truncate: Don't make backing file data visible
35
iotests: Filter testfiles out in filter_img_info()
36
iotests: Test committing to short backing file
37
qcow2: Forward ZERO_WRITE flag for full preallocation
38
qom: Factor out user_creatable_add_dict()
39
qemu-storage-daemon: Fix non-string --object properties
45
40
46
include/sysemu/block-backend.h | 3 +-
41
Paolo Bonzini (1):
47
block.c | 63 +++---
42
qemu-iotests: allow qcow2 external discarded clusters to contain stale data
48
block/backup.c | 1 +
49
block/block-backend.c | 69 ++++--
50
block/commit.c | 2 +
51
block/file-posix.c | 36 +++-
52
block/mirror.c | 7 +-
53
blockjob.c | 3 +
54
hw/block/hd-geometry.c | 7 +-
55
qemu-img.c | 5 +
56
tests/test-bdrv-drain.c | 476 +++++++++++++++++++++++++++++++++++++++++
57
qemu-deprecated.texi | 7 +
58
tests/qemu-iotests/118 | 84 ++++----
59
tests/qemu-iotests/118.out | 4 +-
60
tests/qemu-iotests/234 | 30 +--
61
tests/qemu-iotests/258 | 163 ++++++++++++++
62
tests/qemu-iotests/258.out | 33 +++
63
tests/qemu-iotests/262 | 82 +++++++
64
tests/qemu-iotests/262.out | 17 ++
65
tests/qemu-iotests/group | 2 +
66
tests/qemu-iotests/iotests.py | 16 ++
67
21 files changed, 983 insertions(+), 127 deletions(-)
68
create mode 100755 tests/qemu-iotests/258
69
create mode 100644 tests/qemu-iotests/258.out
70
create mode 100755 tests/qemu-iotests/262
71
create mode 100644 tests/qemu-iotests/262.out
72
43
44
docs/interop/qcow2.txt | 3 +
45
hw/block/nvme.h | 2 +
46
include/block/block.h | 5 +-
47
include/block/block_int.h | 10 +-
48
include/block/nvme.h | 172 ++++++++++++++++++++++++++
49
include/qom/object_interfaces.h | 16 +++
50
include/sysemu/block-backend.h | 2 +-
51
block.c | 3 +-
52
block/block-backend.c | 4 +-
53
block/commit.c | 4 +-
54
block/crypto.c | 7 +-
55
block/file-posix.c | 6 +-
56
block/file-win32.c | 2 +-
57
block/gluster.c | 1 +
58
block/io.c | 43 ++++++-
59
block/iscsi.c | 2 +-
60
block/mirror.c | 2 +-
61
block/nfs.c | 3 +-
62
block/parallels.c | 6 +-
63
block/qcow.c | 4 +-
64
block/qcow2-cluster.c | 2 +-
65
block/qcow2-refcount.c | 2 +-
66
block/qcow2.c | 73 +++++++++--
67
block/qed.c | 3 +-
68
block/raw-format.c | 6 +-
69
block/rbd.c | 1 +
70
block/sheepdog.c | 4 +-
71
block/ssh.c | 2 +-
72
block/vdi.c | 2 +-
73
block/vhdx-log.c | 2 +-
74
block/vhdx.c | 6 +-
75
block/vmdk.c | 8 +-
76
block/vpc.c | 2 +-
77
blockdev.c | 2 +-
78
hw/block/nvme.c | 109 ++++++++++++++++
79
qemu-img.c | 2 +-
80
qemu-io-cmds.c | 2 +-
81
qemu-storage-daemon.c | 4 +-
82
qom/object_interfaces.c | 31 +++++
83
qom/qom-qmp-cmds.c | 24 +---
84
tests/test-block-iothread.c | 9 +-
85
tests/qemu-iotests/iotests.py | 5 +-
86
hw/block/Makefile.objs | 2 +-
87
hw/block/trace-events | 4 +
88
tests/qemu-iotests/244 | 10 +-
89
tests/qemu-iotests/244.out | 9 +-
90
tests/qemu-iotests/274 | 155 +++++++++++++++++++++++
91
tests/qemu-iotests/274.out | 268 ++++++++++++++++++++++++++++++++++++++++
92
tests/qemu-iotests/group | 1 +
93
49 files changed, 951 insertions(+), 96 deletions(-)
94
create mode 100755 tests/qemu-iotests/274
95
create mode 100644 tests/qemu-iotests/274.out
96
97
diff view generated by jsdifflib
Deleted patch
1
The test covered only floppy and ide-cd. Add scsi-cd as well.
2
1
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Reviewed-by: Max Reitz <mreitz@redhat.com>
5
---
6
tests/qemu-iotests/118 | 20 ++++++++++++++++++++
7
tests/qemu-iotests/118.out | 4 ++--
8
2 files changed, 22 insertions(+), 2 deletions(-)
9
10
diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118
11
index XXXXXXX..XXXXXXX 100755
12
--- a/tests/qemu-iotests/118
13
+++ b/tests/qemu-iotests/118
14
@@ -XXX,XX +XXX,XX @@ def interface_to_device_name(interface):
15
return 'ide-cd'
16
elif interface == 'floppy':
17
return 'floppy'
18
+ elif interface == 'scsi':
19
+ return 'scsi-cd'
20
else:
21
return None
22
23
@@ -XXX,XX +XXX,XX @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass):
24
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
25
self.vm = iotests.VM()
26
self.vm.add_drive(old_img, 'media=%s' % media, 'none')
27
+ if interface == 'scsi':
28
+ self.vm.add_device('virtio-scsi-pci')
29
self.vm.add_device('%s,drive=drive0,id=%s' %
30
(interface_to_device_name(interface),
31
self.device_name))
32
@@ -XXX,XX +XXX,XX @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
33
def setUp(self, media, interface):
34
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
35
self.vm = iotests.VM().add_drive(None, 'media=%s' % media, 'none')
36
+ if interface == 'scsi':
37
+ self.vm.add_device('virtio-scsi-pci')
38
self.vm.add_device('%s,drive=drive0,id=%s' %
39
(interface_to_device_name(interface),
40
self.device_name))
41
@@ -XXX,XX +XXX,XX @@ class TestCDInitiallyEmpty(TestInitiallyEmpty):
42
def setUp(self):
43
self.TestInitiallyEmpty.setUp(self, 'cdrom', 'ide')
44
45
+class TestSCSICDInitiallyFilled(TestInitiallyFilled):
46
+ TestInitiallyFilled = TestInitiallyFilled
47
+ has_real_tray = True
48
+
49
+ def setUp(self):
50
+ self.TestInitiallyFilled.setUp(self, 'cdrom', 'scsi')
51
+
52
+class TestSCSICDInitiallyEmpty(TestInitiallyEmpty):
53
+ TestInitiallyEmpty = TestInitiallyEmpty
54
+ has_real_tray = True
55
+
56
+ def setUp(self):
57
+ self.TestInitiallyEmpty.setUp(self, 'cdrom', 'scsi')
58
+
59
class TestFloppyInitiallyFilled(TestInitiallyFilled):
60
TestInitiallyFilled = TestInitiallyFilled
61
has_real_tray = False
62
diff --git a/tests/qemu-iotests/118.out b/tests/qemu-iotests/118.out
63
index XXXXXXX..XXXXXXX 100644
64
--- a/tests/qemu-iotests/118.out
65
+++ b/tests/qemu-iotests/118.out
66
@@ -XXX,XX +XXX,XX @@
67
-...............................................................
68
+.........................................................................................
69
----------------------------------------------------------------------
70
-Ran 63 tests
71
+Ran 89 tests
72
73
OK
74
--
75
2.20.1
76
77
diff view generated by jsdifflib
1
mirror_top_bs is currently implicitly drained through its connection to
1
From: Alberto Garcia <berto@igalia.com>
2
the source or the target node. However, the drain section for target_bs
3
ends early after moving mirror_top_bs from src to target_bs, so that
4
requests can already be restarted while mirror_top_bs is still present
5
in the chain, but has dropped all permissions and therefore runs into an
6
assertion failure like this:
7
2
8
qemu-system-x86_64: block/io.c:1634: bdrv_co_write_req_prepare:
3
Backing files and raw external data files are mutually exclusive.
9
Assertion `child->perm & BLK_PERM_WRITE' failed.
4
The documentation of the raw external data bit (in autoclear_features)
5
already indicates that, but we should also mention it on the other
6
side.
10
7
11
Keep mirror_top_bs drained until all graph changes have completed.
8
Suggested-by: Eric Blake <eblake@redhat.com>
9
Signed-off-by: Alberto Garcia <berto@igalia.com>
10
Message-Id: <20200410121816.8334-1-berto@igalia.com>
11
Reviewed-by: Eric Blake <eblake@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
14
docs/interop/qcow2.txt | 3 +++
15
1 file changed, 3 insertions(+)
12
16
13
Cc: qemu-stable@nongnu.org
17
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
---
17
block/mirror.c | 6 +++++-
18
1 file changed, 5 insertions(+), 1 deletion(-)
19
20
diff --git a/block/mirror.c b/block/mirror.c
21
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
22
--- a/block/mirror.c
19
--- a/docs/interop/qcow2.txt
23
+++ b/block/mirror.c
20
+++ b/docs/interop/qcow2.txt
24
@@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job)
21
@@ -XXX,XX +XXX,XX @@ The first cluster of a qcow2 image contains the file header:
25
s->target = NULL;
22
is stored (NB: The string is not null terminated). 0 if the
26
23
image doesn't have a backing file.
27
/* We don't access the source any more. Dropping any WRITE/RESIZE is
24
28
- * required before it could become a backing file of target_bs. */
25
+ Note: backing files are incompatible with raw external data
29
+ * required before it could become a backing file of target_bs. Not having
26
+ files (auto-clear feature bit 1).
30
+ * these permissions any more means that we can't allow any new requests on
27
+
31
+ * mirror_top_bs from now on, so keep it drained. */
28
16 - 19: backing_file_size
32
+ bdrv_drained_begin(mirror_top_bs);
29
Length of the backing file name in bytes. Must not be
33
bs_opaque->stop = true;
30
longer than 1023 bytes. Undefined if the image doesn't have
34
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
35
&error_abort);
36
@@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job)
37
bs_opaque->job = NULL;
38
39
bdrv_drained_end(src);
40
+ bdrv_drained_end(mirror_top_bs);
41
s->in_drain = false;
42
bdrv_unref(mirror_top_bs);
43
bdrv_unref(src);
44
--
31
--
45
2.20.1
32
2.25.3
46
33
47
34
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
bdrv_drop_intermediate() calls BdrvChildRole.update_filename(). That
3
Test 244 checks the expected behavior of qcow2 external data files
4
may poll, thus changing the graph, which potentially breaks the
4
with respect to zero and discarded clusters. Filesystems however
5
QLIST_FOREACH_SAFE() loop.
5
are free to ignore discard requests, and this seems to be the
6
case for overlayfs. Relax the tests to skip checks on the
7
external data file for discarded areas, which implies not using
8
qemu-img compare in the data_file_raw=on case.
6
9
7
Just keep the whole subtree drained. This is probably the right thing
10
This fixes docker tests on RHEL8.
8
to do anyway (dropping nodes while the subtree is not drained seems
9
wrong).
10
11
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
Cc: Kevin Wolf <kwolf@redhat.com>
12
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
13
Cc: qemu-block@nongnu.org
14
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
15
Message-Id: <20200409191006.24429-1-pbonzini@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
16
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
17
---
15
block.c | 2 ++
18
tests/qemu-iotests/244 | 10 ++++++++--
16
1 file changed, 2 insertions(+)
19
tests/qemu-iotests/244.out | 9 ++++++---
20
2 files changed, 14 insertions(+), 5 deletions(-)
17
21
18
diff --git a/block.c b/block.c
22
diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244
23
index XXXXXXX..XXXXXXX 100755
24
--- a/tests/qemu-iotests/244
25
+++ b/tests/qemu-iotests/244
26
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \
27
echo
28
$QEMU_IO -c 'read -P 0 0 1M' \
29
-c 'read -P 0x11 1M 1M' \
30
- -c 'read -P 0 2M 2M' \
31
-c 'read -P 0x11 4M 1M' \
32
-c 'read -P 0 5M 1M' \
33
-f raw "$TEST_IMG.data" |
34
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \
35
-f $IMGFMT "$TEST_IMG" |
36
_filter_qemu_io
37
38
+# Discarded clusters are only marked as such in the qcow2 metadata, but
39
+# they can contain stale data in the external data file. Instead, zero
40
+# clusters must be zeroed in the external data file too.
41
echo
42
-$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.data"
43
+$QEMU_IO -c 'read -P 0 0 1M' \
44
+ -c 'read -P 0x11 1M 1M' \
45
+ -c 'read -P 0 3M 3M' \
46
+ -f raw "$TEST_IMG".data |
47
+ _filter_qemu_io
48
49
echo -n "qcow2 file size after I/O: "
50
du -b $TEST_IMG | cut -f1
51
diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out
19
index XXXXXXX..XXXXXXX 100644
52
index XXXXXXX..XXXXXXX 100644
20
--- a/block.c
53
--- a/tests/qemu-iotests/244.out
21
+++ b/block.c
54
+++ b/tests/qemu-iotests/244.out
22
@@ -XXX,XX +XXX,XX @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
55
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0
23
int ret = -EIO;
56
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
24
57
read 1048576/1048576 bytes at offset 1048576
25
bdrv_ref(top);
58
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
26
+ bdrv_subtree_drained_begin(top);
59
-read 2097152/2097152 bytes at offset 2097152
27
60
-2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
28
if (!top->drv || !base->drv) {
61
read 1048576/1048576 bytes at offset 4194304
29
goto exit;
62
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
30
@@ -XXX,XX +XXX,XX @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
63
read 1048576/1048576 bytes at offset 5242880
31
64
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 1048576
32
ret = 0;
65
read 4194304/4194304 bytes at offset 2097152
33
exit:
66
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
34
+ bdrv_subtree_drained_end(top);
67
35
bdrv_unref(top);
68
-Images are identical.
36
return ret;
69
+read 1048576/1048576 bytes at offset 0
37
}
70
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
71
+read 1048576/1048576 bytes at offset 1048576
72
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
73
+read 3145728/3145728 bytes at offset 3145728
74
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
75
qcow2 file size after I/O: 327680
76
77
=== bdrv_co_block_status test for file and offset=0 ===
38
--
78
--
39
2.20.1
79
2.25.3
40
80
41
81
diff view generated by jsdifflib
1
bdrv_create options specified with -o have no effect when skipping image
1
This adds a new BdrvRequestFlags parameter to the .bdrv_co_truncate()
2
creation with -n, so this doesn't make sense. Warn against the misuse
2
driver callbacks, and a supported_truncate_flags field in
3
and deprecate the combination so we can make it a hard error later.
3
BlockDriverState that allows drivers to advertise support for request
4
flags in the context of truncate.
5
6
For now, we always pass 0 and no drivers declare support for any flag.
4
7
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Alberto Garcia <berto@igalia.com>
6
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Reviewed-by: John Snow <jsnow@redhat.com>
12
Message-Id: <20200424125448.63318-2-kwolf@redhat.com>
8
Reviewed-by: Eric Blake <eblake@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
---
14
---
10
qemu-img.c | 5 +++++
15
include/block/block_int.h | 10 +++++++++-
11
qemu-deprecated.texi | 7 +++++++
16
block/crypto.c | 3 ++-
12
2 files changed, 12 insertions(+)
17
block/file-posix.c | 2 +-
13
18
block/file-win32.c | 2 +-
14
diff --git a/qemu-img.c b/qemu-img.c
19
block/gluster.c | 1 +
15
index XXXXXXX..XXXXXXX 100644
20
block/io.c | 8 +++++++-
16
--- a/qemu-img.c
21
block/iscsi.c | 2 +-
17
+++ b/qemu-img.c
22
block/nfs.c | 3 ++-
18
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
23
block/qcow2.c | 2 +-
19
goto fail_getopt;
24
block/qed.c | 1 +
25
block/raw-format.c | 2 +-
26
block/rbd.c | 1 +
27
block/sheepdog.c | 4 ++--
28
block/ssh.c | 2 +-
29
tests/test-block-iothread.c | 3 ++-
30
15 files changed, 33 insertions(+), 13 deletions(-)
31
32
diff --git a/include/block/block_int.h b/include/block/block_int.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/include/block/block_int.h
35
+++ b/include/block/block_int.h
36
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
37
*/
38
int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
39
bool exact, PreallocMode prealloc,
40
- Error **errp);
41
+ BdrvRequestFlags flags, Error **errp);
42
43
int64_t (*bdrv_getlength)(BlockDriverState *bs);
44
bool has_variable_length;
45
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
46
/* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
47
* BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
48
unsigned int supported_zero_flags;
49
+ /*
50
+ * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
51
+ *
52
+ * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
53
+ * that any added space reads as all zeros. If this can't be guaranteed,
54
+ * the operation must fail.
55
+ */
56
+ unsigned int supported_truncate_flags;
57
58
/* the following member gives a name to every node on the bs graph. */
59
char node_name[32];
60
diff --git a/block/crypto.c b/block/crypto.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/block/crypto.c
63
+++ b/block/crypto.c
64
@@ -XXX,XX +XXX,XX @@ static int block_crypto_co_create_generic(BlockDriverState *bs,
65
66
static int coroutine_fn
67
block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
68
- PreallocMode prealloc, Error **errp)
69
+ PreallocMode prealloc, BdrvRequestFlags flags,
70
+ Error **errp)
71
{
72
BlockCrypto *crypto = bs->opaque;
73
uint64_t payload_offset =
74
diff --git a/block/file-posix.c b/block/file-posix.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/block/file-posix.c
77
+++ b/block/file-posix.c
78
@@ -XXX,XX +XXX,XX @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset,
79
80
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
81
bool exact, PreallocMode prealloc,
82
- Error **errp)
83
+ BdrvRequestFlags flags, Error **errp)
84
{
85
BDRVRawState *s = bs->opaque;
86
struct stat st;
87
diff --git a/block/file-win32.c b/block/file-win32.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/block/file-win32.c
90
+++ b/block/file-win32.c
91
@@ -XXX,XX +XXX,XX @@ static void raw_close(BlockDriverState *bs)
92
93
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
94
bool exact, PreallocMode prealloc,
95
- Error **errp)
96
+ BdrvRequestFlags flags, Error **errp)
97
{
98
BDRVRawState *s = bs->opaque;
99
LONG low, high;
100
diff --git a/block/gluster.c b/block/gluster.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/block/gluster.c
103
+++ b/block/gluster.c
104
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs,
105
int64_t offset,
106
bool exact,
107
PreallocMode prealloc,
108
+ BdrvRequestFlags flags,
109
Error **errp)
110
{
111
BDRVGlusterState *s = bs->opaque;
112
diff --git a/block/io.c b/block/io.c
113
index XXXXXXX..XXXXXXX 100644
114
--- a/block/io.c
115
+++ b/block/io.c
116
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
117
BlockDriverState *bs = child->bs;
118
BlockDriver *drv = bs->drv;
119
BdrvTrackedRequest req;
120
+ BdrvRequestFlags flags = 0;
121
int64_t old_size, new_bytes;
122
int ret;
123
124
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
20
}
125
}
21
126
22
+ if (skip_create && options) {
127
if (drv->bdrv_co_truncate) {
23
+ warn_report("-o has no effect when skipping image creation");
128
- ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp);
24
+ warn_report("This will become an error in future QEMU versions.");
129
+ if (flags & ~bs->supported_truncate_flags) {
25
+ }
130
+ error_setg(errp, "Block driver does not support requested flags");
26
+
131
+ ret = -ENOTSUP;
27
s.src_num = argc - optind - 1;
132
+ goto out;
28
out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
133
+ }
29
134
+ ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
30
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
135
} else if (bs->file && drv->is_filter) {
31
index XXXXXXX..XXXXXXX 100644
136
ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
32
--- a/qemu-deprecated.texi
137
} else {
33
+++ b/qemu-deprecated.texi
138
diff --git a/block/iscsi.c b/block/iscsi.c
34
@@ -XXX,XX +XXX,XX @@ to just export the entire image and then mount only /dev/nbd0p1 than
139
index XXXXXXX..XXXXXXX 100644
35
it is to reinvoke @command{qemu-nbd -c /dev/nbd0} limited to just a
140
--- a/block/iscsi.c
36
subset of the image.
141
+++ b/block/iscsi.c
37
142
@@ -XXX,XX +XXX,XX @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
38
+@subsection qemu-img convert -n -o (since 4.2.0)
143
39
+
144
static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
40
+All options specified in @option{-o} are image creation options, so
145
bool exact, PreallocMode prealloc,
41
+they have no effect when used with @option{-n} to skip image creation.
146
- Error **errp)
42
+Silently ignored options can be confusing, so this combination of
147
+ BdrvRequestFlags flags, Error **errp)
43
+options will be made an error in future versions.
148
{
44
+
149
IscsiLun *iscsilun = bs->opaque;
45
@section Build system
150
int64_t cur_length;
46
151
diff --git a/block/nfs.c b/block/nfs.c
47
@subsection Python 2 support (since 4.1.0)
152
index XXXXXXX..XXXXXXX 100644
153
--- a/block/nfs.c
154
+++ b/block/nfs.c
155
@@ -XXX,XX +XXX,XX @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
156
157
static int coroutine_fn
158
nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
159
- PreallocMode prealloc, Error **errp)
160
+ PreallocMode prealloc, BdrvRequestFlags flags,
161
+ Error **errp)
162
{
163
NFSClient *client = bs->opaque;
164
int ret;
165
diff --git a/block/qcow2.c b/block/qcow2.c
166
index XXXXXXX..XXXXXXX 100644
167
--- a/block/qcow2.c
168
+++ b/block/qcow2.c
169
@@ -XXX,XX +XXX,XX @@ fail:
170
171
static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
172
bool exact, PreallocMode prealloc,
173
- Error **errp)
174
+ BdrvRequestFlags flags, Error **errp)
175
{
176
BDRVQcow2State *s = bs->opaque;
177
uint64_t old_length;
178
diff --git a/block/qed.c b/block/qed.c
179
index XXXXXXX..XXXXXXX 100644
180
--- a/block/qed.c
181
+++ b/block/qed.c
182
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs,
183
int64_t offset,
184
bool exact,
185
PreallocMode prealloc,
186
+ BdrvRequestFlags flags,
187
Error **errp)
188
{
189
BDRVQEDState *s = bs->opaque;
190
diff --git a/block/raw-format.c b/block/raw-format.c
191
index XXXXXXX..XXXXXXX 100644
192
--- a/block/raw-format.c
193
+++ b/block/raw-format.c
194
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
195
196
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
197
bool exact, PreallocMode prealloc,
198
- Error **errp)
199
+ BdrvRequestFlags flags, Error **errp)
200
{
201
BDRVRawState *s = bs->opaque;
202
203
diff --git a/block/rbd.c b/block/rbd.c
204
index XXXXXXX..XXXXXXX 100644
205
--- a/block/rbd.c
206
+++ b/block/rbd.c
207
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
208
int64_t offset,
209
bool exact,
210
PreallocMode prealloc,
211
+ BdrvRequestFlags flags,
212
Error **errp)
213
{
214
int r;
215
diff --git a/block/sheepdog.c b/block/sheepdog.c
216
index XXXXXXX..XXXXXXX 100644
217
--- a/block/sheepdog.c
218
+++ b/block/sheepdog.c
219
@@ -XXX,XX +XXX,XX @@ static int64_t sd_getlength(BlockDriverState *bs)
220
221
static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset,
222
bool exact, PreallocMode prealloc,
223
- Error **errp)
224
+ BdrvRequestFlags flags, Error **errp)
225
{
226
BDRVSheepdogState *s = bs->opaque;
227
int ret, fd;
228
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
229
230
assert(!flags);
231
if (offset > s->inode.vdi_size) {
232
- ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL);
233
+ ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL);
234
if (ret < 0) {
235
return ret;
236
}
237
diff --git a/block/ssh.c b/block/ssh.c
238
index XXXXXXX..XXXXXXX 100644
239
--- a/block/ssh.c
240
+++ b/block/ssh.c
241
@@ -XXX,XX +XXX,XX @@ static int64_t ssh_getlength(BlockDriverState *bs)
242
243
static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
244
bool exact, PreallocMode prealloc,
245
- Error **errp)
246
+ BdrvRequestFlags flags, Error **errp)
247
{
248
BDRVSSHState *s = bs->opaque;
249
250
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
251
index XXXXXXX..XXXXXXX 100644
252
--- a/tests/test-block-iothread.c
253
+++ b/tests/test-block-iothread.c
254
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs,
255
256
static int coroutine_fn
257
bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
258
- PreallocMode prealloc, Error **errp)
259
+ PreallocMode prealloc, BdrvRequestFlags flags,
260
+ Error **errp)
261
{
262
return 0;
263
}
48
--
264
--
49
2.20.1
265
2.25.3
50
266
51
267
diff view generated by jsdifflib
1
The functionality offered by blk_pread_unthrottled() goes back to commit
1
Now that block drivers can support flags for .bdrv_co_truncate, expose
2
498e386c584. Then, we couldn't perform I/O throttling with synchronous
2
the parameter in the node level interfaces bdrv_co_truncate() and
3
requests because timers wouldn't be executed in polling loops. So the
3
bdrv_truncate().
4
commit automatically disabled I/O throttling as soon as a synchronous
5
request was issued.
6
7
However, for geometry detection during disk initialisation, we always
8
used (and still use) synchronous requests even if guest requests use AIO
9
later. Geometry detection was not wanted to disable I/O throttling, so
10
bdrv_pread_unthrottled() was introduced which disabled throttling only
11
temporarily.
12
13
All of this isn't necessary any more because we do run timers in polling
14
loop and even synchronous requests are now using coroutine
15
infrastructure internally. For this reason, commit 90c78624f already
16
removed the automatic disabling of I/O throttling.
17
18
It's time to get rid of the workaround for the removed code, and its
19
abuse of blk_root_drained_begin()/end(), as well.
20
4
21
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Reviewed-by: Alberto Garcia <berto@igalia.com>
22
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Reviewed-by: Max Reitz <mreitz@redhat.com>
9
Message-Id: <20200424125448.63318-3-kwolf@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
23
---
11
---
24
include/sysemu/block-backend.h | 2 --
12
include/block/block.h | 5 +++--
25
block/block-backend.c | 16 ----------------
13
block/block-backend.c | 2 +-
26
hw/block/hd-geometry.c | 7 +------
14
block/crypto.c | 2 +-
27
3 files changed, 1 insertion(+), 24 deletions(-)
15
block/io.c | 12 +++++++-----
16
block/parallels.c | 6 +++---
17
block/qcow.c | 4 ++--
18
block/qcow2-refcount.c | 2 +-
19
block/qcow2.c | 15 +++++++++------
20
block/raw-format.c | 2 +-
21
block/vhdx-log.c | 2 +-
22
block/vhdx.c | 2 +-
23
block/vmdk.c | 2 +-
24
tests/test-block-iothread.c | 6 +++---
25
13 files changed, 34 insertions(+), 28 deletions(-)
28
26
29
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
27
diff --git a/include/block/block.h b/include/block/block.h
30
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
31
--- a/include/sysemu/block-backend.h
29
--- a/include/block/block.h
32
+++ b/include/sysemu/block-backend.h
30
+++ b/include/block/block.h
33
@@ -XXX,XX +XXX,XX @@ char *blk_get_attached_dev_id(BlockBackend *blk);
31
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
34
BlockBackend *blk_by_dev(void *dev);
32
void bdrv_refresh_filename(BlockDriverState *bs);
35
BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
33
36
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
34
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
37
-int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
35
- PreallocMode prealloc, Error **errp);
38
- int bytes);
36
+ PreallocMode prealloc, BdrvRequestFlags flags,
39
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
37
+ Error **errp);
40
unsigned int bytes, QEMUIOVector *qiov,
38
int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
41
BdrvRequestFlags flags);
39
- PreallocMode prealloc, Error **errp);
40
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
41
42
int64_t bdrv_nb_sectors(BlockDriverState *bs);
43
int64_t bdrv_getlength(BlockDriverState *bs);
42
diff --git a/block/block-backend.c b/block/block-backend.c
44
diff --git a/block/block-backend.c b/block/block-backend.c
43
index XXXXXXX..XXXXXXX 100644
45
index XXXXXXX..XXXXXXX 100644
44
--- a/block/block-backend.c
46
--- a/block/block-backend.c
45
+++ b/block/block-backend.c
47
+++ b/block/block-backend.c
46
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
48
@@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
47
return rwco.ret;
49
return -ENOMEDIUM;
48
}
50
}
49
51
50
-int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
52
- return bdrv_truncate(blk->root, offset, exact, prealloc, errp);
51
- int count)
53
+ return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp);
52
-{
54
}
53
- int ret;
55
54
-
56
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
55
- ret = blk_check_byte_request(blk, offset, count);
57
diff --git a/block/crypto.c b/block/crypto.c
56
- if (ret < 0) {
58
index XXXXXXX..XXXXXXX 100644
57
- return ret;
59
--- a/block/crypto.c
58
- }
60
+++ b/block/crypto.c
59
-
61
@@ -XXX,XX +XXX,XX @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
60
- blk_root_drained_begin(blk->root);
62
61
- ret = blk_pread(blk, offset, buf, count);
63
offset += payload_offset;
62
- blk_root_drained_end(blk->root, NULL);
64
63
- return ret;
65
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
64
-}
66
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
65
-
67
}
66
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
68
67
int bytes, BdrvRequestFlags flags)
69
static void block_crypto_close(BlockDriverState *bs)
70
diff --git a/block/io.c b/block/io.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/block/io.c
73
+++ b/block/io.c
74
@@ -XXX,XX +XXX,XX @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
75
* 'offset' bytes in length.
76
*/
77
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
78
- PreallocMode prealloc, Error **errp)
79
+ PreallocMode prealloc, BdrvRequestFlags flags,
80
+ Error **errp)
68
{
81
{
69
diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c
82
BlockDriverState *bs = child->bs;
70
index XXXXXXX..XXXXXXX 100644
83
BlockDriver *drv = bs->drv;
71
--- a/hw/block/hd-geometry.c
84
BdrvTrackedRequest req;
72
+++ b/hw/block/hd-geometry.c
85
- BdrvRequestFlags flags = 0;
73
@@ -XXX,XX +XXX,XX @@ static int guess_disk_lchs(BlockBackend *blk,
86
int64_t old_size, new_bytes;
74
87
int ret;
75
blk_get_geometry(blk, &nb_sectors);
88
76
89
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
77
- /**
90
}
78
- * The function will be invoked during startup not only in sync I/O mode,
91
ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
79
- * but also in async I/O mode. So the I/O throttling function has to
92
} else if (bs->file && drv->is_filter) {
80
- * be disabled temporarily here, not permanently.
93
- ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
81
- */
94
+ ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
82
- if (blk_pread_unthrottled(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) {
95
} else {
83
+ if (blk_pread(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) {
96
error_setg(errp, "Image format driver does not support resize");
97
ret = -ENOTSUP;
98
@@ -XXX,XX +XXX,XX @@ typedef struct TruncateCo {
99
int64_t offset;
100
bool exact;
101
PreallocMode prealloc;
102
+ BdrvRequestFlags flags;
103
Error **errp;
104
int ret;
105
} TruncateCo;
106
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
107
{
108
TruncateCo *tco = opaque;
109
tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact,
110
- tco->prealloc, tco->errp);
111
+ tco->prealloc, tco->flags, tco->errp);
112
aio_wait_kick();
113
}
114
115
int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
116
- PreallocMode prealloc, Error **errp)
117
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
118
{
119
Coroutine *co;
120
TruncateCo tco = {
121
@@ -XXX,XX +XXX,XX @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
122
.offset = offset,
123
.exact = exact,
124
.prealloc = prealloc,
125
+ .flags = flags,
126
.errp = errp,
127
.ret = NOT_DONE,
128
};
129
diff --git a/block/parallels.c b/block/parallels.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/block/parallels.c
132
+++ b/block/parallels.c
133
@@ -XXX,XX +XXX,XX @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
134
} else {
135
ret = bdrv_truncate(bs->file,
136
(s->data_end + space) << BDRV_SECTOR_BITS,
137
- false, PREALLOC_MODE_OFF, NULL);
138
+ false, PREALLOC_MODE_OFF, 0, NULL);
139
}
140
if (ret < 0) {
141
return ret;
142
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
143
* That means we have to pass exact=true.
144
*/
145
ret = bdrv_truncate(bs->file, res->image_end_offset, true,
146
- PREALLOC_MODE_OFF, &local_err);
147
+ PREALLOC_MODE_OFF, 0, &local_err);
148
if (ret < 0) {
149
error_report_err(local_err);
150
res->check_errors++;
151
@@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs)
152
153
/* errors are ignored, so we might as well pass exact=true */
154
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true,
155
- PREALLOC_MODE_OFF, NULL);
156
+ PREALLOC_MODE_OFF, 0, NULL);
157
}
158
159
g_free(s->bat_dirty_bmap);
160
diff --git a/block/qcow.c b/block/qcow.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/block/qcow.c
163
+++ b/block/qcow.c
164
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
165
return -E2BIG;
166
}
167
ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
168
- false, PREALLOC_MODE_OFF, NULL);
169
+ false, PREALLOC_MODE_OFF, 0, NULL);
170
if (ret < 0) {
171
return ret;
172
}
173
@@ -XXX,XX +XXX,XX @@ static int qcow_make_empty(BlockDriverState *bs)
174
l1_length) < 0)
84
return -1;
175
return -1;
85
}
176
ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false,
86
/* test msdos magic */
177
- PREALLOC_MODE_OFF, NULL);
178
+ PREALLOC_MODE_OFF, 0, NULL);
179
if (ret < 0)
180
return ret;
181
182
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/block/qcow2-refcount.c
185
+++ b/block/qcow2-refcount.c
186
@@ -XXX,XX +XXX,XX @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
187
}
188
189
ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
190
- PREALLOC_MODE_OFF, &local_err);
191
+ PREALLOC_MODE_OFF, 0, &local_err);
192
if (ret < 0) {
193
error_report_err(local_err);
194
goto resize_fail;
195
diff --git a/block/qcow2.c b/block/qcow2.c
196
index XXXXXXX..XXXXXXX 100644
197
--- a/block/qcow2.c
198
+++ b/block/qcow2.c
199
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
200
mode = PREALLOC_MODE_OFF;
201
}
202
ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
203
- mode, errp);
204
+ mode, 0, errp);
205
if (ret < 0) {
206
return ret;
207
}
208
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
209
* always fulfilled, so there is no need to pass it on.)
210
*/
211
bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
212
- false, PREALLOC_MODE_OFF, &local_err);
213
+ false, PREALLOC_MODE_OFF, 0, &local_err);
214
if (local_err) {
215
warn_reportf_err(local_err,
216
"Failed to truncate the tail of the image: ");
217
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
218
* file should be resized to the exact target size, too,
219
* so we pass @exact here.
220
*/
221
- ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp);
222
+ ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0,
223
+ errp);
224
if (ret < 0) {
225
goto fail;
226
}
227
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
228
new_file_size = allocation_start +
229
nb_new_data_clusters * s->cluster_size;
230
/* Image file grows, so @exact does not matter */
231
- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp);
232
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
233
+ errp);
234
if (ret < 0) {
235
error_prepend(errp, "Failed to resize underlying file: ");
236
qcow2_free_clusters(bs, allocation_start,
237
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
238
if (len < 0) {
239
return len;
240
}
241
- return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
242
+ return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0,
243
+ NULL);
244
}
245
246
if (offset_into_cluster(s, offset)) {
247
@@ -XXX,XX +XXX,XX @@ static int make_completely_empty(BlockDriverState *bs)
248
}
249
250
ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
251
- PREALLOC_MODE_OFF, &local_err);
252
+ PREALLOC_MODE_OFF, 0, &local_err);
253
if (ret < 0) {
254
error_report_err(local_err);
255
goto fail;
256
diff --git a/block/raw-format.c b/block/raw-format.c
257
index XXXXXXX..XXXXXXX 100644
258
--- a/block/raw-format.c
259
+++ b/block/raw-format.c
260
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
261
262
s->size = offset;
263
offset += s->offset;
264
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
265
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
266
}
267
268
static void raw_eject(BlockDriverState *bs, bool eject_flag)
269
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
270
index XXXXXXX..XXXXXXX 100644
271
--- a/block/vhdx-log.c
272
+++ b/block/vhdx-log.c
273
@@ -XXX,XX +XXX,XX @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
274
goto exit;
275
}
276
ret = bdrv_truncate(bs->file, new_file_size, false,
277
- PREALLOC_MODE_OFF, NULL);
278
+ PREALLOC_MODE_OFF, 0, NULL);
279
if (ret < 0) {
280
goto exit;
281
}
282
diff --git a/block/vhdx.c b/block/vhdx.c
283
index XXXXXXX..XXXXXXX 100644
284
--- a/block/vhdx.c
285
+++ b/block/vhdx.c
286
@@ -XXX,XX +XXX,XX @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
287
}
288
289
return bdrv_truncate(bs->file, *new_offset + s->block_size, false,
290
- PREALLOC_MODE_OFF, NULL);
291
+ PREALLOC_MODE_OFF, 0, NULL);
292
}
293
294
/*
295
diff --git a/block/vmdk.c b/block/vmdk.c
296
index XXXXXXX..XXXXXXX 100644
297
--- a/block/vmdk.c
298
+++ b/block/vmdk.c
299
@@ -XXX,XX +XXX,XX @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
300
}
301
length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
302
ret = bdrv_truncate(s->extents[i].file, length, false,
303
- PREALLOC_MODE_OFF, NULL);
304
+ PREALLOC_MODE_OFF, 0, NULL);
305
if (ret < 0) {
306
return ret;
307
}
308
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
309
index XXXXXXX..XXXXXXX 100644
310
--- a/tests/test-block-iothread.c
311
+++ b/tests/test-block-iothread.c
312
@@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c)
313
int ret;
314
315
/* Normal success path */
316
- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL);
317
+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
318
g_assert_cmpint(ret, ==, 0);
319
320
/* Early error: Negative offset */
321
- ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL);
322
+ ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, 0, NULL);
323
g_assert_cmpint(ret, ==, -EINVAL);
324
325
/* Error: Read-only image */
326
c->bs->read_only = true;
327
c->bs->open_flags &= ~BDRV_O_RDWR;
328
329
- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL);
330
+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
331
g_assert_cmpint(ret, ==, -EACCES);
332
333
c->bs->read_only = false;
87
--
334
--
88
2.20.1
335
2.25.3
89
336
90
337
diff view generated by jsdifflib
1
This fixes devices like IDE that can still start new requests from I/O
1
Now that the node-level interface bdrv_truncate() supports passing request
2
handlers in the CPU thread while the block backend is drained.
2
flags to the block driver, expose this on the BlockBackend level, too.
3
4
The basic assumption is that in a drain section, no new requests should
5
be allowed through a BlockBackend (blk_drained_begin/end don't exist,
6
we get drain sections only on the node level). However, there are two
7
special cases where requests should not be queued:
8
9
1. Block jobs: We already make sure that block jobs are paused in a
10
drain section, so they won't start new requests. However, if the
11
drain_begin is called on the job's BlockBackend first, it can happen
12
that we deadlock because the job stays busy until it reaches a pause
13
point - which it can't if its requests aren't processed any more.
14
15
The proper solution here would be to make all requests through the
16
job's filter node instead of using a BlockBackend. For now, just
17
disabling request queuing on the job BlockBackend is simpler.
18
19
2. In test cases where making requests through bdrv_* would be
20
cumbersome because we'd need a BdrvChild. As we already got the
21
functionality to disable request queuing from 1., use it in tests,
22
too, for convenience.
23
3
24
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Reviewed-by: Alberto Garcia <berto@igalia.com>
25
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-Id: <20200424125448.63318-4-kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
26
---
10
---
27
include/sysemu/block-backend.h | 1 +
11
include/sysemu/block-backend.h | 2 +-
28
block/backup.c | 1 +
12
block.c | 3 ++-
29
block/block-backend.c | 53 ++++++++++++++++++++++++++++++++--
13
block/block-backend.c | 4 ++--
30
block/commit.c | 2 ++
14
block/commit.c | 4 ++--
31
block/mirror.c | 1 +
15
block/crypto.c | 2 +-
32
blockjob.c | 3 ++
16
block/mirror.c | 2 +-
33
tests/test-bdrv-drain.c | 1 +
17
block/qcow2.c | 4 ++--
34
7 files changed, 59 insertions(+), 3 deletions(-)
18
block/qed.c | 2 +-
19
block/vdi.c | 2 +-
20
block/vhdx.c | 4 ++--
21
block/vmdk.c | 6 +++---
22
block/vpc.c | 2 +-
23
blockdev.c | 2 +-
24
qemu-img.c | 2 +-
25
qemu-io-cmds.c | 2 +-
26
15 files changed, 22 insertions(+), 21 deletions(-)
35
27
36
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
28
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
37
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
38
--- a/include/sysemu/block-backend.h
30
--- a/include/sysemu/block-backend.h
39
+++ b/include/sysemu/block-backend.h
31
+++ b/include/sysemu/block-backend.h
40
@@ -XXX,XX +XXX,XX @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
32
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
41
33
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
42
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
34
int bytes);
43
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
35
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
44
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
36
- PreallocMode prealloc, Error **errp);
45
void blk_iostatus_enable(BlockBackend *blk);
37
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
46
bool blk_iostatus_is_enabled(const BlockBackend *blk);
38
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes);
47
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
39
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
48
diff --git a/block/backup.c b/block/backup.c
40
int64_t pos, int size);
49
index XXXXXXX..XXXXXXX 100644
41
diff --git a/block.c b/block.c
50
--- a/block/backup.c
42
index XXXXXXX..XXXXXXX 100644
51
+++ b/block/backup.c
43
--- a/block.c
52
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
44
+++ b/block.c
53
if (ret < 0) {
45
@@ -XXX,XX +XXX,XX @@ static int64_t create_file_fallback_truncate(BlockBackend *blk,
54
goto error;
46
int64_t size;
55
}
47
int ret;
56
+ blk_set_disable_request_queuing(job->target, true);
48
57
49
- ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err);
58
job->on_source_error = on_source_error;
50
+ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
59
job->on_target_error = on_target_error;
51
+ &local_err);
52
if (ret < 0 && ret != -ENOTSUP) {
53
error_propagate(errp, local_err);
54
return ret;
60
diff --git a/block/block-backend.c b/block/block-backend.c
55
diff --git a/block/block-backend.c b/block/block-backend.c
61
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
62
--- a/block/block-backend.c
57
--- a/block/block-backend.c
63
+++ b/block/block-backend.c
58
+++ b/block/block-backend.c
64
@@ -XXX,XX +XXX,XX @@ struct BlockBackend {
59
@@ -XXX,XX +XXX,XX @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
65
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
66
67
int quiesce_counter;
68
+ CoQueue queued_requests;
69
+ bool disable_request_queuing;
70
+
71
VMChangeStateEntry *vmsh;
72
bool force_allow_inactivate;
73
74
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
75
76
block_acct_init(&blk->stats);
77
78
+ qemu_co_queue_init(&blk->queued_requests);
79
notifier_list_init(&blk->remove_bs_notifiers);
80
notifier_list_init(&blk->insert_bs_notifiers);
81
QLIST_INIT(&blk->aio_notifiers);
82
@@ -XXX,XX +XXX,XX @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
83
blk->allow_aio_context_change = allow;
84
}
60
}
85
61
86
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
62
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
87
+{
63
- PreallocMode prealloc, Error **errp)
88
+ blk->disable_request_queuing = disable;
64
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
89
+}
90
+
91
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
92
size_t size)
93
{
65
{
94
@@ -XXX,XX +XXX,XX @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
66
if (!blk_is_available(blk)) {
95
return 0;
67
error_setg(errp, "No medium inserted");
68
return -ENOMEDIUM;
69
}
70
71
- return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp);
72
+ return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
96
}
73
}
97
74
98
+static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
75
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
99
+{
100
+ if (blk->quiesce_counter && !blk->disable_request_queuing) {
101
+ qemu_co_queue_wait(&blk->queued_requests, NULL);
102
+ }
103
+}
104
+
105
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
106
unsigned int bytes, QEMUIOVector *qiov,
107
BdrvRequestFlags flags)
108
{
109
int ret;
110
- BlockDriverState *bs = blk_bs(blk);
111
+ BlockDriverState *bs;
112
113
+ blk_wait_while_drained(blk);
114
+
115
+ /* Call blk_bs() only after waiting, the graph may have changed */
116
+ bs = blk_bs(blk);
117
trace_blk_co_preadv(blk, bs, offset, bytes, flags);
118
119
ret = blk_check_byte_request(blk, offset, bytes);
120
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
121
BdrvRequestFlags flags)
122
{
123
int ret;
124
- BlockDriverState *bs = blk_bs(blk);
125
+ BlockDriverState *bs;
126
127
+ blk_wait_while_drained(blk);
128
+
129
+ /* Call blk_bs() only after waiting, the graph may have changed */
130
+ bs = blk_bs(blk);
131
trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
132
133
ret = blk_check_byte_request(blk, offset, bytes);
134
@@ -XXX,XX +XXX,XX @@ static void blk_aio_read_entry(void *opaque)
135
BlkRwCo *rwco = &acb->rwco;
136
QEMUIOVector *qiov = rwco->iobuf;
137
138
+ if (rwco->blk->quiesce_counter) {
139
+ blk_dec_in_flight(rwco->blk);
140
+ blk_wait_while_drained(rwco->blk);
141
+ blk_inc_in_flight(rwco->blk);
142
+ }
143
+
144
assert(qiov->size == acb->bytes);
145
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
146
qiov, rwco->flags);
147
@@ -XXX,XX +XXX,XX @@ static void blk_aio_write_entry(void *opaque)
148
BlkRwCo *rwco = &acb->rwco;
149
QEMUIOVector *qiov = rwco->iobuf;
150
151
+ if (rwco->blk->quiesce_counter) {
152
+ blk_dec_in_flight(rwco->blk);
153
+ blk_wait_while_drained(rwco->blk);
154
+ blk_inc_in_flight(rwco->blk);
155
+ }
156
+
157
assert(!qiov || qiov->size == acb->bytes);
158
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
159
qiov, rwco->flags);
160
@@ -XXX,XX +XXX,XX @@ void blk_aio_cancel_async(BlockAIOCB *acb)
161
162
int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
163
{
164
+ blk_wait_while_drained(blk);
165
+
166
if (!blk_is_available(blk)) {
167
return -ENOMEDIUM;
168
}
169
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
170
171
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
172
{
173
- int ret = blk_check_byte_request(blk, offset, bytes);
174
+ int ret;
175
+
176
+ blk_wait_while_drained(blk);
177
+
178
+ ret = blk_check_byte_request(blk, offset, bytes);
179
if (ret < 0) {
180
return ret;
181
}
182
@@ -XXX,XX +XXX,XX @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
183
184
int blk_co_flush(BlockBackend *blk)
185
{
186
+ blk_wait_while_drained(blk);
187
+
188
if (!blk_is_available(blk)) {
189
return -ENOMEDIUM;
190
}
191
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
192
if (blk->dev_ops && blk->dev_ops->drained_end) {
193
blk->dev_ops->drained_end(blk->dev_opaque);
194
}
195
+ while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
196
+ /* Resume all queued requests */
197
+ }
198
}
199
}
200
201
diff --git a/block/commit.c b/block/commit.c
76
diff --git a/block/commit.c b/block/commit.c
202
index XXXXXXX..XXXXXXX 100644
77
index XXXXXXX..XXXXXXX 100644
203
--- a/block/commit.c
78
--- a/block/commit.c
204
+++ b/block/commit.c
79
+++ b/block/commit.c
205
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
80
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn commit_run(Job *job, Error **errp)
206
if (ret < 0) {
81
}
207
goto fail;
82
208
}
83
if (base_len < len) {
209
+ blk_set_disable_request_queuing(s->base, true);
84
- ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL);
210
s->base_bs = base;
85
+ ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
211
86
if (ret) {
212
/* Required permissions are already taken with block_job_add_bdrv() */
87
goto out;
213
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
88
}
214
if (ret < 0) {
89
@@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs)
215
goto fail;
90
* grow the backing file image if possible. If not possible,
216
}
91
* we must return an error */
217
+ blk_set_disable_request_queuing(s->top, true);
92
if (length > backing_length) {
218
93
- ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF,
219
s->backing_file_str = g_strdup(backing_file_str);
94
+ ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0,
220
s->on_error = on_error;
95
&local_err);
96
if (ret < 0) {
97
error_report_err(local_err);
98
diff --git a/block/crypto.c b/block/crypto.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/block/crypto.c
101
+++ b/block/crypto.c
102
@@ -XXX,XX +XXX,XX @@ static ssize_t block_crypto_init_func(QCryptoBlock *block,
103
* which will be used by the crypto header
104
*/
105
return blk_truncate(data->blk, data->size + headerlen, false,
106
- data->prealloc, errp);
107
+ data->prealloc, 0, errp);
108
}
109
110
221
diff --git a/block/mirror.c b/block/mirror.c
111
diff --git a/block/mirror.c b/block/mirror.c
222
index XXXXXXX..XXXXXXX 100644
112
index XXXXXXX..XXXXXXX 100644
223
--- a/block/mirror.c
113
--- a/block/mirror.c
224
+++ b/block/mirror.c
114
+++ b/block/mirror.c
225
@@ -XXX,XX +XXX,XX @@ static BlockJob *mirror_start_job(
115
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
226
blk_set_force_allow_inactivate(s->target);
116
227
}
117
if (s->bdev_length > base_length) {
228
blk_set_allow_aio_context_change(s->target, true);
118
ret = blk_truncate(s->target, s->bdev_length, false,
229
+ blk_set_disable_request_queuing(s->target, true);
119
- PREALLOC_MODE_OFF, NULL);
230
120
+ PREALLOC_MODE_OFF, 0, NULL);
231
s->replaces = g_strdup(replaces);
121
if (ret < 0) {
232
s->on_source_error = on_source_error;
122
goto immediate_exit;
233
diff --git a/blockjob.c b/blockjob.c
123
}
234
index XXXXXXX..XXXXXXX 100644
124
diff --git a/block/qcow2.c b/block/qcow2.c
235
--- a/blockjob.c
125
index XXXXXXX..XXXXXXX 100644
236
+++ b/blockjob.c
126
--- a/block/qcow2.c
237
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
127
+++ b/block/qcow2.c
238
128
@@ -XXX,XX +XXX,XX @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
239
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
129
240
130
/* Okay, now that we have a valid image, let's give it the right size */
241
+ /* Disable request queuing in the BlockBackend to avoid deadlocks on drain:
131
ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
242
+ * The job reports that it's busy until it reaches a pause point. */
132
- errp);
243
+ blk_set_disable_request_queuing(blk, true);
133
+ 0, errp);
244
blk_set_allow_aio_context_change(blk, true);
134
if (ret < 0) {
245
135
error_prepend(errp, "Could not resize image: ");
246
/* Only set speed when necessary to avoid NotSupported error */
136
goto out;
247
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
137
@@ -XXX,XX +XXX,XX @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
248
index XXXXXXX..XXXXXXX 100644
138
* Amending image options should ensure that the image has
249
--- a/tests/test-bdrv-drain.c
139
* exactly the given new values, so pass exact=true here.
250
+++ b/tests/test-bdrv-drain.c
140
*/
251
@@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
141
- ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp);
252
&error_abort);
142
+ ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp);
253
s = bs->opaque;
143
blk_unref(blk);
254
blk_insert_bs(blk, bs, &error_abort);
144
if (ret < 0) {
255
+ blk_set_disable_request_queuing(blk, true);
145
return ret;
256
146
diff --git a/block/qed.c b/block/qed.c
257
blk_set_aio_context(blk, ctx_a, &error_abort);
147
index XXXXXXX..XXXXXXX 100644
258
aio_context_acquire(ctx_a);
148
--- a/block/qed.c
149
+++ b/block/qed.c
150
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
151
* The QED format associates file length with allocation status,
152
* so a new file (which is empty) must have a length of 0.
153
*/
154
- ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp);
155
+ ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp);
156
if (ret < 0) {
157
goto out;
158
}
159
diff --git a/block/vdi.c b/block/vdi.c
160
index XXXXXXX..XXXXXXX 100644
161
--- a/block/vdi.c
162
+++ b/block/vdi.c
163
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
164
165
if (image_type == VDI_TYPE_STATIC) {
166
ret = blk_truncate(blk, offset + blocks * block_size, false,
167
- PREALLOC_MODE_OFF, errp);
168
+ PREALLOC_MODE_OFF, 0, errp);
169
if (ret < 0) {
170
error_prepend(errp, "Failed to statically allocate file");
171
goto exit;
172
diff --git a/block/vhdx.c b/block/vhdx.c
173
index XXXXXXX..XXXXXXX 100644
174
--- a/block/vhdx.c
175
+++ b/block/vhdx.c
176
@@ -XXX,XX +XXX,XX @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
177
/* All zeroes, so we can just extend the file - the end of the BAT
178
* is the furthest thing we have written yet */
179
ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF,
180
- errp);
181
+ 0, errp);
182
if (ret < 0) {
183
goto exit;
184
}
185
} else if (type == VHDX_TYPE_FIXED) {
186
ret = blk_truncate(blk, data_file_offset + image_size, false,
187
- PREALLOC_MODE_OFF, errp);
188
+ PREALLOC_MODE_OFF, 0, errp);
189
if (ret < 0) {
190
goto exit;
191
}
192
diff --git a/block/vmdk.c b/block/vmdk.c
193
index XXXXXXX..XXXXXXX 100644
194
--- a/block/vmdk.c
195
+++ b/block/vmdk.c
196
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk,
197
int gd_buf_size;
198
199
if (flat) {
200
- ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp);
201
+ ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
202
goto exit;
203
}
204
magic = cpu_to_be32(VMDK4_MAGIC);
205
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk,
206
}
207
208
ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
209
- PREALLOC_MODE_OFF, errp);
210
+ PREALLOC_MODE_OFF, 0, errp);
211
if (ret < 0) {
212
goto exit;
213
}
214
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
215
/* bdrv_pwrite write padding zeros to align to sector, we don't need that
216
* for description file */
217
if (desc_offset == 0) {
218
- ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp);
219
+ ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
220
if (ret < 0) {
221
goto exit;
222
}
223
diff --git a/block/vpc.c b/block/vpc.c
224
index XXXXXXX..XXXXXXX 100644
225
--- a/block/vpc.c
226
+++ b/block/vpc.c
227
@@ -XXX,XX +XXX,XX @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
228
/* Add footer to total size */
229
total_size += HEADER_SIZE;
230
231
- ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp);
232
+ ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
233
if (ret < 0) {
234
return ret;
235
}
236
diff --git a/blockdev.c b/blockdev.c
237
index XXXXXXX..XXXXXXX 100644
238
--- a/blockdev.c
239
+++ b/blockdev.c
240
@@ -XXX,XX +XXX,XX @@ void qmp_block_resize(bool has_device, const char *device,
241
}
242
243
bdrv_drained_begin(bs);
244
- ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp);
245
+ ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
246
bdrv_drained_end(bs);
247
248
out:
249
diff --git a/qemu-img.c b/qemu-img.c
250
index XXXXXXX..XXXXXXX 100644
251
--- a/qemu-img.c
252
+++ b/qemu-img.c
253
@@ -XXX,XX +XXX,XX @@ static int img_resize(int argc, char **argv)
254
* resizing, so pass @exact=true. It is of no use to report
255
* success when the image has not actually been resized.
256
*/
257
- ret = blk_truncate(blk, total_size, true, prealloc, &err);
258
+ ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
259
if (!ret) {
260
qprintf(quiet, "Image resized.\n");
261
} else {
262
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
263
index XXXXXXX..XXXXXXX 100644
264
--- a/qemu-io-cmds.c
265
+++ b/qemu-io-cmds.c
266
@@ -XXX,XX +XXX,XX @@ static int truncate_f(BlockBackend *blk, int argc, char **argv)
267
* exact=true. It is better to err on the "emit more errors" side
268
* than to be overly permissive.
269
*/
270
- ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err);
271
+ ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err);
272
if (ret < 0) {
273
error_report_err(local_err);
274
return ret;
259
--
275
--
260
2.20.1
276
2.25.3
261
277
262
278
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
If BDRV_REQ_ZERO_WRITE is set and we're extending the image, calling
2
qcow2_cluster_zeroize() with flags=0 does the right thing: It doesn't
3
undo any previous preallocation, but just adds the zero flag to all
4
relevant L2 entries. If an external data file is in use, a write_zeroes
5
request to the data file is made instead.
2
6
3
We already have iotest 030 for that in general, but this tests very specific
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
cases of both jobs finishing concurrently.
8
Message-Id: <20200424125448.63318-5-kwolf@redhat.com>
5
9
Reviewed-by: Eric Blake <eblake@redhat.com>
6
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
12
---
9
tests/qemu-iotests/258 | 163 +++++++++++++++++++++++++++++++++++++
13
block/qcow2-cluster.c | 2 +-
10
tests/qemu-iotests/258.out | 33 ++++++++
14
block/qcow2.c | 34 ++++++++++++++++++++++++++++++++++
11
tests/qemu-iotests/group | 1 +
15
2 files changed, 35 insertions(+), 1 deletion(-)
12
3 files changed, 197 insertions(+)
13
create mode 100755 tests/qemu-iotests/258
14
create mode 100644 tests/qemu-iotests/258.out
15
16
16
diff --git a/tests/qemu-iotests/258 b/tests/qemu-iotests/258
17
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
17
new file mode 100755
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX
19
--- a/block/qcow2-cluster.c
19
--- /dev/null
20
+++ b/block/qcow2-cluster.c
20
+++ b/tests/qemu-iotests/258
21
@@ -XXX,XX +XXX,XX @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
21
@@ -XXX,XX +XXX,XX @@
22
/* Caller must pass aligned values, except at image end */
22
+#!/usr/bin/env python
23
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
23
+#
24
assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
24
+# Very specific tests for adjacent commit/stream block jobs
25
- end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
25
+#
26
+ end_offset >= bs->total_sectors << BDRV_SECTOR_BITS);
26
+# Copyright (C) 2019 Red Hat, Inc.
27
27
+#
28
/* The zero flag is only supported by version 3 and newer */
28
+# This program is free software; you can redistribute it and/or modify
29
if (s->qcow_version < 3) {
29
+# it under the terms of the GNU General Public License as published by
30
diff --git a/block/qcow2.c b/block/qcow2.c
30
+# the Free Software Foundation; either version 2 of the License, or
31
index XXXXXXX..XXXXXXX 100644
31
+# (at your option) any later version.
32
--- a/block/qcow2.c
32
+#
33
+++ b/block/qcow2.c
33
+# This program is distributed in the hope that it will be useful,
34
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
34
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
35
35
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36
bs->supported_zero_flags = header.version >= 3 ?
36
+# GNU General Public License for more details.
37
BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
37
+#
38
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
38
+# You should have received a copy of the GNU General Public License
39
39
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
40
/* Repair image if dirty */
40
+#
41
if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
41
+# Creator/Owner: Max Reitz <mreitz@redhat.com>
42
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
43
g_assert_not_reached();
44
}
45
46
+ if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) {
47
+ uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size);
42
+
48
+
43
+import iotests
49
+ /*
44
+from iotests import log, qemu_img, qemu_io_silent, \
50
+ * Use zero clusters as much as we can. qcow2_cluster_zeroize()
45
+ filter_qmp_testfiles, filter_qmp_imgfmt
51
+ * requires a cluster-aligned start. The end may be unaligned if it is
52
+ * at the end of the image (which it is here).
53
+ */
54
+ ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0);
55
+ if (ret < 0) {
56
+ error_setg_errno(errp, -ret, "Failed to zero out new clusters");
57
+ goto fail;
58
+ }
46
+
59
+
47
+# Need backing file and change-backing-file support
60
+ /* Write explicit zeros for the unaligned head */
48
+iotests.verify_image_format(supported_fmts=['qcow2', 'qed'])
61
+ if (zero_start > old_length) {
49
+iotests.verify_platform(['linux'])
62
+ uint64_t len = zero_start - old_length;
63
+ uint8_t *buf = qemu_blockalign0(bs, len);
64
+ QEMUIOVector qiov;
65
+ qemu_iovec_init_buf(&qiov, buf, len);
50
+
66
+
67
+ qemu_co_mutex_unlock(&s->lock);
68
+ ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0);
69
+ qemu_co_mutex_lock(&s->lock);
51
+
70
+
52
+# Returns a node for blockdev-add
71
+ qemu_vfree(buf);
53
+def node(node_name, path, backing=None, fmt=None, throttle=None):
72
+ if (ret < 0) {
54
+ if fmt is None:
73
+ error_setg_errno(errp, -ret, "Failed to zero out the new area");
55
+ fmt = iotests.imgfmt
74
+ goto fail;
56
+
75
+ }
57
+ res = {
58
+ 'node-name': node_name,
59
+ 'driver': fmt,
60
+ 'file': {
61
+ 'driver': 'file',
62
+ 'filename': path
63
+ }
76
+ }
64
+ }
77
+ }
65
+
78
+
66
+ if backing is not None:
79
if (prealloc != PREALLOC_MODE_OFF) {
67
+ res['backing'] = backing
80
/* Flush metadata before actually changing the image size */
68
+
81
ret = qcow2_write_caches(bs);
69
+ if throttle:
70
+ res['file'] = {
71
+ 'driver': 'throttle',
72
+ 'throttle-group': throttle,
73
+ 'file': res['file']
74
+ }
75
+
76
+ return res
77
+
78
+# Finds a node in the debug block graph
79
+def find_graph_node(graph, node_id):
80
+ return next(node for node in graph['nodes'] if node['id'] == node_id)
81
+
82
+
83
+def test_concurrent_finish(write_to_stream_node):
84
+ log('')
85
+ log('=== Commit and stream finish concurrently (letting %s write) ===' % \
86
+ ('stream' if write_to_stream_node else 'commit'))
87
+ log('')
88
+
89
+ # All chosen in such a way that when the commit job wants to
90
+ # finish, it polls and thus makes stream finish concurrently --
91
+ # and the other way around, depending on whether the commit job
92
+ # is finalized before stream completes or not.
93
+
94
+ with iotests.FilePath('node4.img') as node4_path, \
95
+ iotests.FilePath('node3.img') as node3_path, \
96
+ iotests.FilePath('node2.img') as node2_path, \
97
+ iotests.FilePath('node1.img') as node1_path, \
98
+ iotests.FilePath('node0.img') as node0_path, \
99
+ iotests.VM() as vm:
100
+
101
+ # It is important to use raw for the base layer (so that
102
+ # permissions are just handed through to the protocol layer)
103
+ assert qemu_img('create', '-f', 'raw', node0_path, '64M') == 0
104
+
105
+ stream_throttle=None
106
+ commit_throttle=None
107
+
108
+ for path in [node1_path, node2_path, node3_path, node4_path]:
109
+ assert qemu_img('create', '-f', iotests.imgfmt, path, '64M') == 0
110
+
111
+ if write_to_stream_node:
112
+ # This is what (most of the time) makes commit finish
113
+ # earlier and then pull in stream
114
+ assert qemu_io_silent(node2_path,
115
+ '-c', 'write %iK 64K' % (65536 - 192),
116
+ '-c', 'write %iK 64K' % (65536 - 64)) == 0
117
+
118
+ stream_throttle='tg'
119
+ else:
120
+ # And this makes stream finish earlier
121
+ assert qemu_io_silent(node1_path,
122
+ '-c', 'write %iK 64K' % (65536 - 64)) == 0
123
+
124
+ commit_throttle='tg'
125
+
126
+ vm.launch()
127
+
128
+ vm.qmp_log('object-add',
129
+ qom_type='throttle-group',
130
+ id='tg',
131
+ props={
132
+ 'x-iops-write': 1,
133
+ 'x-iops-write-max': 1
134
+ })
135
+
136
+ vm.qmp_log('blockdev-add',
137
+ filters=[filter_qmp_testfiles, filter_qmp_imgfmt],
138
+ **node('node4', node4_path, throttle=stream_throttle,
139
+ backing=node('node3', node3_path,
140
+ backing=node('node2', node2_path,
141
+ backing=node('node1', node1_path,
142
+ backing=node('node0', node0_path, throttle=commit_throttle,
143
+ fmt='raw'))))))
144
+
145
+ vm.qmp_log('block-commit',
146
+ job_id='commit',
147
+ device='node4',
148
+ filter_node_name='commit-filter',
149
+ top_node='node1',
150
+ base_node='node0',
151
+ auto_finalize=False)
152
+
153
+ vm.qmp_log('block-stream',
154
+ job_id='stream',
155
+ device='node3',
156
+ base_node='commit-filter')
157
+
158
+ if write_to_stream_node:
159
+ vm.run_job('commit', auto_finalize=False, auto_dismiss=True)
160
+ vm.run_job('stream', auto_finalize=True, auto_dismiss=True)
161
+ else:
162
+ # No, the jobs do not really finish concurrently here,
163
+ # the stream job does complete strictly before commit.
164
+ # But still, this is close enough for what we want to
165
+ # test.
166
+ vm.run_job('stream', auto_finalize=True, auto_dismiss=True)
167
+ vm.run_job('commit', auto_finalize=False, auto_dismiss=True)
168
+
169
+ # Assert that the backing node of node3 is node 0 now
170
+ graph = vm.qmp('x-debug-query-block-graph')['return']
171
+ for edge in graph['edges']:
172
+ if edge['name'] == 'backing' and \
173
+ find_graph_node(graph, edge['parent'])['name'] == 'node3':
174
+ assert find_graph_node(graph, edge['child'])['name'] == 'node0'
175
+ break
176
+
177
+
178
+def main():
179
+ log('Running tests:')
180
+ test_concurrent_finish(True)
181
+ test_concurrent_finish(False)
182
+
183
+if __name__ == '__main__':
184
+ main()
185
diff --git a/tests/qemu-iotests/258.out b/tests/qemu-iotests/258.out
186
new file mode 100644
187
index XXXXXXX..XXXXXXX
188
--- /dev/null
189
+++ b/tests/qemu-iotests/258.out
190
@@ -XXX,XX +XXX,XX @@
191
+Running tests:
192
+
193
+=== Commit and stream finish concurrently (letting stream write) ===
194
+
195
+{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
196
+{"return": {}}
197
+{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "throttle-group": "tg"}, "node-name": "node4"}}
198
+{"return": {}}
199
+{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}}
200
+{"return": {}}
201
+{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}}
202
+{"return": {}}
203
+{"execute": "job-finalize", "arguments": {"id": "commit"}}
204
+{"return": {}}
205
+{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
206
+{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
207
+{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
208
+
209
+=== Commit and stream finish concurrently (letting commit write) ===
210
+
211
+{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
212
+{"return": {}}
213
+{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "throttle-group": "tg"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "node-name": "node4"}}
214
+{"return": {}}
215
+{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}}
216
+{"return": {}}
217
+{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}}
218
+{"return": {}}
219
+{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
220
+{"execute": "job-finalize", "arguments": {"id": "commit"}}
221
+{"return": {}}
222
+{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
223
+{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
224
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
225
index XXXXXXX..XXXXXXX 100644
226
--- a/tests/qemu-iotests/group
227
+++ b/tests/qemu-iotests/group
228
@@ -XXX,XX +XXX,XX @@
229
254 rw backing quick
230
255 rw quick
231
256 rw quick
232
+258 rw quick
233
262 rw quick migration
234
--
82
--
235
2.20.1
83
2.25.3
236
84
237
85
diff view generated by jsdifflib
1
The code path for -device drive=<node-name> or without a drive=...
1
The raw format driver can simply forward the flag and let its bs->file
2
option for empty drives, which is supposed to be used with -blockdev
2
child take care of actually providing the zeros.
3
differs enough from the -drive based path with a user-owned
4
BlockBackend, so we want to test both paths at least for the basic tests
5
implemented by TestInitiallyFilled and TestInitiallyEmpty.
6
7
This would have caught the bug recently fixed for inserting read-only
8
nodes into a scsi-cd created without a drive=... option.
9
3
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
Reviewed-by: Max Reitz <mreitz@redhat.com>
5
Reviewed-by: Max Reitz <mreitz@redhat.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Message-Id: <20200424125448.63318-6-kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
10
---
13
tests/qemu-iotests/118 | 43 ++++++++++++++++++++++++++------------
11
block/raw-format.c | 4 +++-
14
tests/qemu-iotests/118.out | 4 ++--
12
1 file changed, 3 insertions(+), 1 deletion(-)
15
2 files changed, 32 insertions(+), 15 deletions(-)
16
13
17
diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118
14
diff --git a/block/raw-format.c b/block/raw-format.c
18
index XXXXXXX..XXXXXXX 100755
19
--- a/tests/qemu-iotests/118
20
+++ b/tests/qemu-iotests/118
21
@@ -XXX,XX +XXX,XX @@ class ChangeBaseClass(iotests.QMPTestCase):
22
has_opened = False
23
has_closed = False
24
25
+ device_name = 'qdev0'
26
+ use_drive = False
27
+
28
def process_events(self):
29
for event in self.vm.get_qmp_events(wait=False):
30
if (event['event'] == 'DEVICE_TRAY_MOVED' and
31
- event['data']['device'] == 'drive0'):
32
+ (event['data']['device'] == 'drive0' or
33
+ event['data']['id'] == self.device_name)):
34
if event['data']['tray-open'] == False:
35
self.has_closed = True
36
else:
37
@@ -XXX,XX +XXX,XX @@ class ChangeBaseClass(iotests.QMPTestCase):
38
39
class GeneralChangeTestsBaseClass(ChangeBaseClass):
40
41
- device_name = 'qdev0'
42
-
43
def test_change(self):
44
+ # 'change' requires a drive name, so skip the test for blockdev
45
+ if not self.use_drive:
46
+ return
47
+
48
result = self.vm.qmp('change', device='drive0', target=new_img,
49
arg=iotests.imgfmt)
50
self.assert_qmp(result, 'return', {})
51
@@ -XXX,XX +XXX,XX @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass):
52
qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k')
53
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
54
self.vm = iotests.VM()
55
- self.vm.add_drive(old_img, 'media=%s' % self.media, 'none')
56
+ if self.use_drive:
57
+ self.vm.add_drive(old_img, 'media=%s' % self.media, 'none')
58
+ else:
59
+ self.vm.add_blockdev([ 'node-name=drive0',
60
+ 'driver=%s' % iotests.imgfmt,
61
+ 'file.driver=file',
62
+ 'file.filename=%s' % old_img ])
63
if self.interface == 'scsi':
64
self.vm.add_device('virtio-scsi-pci')
65
self.vm.add_device('%s,drive=drive0,id=%s' %
66
@@ -XXX,XX +XXX,XX @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
67
68
def setUp(self):
69
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
70
- self.vm = iotests.VM().add_drive(None, 'media=%s' % self.media, 'none')
71
+ self.vm = iotests.VM()
72
+ if self.use_drive:
73
+ self.vm.add_drive(None, 'media=%s' % self.media, 'none')
74
if self.interface == 'scsi':
75
self.vm.add_device('virtio-scsi-pci')
76
- self.vm.add_device('%s,drive=drive0,id=%s' %
77
+ self.vm.add_device('%s,%sid=%s' %
78
(interface_to_device_name(self.interface),
79
+ 'drive=drive0,' if self.use_drive else '',
80
self.device_name))
81
self.vm.launch()
82
83
@@ -XXX,XX +XXX,XX @@ def create_basic_test_classes():
84
('disk', 'floppy', False) ]:
85
86
for case in [ TestInitiallyFilled, TestInitiallyEmpty ]:
87
-
88
- attr = { 'media': media,
89
- 'interface': interface,
90
- 'has_real_tray': has_real_tray }
91
-
92
- name = '%s_%s_%s' % (case.__name__, media, interface)
93
- globals()[name] = type(name, (case, ), attr)
94
+ for use_drive in [ True, False ]:
95
+ attr = { 'media': media,
96
+ 'interface': interface,
97
+ 'has_real_tray': has_real_tray,
98
+ 'use_drive': use_drive }
99
+
100
+ name = '%s_%s_%s_%s' % (case.__name__, media, interface,
101
+ 'drive' if use_drive else 'blockdev')
102
+ globals()[name] = type(name, (case, ), attr)
103
104
create_basic_test_classes()
105
106
diff --git a/tests/qemu-iotests/118.out b/tests/qemu-iotests/118.out
107
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
108
--- a/tests/qemu-iotests/118.out
16
--- a/block/raw-format.c
109
+++ b/tests/qemu-iotests/118.out
17
+++ b/block/raw-format.c
110
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
111
-.........................................................................................
19
112
+.......................................................................................................................................................................
20
s->size = offset;
113
----------------------------------------------------------------------
21
offset += s->offset;
114
-Ran 89 tests
22
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
115
+Ran 167 tests
23
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
116
24
}
117
OK
25
26
static void raw_eject(BlockDriverState *bs, bool eject_flag)
27
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
28
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
29
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
30
bs->file->bs->supported_zero_flags);
31
+ bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags &
32
+ BDRV_REQ_ZERO_WRITE;
33
34
if (bs->probed && !bdrv_is_read_only(bs)) {
35
bdrv_refresh_filename(bs->file->bs);
118
--
36
--
119
2.20.1
37
2.25.3
120
38
121
39
diff view generated by jsdifflib
1
From: Nir Soffer <nirsof@gmail.com>
1
For regular files, we always get BDRV_REQ_ZERO_WRITE behaviour from the
2
OS, so we can advertise the flag and just ignore it.
2
3
3
In some cases buf_align or request_alignment cannot be detected:
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
5
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
5
1. With Gluster, buf_align cannot be detected since the actual I/O is
6
Reviewed-by: Alberto Garcia <berto@igalia.com>
6
done on Gluster server, and qemu buffer alignment does not matter.
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Since we don't have alignment requirement, buf_align=1 is the best
8
Message-Id: <20200424125448.63318-7-kwolf@redhat.com>
8
value.
9
10
2. With local XFS filesystem, buf_align cannot be detected if reading
11
from unallocated area. In this case we must align the buffer, but we don't
12
know what is the correct size. Using the wrong alignment results in
13
I/O error.
14
15
3. With Gluster backed by XFS, request_alignment cannot be detected if
16
reading from unallocated area. In this case we need to use the
17
correct alignment, and failing to do so results in I/O errors.
18
19
4. With NFS, the server does not use direct I/O, so both buf_align and request_alignment cannot
20
be detected. In this case we don't need any alignment so we can use
21
buf_align=1 and request_alignment=1.
22
23
These cases seem to work when storage sector size is 512 bytes, because
24
the current code starts checking align=512. If the check succeeds
25
because alignment cannot be detected we use 512. But this does not work
26
for storage with 4k sector size.
27
28
To determine if we can detect the alignment, we probe first with
29
align=1. If probing succeeds, maybe there are no alignment requirements
30
(cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we
31
don't have any way to tell, we treat this as undetectable alignment. If
32
probing with align=1 fails with EINVAL, but probing with one of the
33
expected alignments succeeds, we know that we found a working alignment.
34
35
Practically the alignment requirements are the same for buffer
36
alignment, buffer length, and offset in file. So in case we cannot
37
detect buf_align, we can use request alignment. If we cannot detect
38
request alignment, we can fallback to a safe value. To use this logic,
39
we probe first request alignment instead of buf_align.
40
41
Here is a table showing the behaviour with current code (the value in
42
parenthesis is the optimal value).
43
44
Case Sector buf_align (opt) request_alignment (opt) result
45
======================================================================
46
1 512 512 (1) 512 (512) OK
47
1 4096 512 (1) 4096 (4096) FAIL
48
----------------------------------------------------------------------
49
2 512 512 (512) 512 (512) OK
50
2 4096 512 (4096) 4096 (4096) FAIL
51
----------------------------------------------------------------------
52
3 512 512 (1) 512 (512) OK
53
3 4096 512 (1) 512 (4096) FAIL
54
----------------------------------------------------------------------
55
4 512 512 (1) 512 (1) OK
56
4 4096 512 (1) 512 (1) OK
57
58
Same cases with this change:
59
60
Case Sector buf_align (opt) request_alignment (opt) result
61
======================================================================
62
1 512 512 (1) 512 (512) OK
63
1 4096 4096 (1) 4096 (4096) OK
64
----------------------------------------------------------------------
65
2 512 512 (512) 512 (512) OK
66
2 4096 4096 (4096) 4096 (4096) OK
67
----------------------------------------------------------------------
68
3 512 4096 (1) 4096 (512) OK
69
3 4096 4096 (1) 4096 (4096) OK
70
----------------------------------------------------------------------
71
4 512 4096 (1) 4096 (1) OK
72
4 4096 4096 (1) 4096 (1) OK
73
74
I tested that provisioning VMs and copying disks on local XFS and
75
Gluster with 4k bytes sector size work now, resolving bugs [1],[2].
76
I tested also on XFS, NFS, Gluster with 512 bytes sector size.
77
78
[1] https://bugzilla.redhat.com/1737256
79
[2] https://bugzilla.redhat.com/1738657
80
81
Signed-off-by: Nir Soffer <nsoffer@redhat.com>
82
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
83
---
10
---
84
block/file-posix.c | 36 +++++++++++++++++++++++++-----------
11
block/file-posix.c | 4 ++++
85
1 file changed, 25 insertions(+), 11 deletions(-)
12
1 file changed, 4 insertions(+)
86
13
87
diff --git a/block/file-posix.c b/block/file-posix.c
14
diff --git a/block/file-posix.c b/block/file-posix.c
88
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
89
--- a/block/file-posix.c
16
--- a/block/file-posix.c
90
+++ b/block/file-posix.c
17
+++ b/block/file-posix.c
91
@@ -XXX,XX +XXX,XX @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
18
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
92
BDRVRawState *s = bs->opaque;
93
char *buf;
94
size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
95
+ size_t alignments[] = {1, 512, 1024, 2048, 4096};
96
97
/* For SCSI generic devices the alignment is not really used.
98
With buffered I/O, we don't have any restrictions. */
99
@@ -XXX,XX +XXX,XX @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
100
}
101
#endif
19
#endif
102
20
103
- /* If we could not get the sizes so far, we can only guess them */
21
bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
104
- if (!s->buf_align) {
22
+ if (S_ISREG(st.st_mode)) {
105
+ /*
23
+ /* When extending regular files, we get zeros from the OS */
106
+ * If we could not get the sizes so far, we can only guess them. First try
24
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
107
+ * to detect request alignment, since it is more likely to succeed. Then
25
+ }
108
+ * try to detect buf_align, which cannot be detected in some cases (e.g.
26
ret = 0;
109
+ * Gluster). If buf_align cannot be detected, we fallback to the value of
27
fail:
110
+ * request_alignment.
28
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
111
+ */
112
+
113
+ if (!bs->bl.request_alignment) {
114
+ int i;
115
size_t align;
116
- buf = qemu_memalign(max_align, 2 * max_align);
117
- for (align = 512; align <= max_align; align <<= 1) {
118
- if (raw_is_io_aligned(fd, buf + align, max_align)) {
119
- s->buf_align = align;
120
+ buf = qemu_memalign(max_align, max_align);
121
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
122
+ align = alignments[i];
123
+ if (raw_is_io_aligned(fd, buf, align)) {
124
+ /* Fallback to safe value. */
125
+ bs->bl.request_alignment = (align != 1) ? align : max_align;
126
break;
127
}
128
}
129
qemu_vfree(buf);
130
}
131
132
- if (!bs->bl.request_alignment) {
133
+ if (!s->buf_align) {
134
+ int i;
135
size_t align;
136
- buf = qemu_memalign(s->buf_align, max_align);
137
- for (align = 512; align <= max_align; align <<= 1) {
138
- if (raw_is_io_aligned(fd, buf, align)) {
139
- bs->bl.request_alignment = align;
140
+ buf = qemu_memalign(max_align, 2 * max_align);
141
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
142
+ align = alignments[i];
143
+ if (raw_is_io_aligned(fd, buf + align, max_align)) {
144
+ /* Fallback to request_aligment. */
145
+ s->buf_align = (align != 1) ? align : bs->bl.request_alignment;
146
break;
147
}
148
}
149
--
29
--
150
2.20.1
30
2.25.3
151
31
152
32
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
When extending the size of an image that has a backing file larger than
2
its old size, make sure that the backing file data doesn't become
3
visible in the guest, but the added area is properly zeroed out.
2
4
3
Add a test for what happens when you call bdrv_replace_child_noperm()
5
Consider the following scenario where the overlay is shorter than its
4
for various drain situations ({old,new} child {drained,not drained}).
6
backing file:
5
7
6
Most importantly, if both the old and the new child are drained, the
8
base.qcow2: AAAAAAAA
7
parent must not be undrained at any point.
9
overlay.qcow2: BBBB
8
10
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
When resizing (extending) overlay.qcow2, the new blocks should not stay
12
unallocated and make the additional As from base.qcow2 visible like
13
before this patch, but zeros should be read.
14
15
A similar case happens with the various variants of a commit job when an
16
intermediate file is short (- for unallocated):
17
18
base.qcow2: A-A-AAAA
19
mid.qcow2: BB-B
20
top.qcow2: C--C--C-
21
22
After commit top.qcow2 to mid.qcow2, the following happens:
23
24
mid.qcow2: CB-C00C0 (correct result)
25
mid.qcow2: CB-C--C- (before this fix)
26
27
Without the fix, blocks that previously read as zeros on top.qcow2
28
suddenly turn into A.
29
30
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
31
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
32
Message-Id: <20200424125448.63318-8-kwolf@redhat.com>
33
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
34
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
35
---
12
tests/test-bdrv-drain.c | 308 ++++++++++++++++++++++++++++++++++++++++
36
block/io.c | 25 +++++++++++++++++++++++++
13
1 file changed, 308 insertions(+)
37
1 file changed, 25 insertions(+)
14
38
15
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
39
diff --git a/block/io.c b/block/io.c
16
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
17
--- a/tests/test-bdrv-drain.c
41
--- a/block/io.c
18
+++ b/tests/test-bdrv-drain.c
42
+++ b/block/io.c
19
@@ -XXX,XX +XXX,XX @@ static void test_drop_intermediate_poll(void)
43
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
20
bdrv_unref(chain[2]);
44
goto out;
21
}
45
}
22
46
47
+ /*
48
+ * If the image has a backing file that is large enough that it would
49
+ * provide data for the new area, we cannot leave it unallocated because
50
+ * then the backing file content would become visible. Instead, zero-fill
51
+ * the new area.
52
+ *
53
+ * Note that if the image has a backing file, but was opened without the
54
+ * backing file, taking care of keeping things consistent with that backing
55
+ * file is the user's responsibility.
56
+ */
57
+ if (new_bytes && bs->backing) {
58
+ int64_t backing_len;
23
+
59
+
24
+typedef struct BDRVReplaceTestState {
60
+ backing_len = bdrv_getlength(backing_bs(bs));
25
+ bool was_drained;
61
+ if (backing_len < 0) {
26
+ bool was_undrained;
62
+ ret = backing_len;
27
+ bool has_read;
63
+ error_setg_errno(errp, -ret, "Could not get backing file size");
28
+
64
+ goto out;
29
+ int drain_count;
30
+
31
+ bool yield_before_read;
32
+ Coroutine *io_co;
33
+ Coroutine *drain_co;
34
+} BDRVReplaceTestState;
35
+
36
+static void bdrv_replace_test_close(BlockDriverState *bs)
37
+{
38
+}
39
+
40
+/**
41
+ * If @bs has a backing file:
42
+ * Yield if .yield_before_read is true (and wait for drain_begin to
43
+ * wake us up).
44
+ * Forward the read to bs->backing. Set .has_read to true.
45
+ * If drain_begin has woken us, wake it in turn.
46
+ *
47
+ * Otherwise:
48
+ * Set .has_read to true and return success.
49
+ */
50
+static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
51
+ uint64_t offset,
52
+ uint64_t bytes,
53
+ QEMUIOVector *qiov,
54
+ int flags)
55
+{
56
+ BDRVReplaceTestState *s = bs->opaque;
57
+
58
+ if (bs->backing) {
59
+ int ret;
60
+
61
+ g_assert(!s->drain_count);
62
+
63
+ s->io_co = qemu_coroutine_self();
64
+ if (s->yield_before_read) {
65
+ s->yield_before_read = false;
66
+ qemu_coroutine_yield();
67
+ }
68
+ s->io_co = NULL;
69
+
70
+ ret = bdrv_preadv(bs->backing, offset, qiov);
71
+ s->has_read = true;
72
+
73
+ /* Wake up drain_co if it runs */
74
+ if (s->drain_co) {
75
+ aio_co_wake(s->drain_co);
76
+ }
65
+ }
77
+
66
+
78
+ return ret;
67
+ if (backing_len > old_size) {
68
+ flags |= BDRV_REQ_ZERO_WRITE;
69
+ }
79
+ }
70
+ }
80
+
71
+
81
+ s->has_read = true;
72
if (drv->bdrv_co_truncate) {
82
+ return 0;
73
if (flags & ~bs->supported_truncate_flags) {
83
+}
74
error_setg(errp, "Block driver does not support requested flags");
84
+
85
+/**
86
+ * If .drain_count is 0, wake up .io_co if there is one; and set
87
+ * .was_drained.
88
+ * Increment .drain_count.
89
+ */
90
+static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
91
+{
92
+ BDRVReplaceTestState *s = bs->opaque;
93
+
94
+ if (!s->drain_count) {
95
+ /* Keep waking io_co up until it is done */
96
+ s->drain_co = qemu_coroutine_self();
97
+ while (s->io_co) {
98
+ aio_co_wake(s->io_co);
99
+ s->io_co = NULL;
100
+ qemu_coroutine_yield();
101
+ }
102
+ s->drain_co = NULL;
103
+
104
+ s->was_drained = true;
105
+ }
106
+ s->drain_count++;
107
+}
108
+
109
+/**
110
+ * Reduce .drain_count, set .was_undrained once it reaches 0.
111
+ * If .drain_count reaches 0 and the node has a backing file, issue a
112
+ * read request.
113
+ */
114
+static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
115
+{
116
+ BDRVReplaceTestState *s = bs->opaque;
117
+
118
+ g_assert(s->drain_count > 0);
119
+ if (!--s->drain_count) {
120
+ int ret;
121
+
122
+ s->was_undrained = true;
123
+
124
+ if (bs->backing) {
125
+ char data;
126
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
127
+
128
+ /* Queue a read request post-drain */
129
+ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
130
+ g_assert(ret >= 0);
131
+ }
132
+ }
133
+}
134
+
135
+static BlockDriver bdrv_replace_test = {
136
+ .format_name = "replace_test",
137
+ .instance_size = sizeof(BDRVReplaceTestState),
138
+
139
+ .bdrv_close = bdrv_replace_test_close,
140
+ .bdrv_co_preadv = bdrv_replace_test_co_preadv,
141
+
142
+ .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin,
143
+ .bdrv_co_drain_end = bdrv_replace_test_co_drain_end,
144
+
145
+ .bdrv_child_perm = bdrv_format_default_perms,
146
+};
147
+
148
+static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque)
149
+{
150
+ int ret;
151
+ char data;
152
+
153
+ ret = blk_co_pread(opaque, 0, 1, &data, 0);
154
+ g_assert(ret >= 0);
155
+}
156
+
157
+/**
158
+ * We test two things:
159
+ * (1) bdrv_replace_child_noperm() must not undrain the parent if both
160
+ * children are drained.
161
+ * (2) bdrv_replace_child_noperm() must never flush I/O requests to a
162
+ * drained child. If the old child is drained, it must flush I/O
163
+ * requests after the new one has been attached. If the new child
164
+ * is drained, it must flush I/O requests before the old one is
165
+ * detached.
166
+ *
167
+ * To do so, we create one parent node and two child nodes; then
168
+ * attach one of the children (old_child_bs) to the parent, then
169
+ * drain both old_child_bs and new_child_bs according to
170
+ * old_drain_count and new_drain_count, respectively, and finally
171
+ * we invoke bdrv_replace_node() to replace old_child_bs by
172
+ * new_child_bs.
173
+ *
174
+ * The test block driver we use here (bdrv_replace_test) has a read
175
+ * function that:
176
+ * - For the parent node, can optionally yield, and then forwards the
177
+ * read to bdrv_preadv(),
178
+ * - For the child node, just returns immediately.
179
+ *
180
+ * If the read yields, the drain_begin function will wake it up.
181
+ *
182
+ * The drain_end function issues a read on the parent once it is fully
183
+ * undrained (which simulates requests starting to come in again).
184
+ */
185
+static void do_test_replace_child_mid_drain(int old_drain_count,
186
+ int new_drain_count)
187
+{
188
+ BlockBackend *parent_blk;
189
+ BlockDriverState *parent_bs;
190
+ BlockDriverState *old_child_bs, *new_child_bs;
191
+ BDRVReplaceTestState *parent_s;
192
+ BDRVReplaceTestState *old_child_s, *new_child_s;
193
+ Coroutine *io_co;
194
+ int i;
195
+
196
+ parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0,
197
+ &error_abort);
198
+ parent_s = parent_bs->opaque;
199
+
200
+ parent_blk = blk_new(qemu_get_aio_context(),
201
+ BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
202
+ blk_insert_bs(parent_blk, parent_bs, &error_abort);
203
+
204
+ old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0,
205
+ &error_abort);
206
+ new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0,
207
+ &error_abort);
208
+ old_child_s = old_child_bs->opaque;
209
+ new_child_s = new_child_bs->opaque;
210
+
211
+ /* So that we can read something */
212
+ parent_bs->total_sectors = 1;
213
+ old_child_bs->total_sectors = 1;
214
+ new_child_bs->total_sectors = 1;
215
+
216
+ bdrv_ref(old_child_bs);
217
+ parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child",
218
+ &child_backing, &error_abort);
219
+
220
+ for (i = 0; i < old_drain_count; i++) {
221
+ bdrv_drained_begin(old_child_bs);
222
+ }
223
+ for (i = 0; i < new_drain_count; i++) {
224
+ bdrv_drained_begin(new_child_bs);
225
+ }
226
+
227
+ if (!old_drain_count) {
228
+ /*
229
+ * Start a read operation that will yield, so it will not
230
+ * complete before the node is drained.
231
+ */
232
+ parent_s->yield_before_read = true;
233
+ io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co,
234
+ parent_blk);
235
+ qemu_coroutine_enter(io_co);
236
+ }
237
+
238
+ /* If we have started a read operation, it should have yielded */
239
+ g_assert(!parent_s->has_read);
240
+
241
+ /* Reset drained status so we can see what bdrv_replace_node() does */
242
+ parent_s->was_drained = false;
243
+ parent_s->was_undrained = false;
244
+
245
+ g_assert(parent_bs->quiesce_counter == old_drain_count);
246
+ bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
247
+ g_assert(parent_bs->quiesce_counter == new_drain_count);
248
+
249
+ if (!old_drain_count && !new_drain_count) {
250
+ /*
251
+ * From undrained to undrained drains and undrains the parent,
252
+ * because bdrv_replace_node() contains a drained section for
253
+ * @old_child_bs.
254
+ */
255
+ g_assert(parent_s->was_drained && parent_s->was_undrained);
256
+ } else if (!old_drain_count && new_drain_count) {
257
+ /*
258
+ * From undrained to drained should drain the parent and keep
259
+ * it that way.
260
+ */
261
+ g_assert(parent_s->was_drained && !parent_s->was_undrained);
262
+ } else if (old_drain_count && !new_drain_count) {
263
+ /*
264
+ * From drained to undrained should undrain the parent and
265
+ * keep it that way.
266
+ */
267
+ g_assert(!parent_s->was_drained && parent_s->was_undrained);
268
+ } else /* if (old_drain_count && new_drain_count) */ {
269
+ /*
270
+ * From drained to drained must not undrain the parent at any
271
+ * point
272
+ */
273
+ g_assert(!parent_s->was_drained && !parent_s->was_undrained);
274
+ }
275
+
276
+ if (!old_drain_count || !new_drain_count) {
277
+ /*
278
+ * If !old_drain_count, we have started a read request before
279
+ * bdrv_replace_node(). If !new_drain_count, the parent must
280
+ * have been undrained at some point, and
281
+ * bdrv_replace_test_co_drain_end() starts a read request
282
+ * then.
283
+ */
284
+ g_assert(parent_s->has_read);
285
+ } else {
286
+ /*
287
+ * If the parent was never undrained, there is no way to start
288
+ * a read request.
289
+ */
290
+ g_assert(!parent_s->has_read);
291
+ }
292
+
293
+ /* A drained child must have not received any request */
294
+ g_assert(!(old_drain_count && old_child_s->has_read));
295
+ g_assert(!(new_drain_count && new_child_s->has_read));
296
+
297
+ for (i = 0; i < new_drain_count; i++) {
298
+ bdrv_drained_end(new_child_bs);
299
+ }
300
+ for (i = 0; i < old_drain_count; i++) {
301
+ bdrv_drained_end(old_child_bs);
302
+ }
303
+
304
+ /*
305
+ * By now, bdrv_replace_test_co_drain_end() must have been called
306
+ * at some point while the new child was attached to the parent.
307
+ */
308
+ g_assert(parent_s->has_read);
309
+ g_assert(new_child_s->has_read);
310
+
311
+ blk_unref(parent_blk);
312
+ bdrv_unref(parent_bs);
313
+ bdrv_unref(old_child_bs);
314
+ bdrv_unref(new_child_bs);
315
+}
316
+
317
+static void test_replace_child_mid_drain(void)
318
+{
319
+ int old_drain_count, new_drain_count;
320
+
321
+ for (old_drain_count = 0; old_drain_count < 2; old_drain_count++) {
322
+ for (new_drain_count = 0; new_drain_count < 2; new_drain_count++) {
323
+ do_test_replace_child_mid_drain(old_drain_count, new_drain_count);
324
+ }
325
+ }
326
+}
327
+
328
int main(int argc, char **argv)
329
{
330
int ret;
331
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
332
g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll",
333
test_drop_intermediate_poll);
334
335
+ g_test_add_func("/bdrv-drain/replace_child/mid-drain",
336
+ test_replace_child_mid_drain);
337
+
338
ret = g_test_run();
339
qemu_event_destroy(&done_event);
340
return ret;
341
--
75
--
342
2.20.1
76
2.25.3
343
77
344
78
diff view generated by jsdifflib
1
234 implements functions that are useful for doing migration between two
1
We want to keep TEST_IMG for the full path of the main test image, but
2
VMs. Move them to iotests.py so that other test cases can use them, too.
2
filter_testfiles() must be called for other test images before replacing
3
other things like the image format because the test directory path could
4
contain the format as a substring.
5
6
Insert a filter_testfiles() call between both.
3
7
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Max Reitz <mreitz@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Message-Id: <20200424125448.63318-9-kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
---
13
---
7
tests/qemu-iotests/234 | 30 +++++++-----------------------
14
tests/qemu-iotests/iotests.py | 5 +++--
8
tests/qemu-iotests/iotests.py | 16 ++++++++++++++++
15
1 file changed, 3 insertions(+), 2 deletions(-)
9
2 files changed, 23 insertions(+), 23 deletions(-)
10
16
11
diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234
12
index XXXXXXX..XXXXXXX 100755
13
--- a/tests/qemu-iotests/234
14
+++ b/tests/qemu-iotests/234
15
@@ -XXX,XX +XXX,XX @@ import os
16
iotests.verify_image_format(supported_fmts=['qcow2'])
17
iotests.verify_platform(['linux'])
18
19
-def enable_migration_events(vm, name):
20
- iotests.log('Enabling migration QMP events on %s...' % name)
21
- iotests.log(vm.qmp('migrate-set-capabilities', capabilities=[
22
- {
23
- 'capability': 'events',
24
- 'state': True
25
- }
26
- ]))
27
-
28
-def wait_migration(vm):
29
- while True:
30
- event = vm.event_wait('MIGRATION')
31
- iotests.log(event, filters=[iotests.filter_qmp_event])
32
- if event['data']['status'] == 'completed':
33
- break
34
-
35
with iotests.FilePath('img') as img_path, \
36
iotests.FilePath('backing') as backing_path, \
37
iotests.FilePath('mig_fifo_a') as fifo_a, \
38
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
39
.add_blockdev('%s,file=drive0-backing-file,node-name=drive0-backing' % (iotests.imgfmt))
40
.launch())
41
42
- enable_migration_events(vm_a, 'A')
43
+ vm_a.enable_migration_events('A')
44
45
iotests.log('Launching destination VM...')
46
(vm_b.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path))
47
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
48
.add_incoming("exec: cat '%s'" % (fifo_a))
49
.launch())
50
51
- enable_migration_events(vm_b, 'B')
52
+ vm_b.enable_migration_events('B')
53
54
# Add a child node that was created after the parent node. The reverse case
55
# is covered by the -blockdev options above.
56
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
57
iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a)))
58
with iotests.Timeout(3, 'Migration does not complete'):
59
# Wait for the source first (which includes setup=setup)
60
- wait_migration(vm_a)
61
+ vm_a.wait_migration()
62
# Wait for the destination second (which does not)
63
- wait_migration(vm_b)
64
+ vm_b.wait_migration()
65
66
iotests.log(vm_a.qmp('query-migrate')['return']['status'])
67
iotests.log(vm_b.qmp('query-migrate')['return']['status'])
68
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
69
.add_incoming("exec: cat '%s'" % (fifo_b))
70
.launch())
71
72
- enable_migration_events(vm_a, 'A')
73
+ vm_a.enable_migration_events('A')
74
75
iotests.log(vm_a.qmp('blockdev-snapshot', node='drive0-backing',
76
overlay='drive0'))
77
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
78
iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b)))
79
with iotests.Timeout(3, 'Migration does not complete'):
80
# Wait for the source first (which includes setup=setup)
81
- wait_migration(vm_b)
82
+ vm_b.wait_migration()
83
# Wait for the destination second (which does not)
84
- wait_migration(vm_a)
85
+ vm_a.wait_migration()
86
87
iotests.log(vm_a.qmp('query-migrate')['return']['status'])
88
iotests.log(vm_b.qmp('query-migrate')['return']['status'])
89
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
17
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
90
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
91
--- a/tests/qemu-iotests/iotests.py
19
--- a/tests/qemu-iotests/iotests.py
92
+++ b/tests/qemu-iotests/iotests.py
20
+++ b/tests/qemu-iotests/iotests.py
93
@@ -XXX,XX +XXX,XX @@ class VM(qtest.QEMUQtestMachine):
21
@@ -XXX,XX +XXX,XX @@ def filter_img_info(output, filename):
94
elif status == 'null':
22
for line in output.split('\n'):
95
return error
23
if 'disk size' in line or 'actual-size' in line:
96
24
continue
97
+ def enable_migration_events(self, name):
25
- line = line.replace(filename, 'TEST_IMG') \
98
+ log('Enabling migration QMP events on %s...' % name)
26
- .replace(imgfmt, 'IMGFMT')
99
+ log(self.qmp('migrate-set-capabilities', capabilities=[
27
+ line = line.replace(filename, 'TEST_IMG')
100
+ {
28
+ line = filter_testfiles(line)
101
+ 'capability': 'events',
29
+ line = line.replace(imgfmt, 'IMGFMT')
102
+ 'state': True
30
line = re.sub('iters: [0-9]+', 'iters: XXX', line)
103
+ }
31
line = re.sub('uuid: [-a-f0-9]+', 'uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', line)
104
+ ]))
32
line = re.sub('cid: [0-9]+', 'cid: XXXXXXXXXX', line)
105
+
106
+ def wait_migration(self):
107
+ while True:
108
+ event = self.event_wait('MIGRATION')
109
+ log(event, filters=[filter_qmp_event])
110
+ if event['data']['status'] == 'completed':
111
+ break
112
+
113
def node_info(self, node_name):
114
nodes = self.qmp('query-named-block-nodes')
115
for x in nodes['return']:
116
--
33
--
117
2.20.1
34
2.25.3
118
35
119
36
diff view generated by jsdifflib
1
This test case is motivated by commit 2b23f28639 ('block/copy-on-read:
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
Fix permissions for inactive node'). Instead of just testing
2
Message-Id: <20200424125448.63318-10-kwolf@redhat.com>
3
copy-on-read on migration, let's stack all sorts of filter nodes on top
3
Reviewed-by: Max Reitz <mreitz@redhat.com>
4
of each other and try if the resulting VM can still migrate
4
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
5
successfully. For good measure, put everything into an iothread, because
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
why not?
6
---
7
tests/qemu-iotests/274 | 155 +++++++++++++++++++++
8
tests/qemu-iotests/274.out | 268 +++++++++++++++++++++++++++++++++++++
9
tests/qemu-iotests/group | 1 +
10
3 files changed, 424 insertions(+)
11
create mode 100755 tests/qemu-iotests/274
12
create mode 100644 tests/qemu-iotests/274.out
7
13
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
---
11
tests/qemu-iotests/262 | 82 ++++++++++++++++++++++++++++++++++++++
12
tests/qemu-iotests/262.out | 17 ++++++++
13
tests/qemu-iotests/group | 1 +
14
3 files changed, 100 insertions(+)
15
create mode 100755 tests/qemu-iotests/262
16
create mode 100644 tests/qemu-iotests/262.out
17
18
diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262
19
new file mode 100755
15
new file mode 100755
20
index XXXXXXX..XXXXXXX
16
index XXXXXXX..XXXXXXX
21
--- /dev/null
17
--- /dev/null
22
+++ b/tests/qemu-iotests/262
18
+++ b/tests/qemu-iotests/274
23
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@
24
+#!/usr/bin/env python
20
+#!/usr/bin/env python3
25
+#
21
+#
26
+# Copyright (C) 2019 Red Hat, Inc.
22
+# Copyright (C) 2019 Red Hat, Inc.
27
+#
23
+#
28
+# This program is free software; you can redistribute it and/or modify
24
+# This program is free software; you can redistribute it and/or modify
29
+# it under the terms of the GNU General Public License as published by
25
+# it under the terms of the GNU General Public License as published by
...
...
38
+# You should have received a copy of the GNU General Public License
34
+# You should have received a copy of the GNU General Public License
39
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
35
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
40
+#
36
+#
41
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
37
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
42
+#
38
+#
43
+# Test migration with filter drivers present. Keep everything in an
39
+# Some tests for short backing files and short overlays
44
+# iothread just for fun.
45
+
40
+
46
+import iotests
41
+import iotests
47
+import os
48
+
42
+
49
+iotests.verify_image_format(supported_fmts=['qcow2'])
43
+iotests.verify_image_format(supported_fmts=['qcow2'])
50
+iotests.verify_platform(['linux'])
44
+iotests.verify_platform(['linux'])
51
+
45
+
52
+with iotests.FilePath('img') as img_path, \
46
+size_short = 1 * 1024 * 1024
53
+ iotests.FilePath('mig_fifo') as fifo, \
47
+size_long = 2 * 1024 * 1024
54
+ iotests.VM(path_suffix='a') as vm_a, \
48
+size_diff = size_long - size_short
55
+ iotests.VM(path_suffix='b') as vm_b:
49
+
56
+
50
+def create_chain() -> None:
57
+ def add_opts(vm):
51
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base,
58
+ vm.add_object('iothread,id=iothread0')
52
+ str(size_long))
59
+ vm.add_object('throttle-group,id=tg0,x-bps-total=65536')
53
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, mid,
60
+ vm.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path))
54
+ str(size_short))
61
+ vm.add_blockdev('%s,file=drive0-file,node-name=drive0-fmt' % (iotests.imgfmt))
55
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', mid, top,
62
+ vm.add_blockdev('copy-on-read,file=drive0-fmt,node-name=drive0-cor')
56
+ str(size_long))
63
+ vm.add_blockdev('throttle,file=drive0-cor,node-name=drive0-throttle,throttle-group=tg0')
57
+
64
+ vm.add_blockdev('blkdebug,image=drive0-throttle,node-name=drive0-dbg')
58
+ iotests.qemu_io_log('-c', 'write -P 1 0 %d' % size_long, base)
65
+ vm.add_blockdev('null-co,node-name=null,read-zeroes=on')
59
+
66
+ vm.add_blockdev('blkverify,test=drive0-dbg,raw=null,node-name=drive0-verify')
60
+def create_vm() -> iotests.VM:
67
+
61
+ vm = iotests.VM()
68
+ if iotests.supports_quorum():
62
+ vm.add_blockdev('file,filename=%s,node-name=base-file' % base)
69
+ vm.add_blockdev('quorum,children.0=drive0-verify,vote-threshold=1,node-name=drive0-quorum')
63
+ vm.add_blockdev('%s,file=base-file,node-name=base' % iotests.imgfmt)
70
+ root = "drive0-quorum"
64
+ vm.add_blockdev('file,filename=%s,node-name=mid-file' % mid)
71
+ else:
65
+ vm.add_blockdev('%s,file=mid-file,node-name=mid,backing=base'
72
+ root = "drive0-verify"
66
+ % iotests.imgfmt)
73
+
67
+ vm.add_drive(top, 'backing=mid,node-name=top')
74
+ vm.add_device('virtio-blk,drive=%s,iothread=iothread0' % root)
68
+ return vm
75
+
69
+
76
+ iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, img_path, '64M')
70
+with iotests.FilePath('base') as base, \
77
+
71
+ iotests.FilePath('mid') as mid, \
78
+ os.mkfifo(fifo)
72
+ iotests.FilePath('top') as top:
79
+
73
+
80
+ iotests.log('Launching source VM...')
74
+ iotests.log('== Commit tests ==')
81
+ add_opts(vm_a)
75
+
82
+ vm_a.launch()
76
+ create_chain()
83
+
77
+
84
+ vm_a.enable_migration_events('A')
78
+ iotests.log('=== Check visible data ===')
85
+
79
+
86
+ iotests.log('Launching destination VM...')
80
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, top)
87
+ add_opts(vm_b)
81
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), top)
88
+ vm_b.add_incoming("exec: cat '%s'" % (fifo))
82
+
89
+ vm_b.launch()
83
+ iotests.log('=== Checking allocation status ===')
90
+
84
+
91
+ vm_b.enable_migration_events('B')
85
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
92
+
86
+ '-c', 'alloc %d %d' % (size_short, size_diff),
93
+ iotests.log('Starting migration to B...')
87
+ base)
94
+ iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo)))
88
+
95
+ with iotests.Timeout(3, 'Migration does not complete'):
89
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
96
+ # Wait for the source first (which includes setup=setup)
90
+ '-c', 'alloc %d %d' % (size_short, size_diff),
97
+ vm_a.wait_migration()
91
+ mid)
98
+ # Wait for the destination second (which does not)
92
+
99
+ vm_b.wait_migration()
93
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
100
+
94
+ '-c', 'alloc %d %d' % (size_short, size_diff),
101
+ iotests.log(vm_a.qmp('query-migrate')['return']['status'])
95
+ top)
102
+ iotests.log(vm_b.qmp('query-migrate')['return']['status'])
96
+
103
+
97
+ iotests.log('=== Checking map ===')
104
+ iotests.log(vm_a.qmp('query-status'))
98
+
105
+ iotests.log(vm_b.qmp('query-status'))
99
+ iotests.qemu_img_log('map', '--output=json', base)
106
diff --git a/tests/qemu-iotests/262.out b/tests/qemu-iotests/262.out
100
+ iotests.qemu_img_log('map', '--output=human', base)
101
+ iotests.qemu_img_log('map', '--output=json', mid)
102
+ iotests.qemu_img_log('map', '--output=human', mid)
103
+ iotests.qemu_img_log('map', '--output=json', top)
104
+ iotests.qemu_img_log('map', '--output=human', top)
105
+
106
+ iotests.log('=== Testing qemu-img commit (top -> mid) ===')
107
+
108
+ iotests.qemu_img_log('commit', top)
109
+ iotests.img_info_log(mid)
110
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
111
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
112
+
113
+ iotests.log('=== Testing HMP commit (top -> mid) ===')
114
+
115
+ create_chain()
116
+ with create_vm() as vm:
117
+ vm.launch()
118
+ vm.qmp_log('human-monitor-command', command_line='commit drive0')
119
+
120
+ iotests.img_info_log(mid)
121
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
122
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
123
+
124
+ iotests.log('=== Testing QMP active commit (top -> mid) ===')
125
+
126
+ create_chain()
127
+ with create_vm() as vm:
128
+ vm.launch()
129
+ vm.qmp_log('block-commit', device='top', base_node='mid',
130
+ job_id='job0', auto_dismiss=False)
131
+ vm.run_job('job0', wait=5)
132
+
133
+ iotests.img_info_log(mid)
134
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
135
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
136
+
137
+
138
+ iotests.log('== Resize tests ==')
139
+
140
+ # Use different sizes for different allocation modes:
141
+ #
142
+ # We want to have at least one test where 32 bit truncation in the size of
143
+ # the overlapping area becomes visible. This is covered by the
144
+ # prealloc='off' case (1G to 6G is an overlap of 5G).
145
+ #
146
+ # However, we can only do this for modes that don't preallocate data
147
+ # because otherwise we might run out of space on the test host.
148
+ #
149
+ # We also want to test some unaligned combinations.
150
+ for (prealloc, base_size, top_size_old, top_size_new, off) in [
151
+ ('off', '6G', '1G', '8G', '5G'),
152
+ ('metadata', '32G', '30G', '33G', '31G'),
153
+ ('falloc', '10M', '5M', '15M', '9M'),
154
+ ('full', '16M', '8M', '12M', '11M'),
155
+ ('off', '384k', '253k', '512k', '253k'),
156
+ ('off', '400k', '256k', '512k', '336k'),
157
+ ('off', '512k', '256k', '500k', '436k')]:
158
+
159
+ iotests.log('=== preallocation=%s ===' % prealloc)
160
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, base_size)
161
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, top,
162
+ top_size_old)
163
+ iotests.qemu_io_log('-c', 'write -P 1 %s 64k' % off, base)
164
+
165
+ # After this, top_size_old to base_size should be allocated/zeroed.
166
+ #
167
+ # In theory, leaving base_size to top_size_new unallocated would be
168
+ # correct, but in practice, if we zero out anything, we zero out
169
+ # everything up to top_size_new.
170
+ iotests.qemu_img_log('resize', '-f', iotests.imgfmt,
171
+ '--preallocation', prealloc, top, top_size_new)
172
+ iotests.qemu_io_log('-c', 'read -P 0 %s 64k' % off, top)
173
+ iotests.qemu_io_log('-c', 'map', top)
174
+ iotests.qemu_img_log('map', '--output=json', top)
175
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
107
new file mode 100644
176
new file mode 100644
108
index XXXXXXX..XXXXXXX
177
index XXXXXXX..XXXXXXX
109
--- /dev/null
178
--- /dev/null
110
+++ b/tests/qemu-iotests/262.out
179
+++ b/tests/qemu-iotests/274.out
111
@@ -XXX,XX +XXX,XX @@
180
@@ -XXX,XX +XXX,XX @@
112
+Launching source VM...
181
+== Commit tests ==
113
+Enabling migration QMP events on A...
182
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
183
+
184
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
185
+
186
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
187
+
188
+wrote 2097152/2097152 bytes at offset 0
189
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
190
+
191
+=== Check visible data ===
192
+read 1048576/1048576 bytes at offset 0
193
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
194
+
195
+read 1048576/1048576 bytes at offset 1048576
196
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
197
+
198
+=== Checking allocation status ===
199
+1048576/1048576 bytes allocated at offset 0 bytes
200
+1048576/1048576 bytes allocated at offset 1 MiB
201
+
202
+0/1048576 bytes allocated at offset 0 bytes
203
+0/0 bytes allocated at offset 1 MiB
204
+
205
+0/1048576 bytes allocated at offset 0 bytes
206
+0/1048576 bytes allocated at offset 1 MiB
207
+
208
+=== Checking map ===
209
+[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680}]
210
+
211
+Offset Length Mapped to File
212
+0 0x200000 0x50000 TEST_DIR/PID-base
213
+
214
+[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680}]
215
+
216
+Offset Length Mapped to File
217
+0 0x100000 0x50000 TEST_DIR/PID-base
218
+
219
+[{ "start": 0, "length": 1048576, "depth": 2, "zero": false, "data": true, "offset": 327680},
220
+{ "start": 1048576, "length": 1048576, "depth": 0, "zero": true, "data": false}]
221
+
222
+Offset Length Mapped to File
223
+0 0x100000 0x50000 TEST_DIR/PID-base
224
+
225
+=== Testing qemu-img commit (top -> mid) ===
226
+Image committed.
227
+
228
+image: TEST_IMG
229
+file format: IMGFMT
230
+virtual size: 2 MiB (2097152 bytes)
231
+cluster_size: 65536
232
+backing file: TEST_DIR/PID-base
233
+Format specific information:
234
+ compat: 1.1
235
+ lazy refcounts: false
236
+ refcount bits: 16
237
+ corrupt: false
238
+
239
+read 1048576/1048576 bytes at offset 0
240
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
241
+
242
+read 1048576/1048576 bytes at offset 1048576
243
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
244
+
245
+=== Testing HMP commit (top -> mid) ===
246
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
247
+
248
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
249
+
250
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
251
+
252
+wrote 2097152/2097152 bytes at offset 0
253
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
254
+
255
+{"execute": "human-monitor-command", "arguments": {"command-line": "commit drive0"}}
256
+{"return": ""}
257
+image: TEST_IMG
258
+file format: IMGFMT
259
+virtual size: 2 MiB (2097152 bytes)
260
+cluster_size: 65536
261
+backing file: TEST_DIR/PID-base
262
+Format specific information:
263
+ compat: 1.1
264
+ lazy refcounts: false
265
+ refcount bits: 16
266
+ corrupt: false
267
+
268
+read 1048576/1048576 bytes at offset 0
269
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
270
+
271
+read 1048576/1048576 bytes at offset 1048576
272
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
273
+
274
+=== Testing QMP active commit (top -> mid) ===
275
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
276
+
277
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
278
+
279
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
280
+
281
+wrote 2097152/2097152 bytes at offset 0
282
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
283
+
284
+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "mid", "device": "top", "job-id": "job0"}}
114
+{"return": {}}
285
+{"return": {}}
115
+Launching destination VM...
286
+{"execute": "job-complete", "arguments": {"id": "job0"}}
116
+Enabling migration QMP events on B...
117
+{"return": {}}
287
+{"return": {}}
118
+Starting migration to B...
288
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
289
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
290
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
119
+{"return": {}}
291
+{"return": {}}
120
+{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
292
+image: TEST_IMG
121
+{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
293
+file format: IMGFMT
122
+{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
294
+virtual size: 2 MiB (2097152 bytes)
123
+{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
295
+cluster_size: 65536
124
+{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
296
+backing file: TEST_DIR/PID-base
125
+completed
297
+Format specific information:
126
+completed
298
+ compat: 1.1
127
+{"return": {"running": false, "singlestep": false, "status": "postmigrate"}}
299
+ lazy refcounts: false
128
+{"return": {"running": true, "singlestep": false, "status": "running"}}
300
+ refcount bits: 16
301
+ corrupt: false
302
+
303
+read 1048576/1048576 bytes at offset 0
304
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
305
+
306
+read 1048576/1048576 bytes at offset 1048576
307
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
308
+
309
+== Resize tests ==
310
+=== preallocation=off ===
311
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=6442450944 cluster_size=65536 lazy_refcounts=off refcount_bits=16
312
+
313
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=1073741824 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
314
+
315
+wrote 65536/65536 bytes at offset 5368709120
316
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
317
+
318
+Image resized.
319
+
320
+read 65536/65536 bytes at offset 5368709120
321
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
322
+
323
+1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0)
324
+7 GiB (0x1c0000000) bytes allocated at offset 1 GiB (0x40000000)
325
+
326
+[{ "start": 0, "length": 1073741824, "depth": 1, "zero": true, "data": false},
327
+{ "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}]
328
+
329
+=== preallocation=metadata ===
330
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=34359738368 cluster_size=65536 lazy_refcounts=off refcount_bits=16
331
+
332
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=32212254720 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
333
+
334
+wrote 65536/65536 bytes at offset 33285996544
335
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
336
+
337
+Image resized.
338
+
339
+read 65536/65536 bytes at offset 33285996544
340
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
341
+
342
+30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0)
343
+3 GiB (0xc0000000) bytes allocated at offset 30 GiB (0x780000000)
344
+
345
+[{ "start": 0, "length": 32212254720, "depth": 1, "zero": true, "data": false},
346
+{ "start": 32212254720, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 327680},
347
+{ "start": 32749125632, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 537264128},
348
+{ "start": 33285996544, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1074200576},
349
+{ "start": 33822867456, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1611137024},
350
+{ "start": 34359738368, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2148139008},
351
+{ "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}]
352
+
353
+=== preallocation=falloc ===
354
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=10485760 cluster_size=65536 lazy_refcounts=off refcount_bits=16
355
+
356
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=5242880 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
357
+
358
+wrote 65536/65536 bytes at offset 9437184
359
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
360
+
361
+Image resized.
362
+
363
+read 65536/65536 bytes at offset 9437184
364
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
365
+
366
+5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0)
367
+10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000)
368
+
369
+[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false},
370
+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}]
371
+
372
+=== preallocation=full ===
373
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
374
+
375
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=8388608 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
376
+
377
+wrote 65536/65536 bytes at offset 11534336
378
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
379
+
380
+Image resized.
381
+
382
+read 65536/65536 bytes at offset 11534336
383
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
384
+
385
+8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0)
386
+4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000)
387
+
388
+[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false},
389
+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}]
390
+
391
+=== preallocation=off ===
392
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
393
+
394
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=259072 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
395
+
396
+wrote 65536/65536 bytes at offset 259072
397
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
398
+
399
+Image resized.
400
+
401
+read 65536/65536 bytes at offset 259072
402
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
403
+
404
+192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0)
405
+320 KiB (0x50000) bytes allocated at offset 192 KiB (0x30000)
406
+
407
+[{ "start": 0, "length": 196608, "depth": 1, "zero": true, "data": false},
408
+{ "start": 196608, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": 327680},
409
+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}]
410
+
411
+=== preallocation=off ===
412
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=409600 cluster_size=65536 lazy_refcounts=off refcount_bits=16
413
+
414
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
415
+
416
+wrote 65536/65536 bytes at offset 344064
417
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
418
+
419
+Image resized.
420
+
421
+read 65536/65536 bytes at offset 344064
422
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
423
+
424
+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
425
+256 KiB (0x40000) bytes allocated at offset 256 KiB (0x40000)
426
+
427
+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false},
428
+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}]
429
+
430
+=== preallocation=off ===
431
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=524288 cluster_size=65536 lazy_refcounts=off refcount_bits=16
432
+
433
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
434
+
435
+wrote 65536/65536 bytes at offset 446464
436
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
437
+
438
+Image resized.
439
+
440
+read 65536/65536 bytes at offset 446464
441
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
442
+
443
+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
444
+244 KiB (0x3d000) bytes allocated at offset 256 KiB (0x40000)
445
+
446
+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false},
447
+{ "start": 262144, "length": 249856, "depth": 0, "zero": true, "data": false}]
448
+
129
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
449
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
130
index XXXXXXX..XXXXXXX 100644
450
index XXXXXXX..XXXXXXX 100644
131
--- a/tests/qemu-iotests/group
451
--- a/tests/qemu-iotests/group
132
+++ b/tests/qemu-iotests/group
452
+++ b/tests/qemu-iotests/group
133
@@ -XXX,XX +XXX,XX @@
453
@@ -XXX,XX +XXX,XX @@
134
254 rw backing quick
454
270 rw backing quick
135
255 rw quick
455
272 rw
136
256 rw quick
456
273 backing quick
137
+262 rw quick migration
457
+274 rw backing
458
277 rw quick
459
279 rw backing quick
460
280 rw migration quick
138
--
461
--
139
2.20.1
462
2.25.3
140
463
141
464
diff view generated by jsdifflib
1
We're getting a ridiculous number of child classes of
1
The BDRV_REQ_ZERO_WRITE is currently implemented in a way that first the
2
TestInitiallyFilled and TestInitiallyEmpty that differ only in a few
2
image is possibly preallocated and then the zero flag is added to all
3
attributes that we want to test in all combinations.
3
clusters. This means that a copy-on-write operation may be needed when
4
writing to these clusters, despite having used preallocation, negating
5
one of the major benefits of preallocation.
4
6
5
Instead of explicitly writing down every combination, let's use a loop
7
Instead, try to forward the BDRV_REQ_ZERO_WRITE to the protocol driver,
6
and create those classes dynamically.
8
and if the protocol driver can ensure that the new area reads as zeros,
9
we can skip setting the zero flag in the qcow2 layer.
10
11
Unfortunately, the same approach doesn't work for metadata
12
preallocation, so we'll still set the zero flag there.
7
13
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
Message-Id: <20200424142701.67053-1-kwolf@redhat.com>
17
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
18
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
19
---
11
tests/qemu-iotests/118 | 69 +++++++++++++-----------------------------
20
block/qcow2.c | 22 +++++++++++++++++++---
12
1 file changed, 21 insertions(+), 48 deletions(-)
21
tests/qemu-iotests/274.out | 4 ++--
22
2 files changed, 21 insertions(+), 5 deletions(-)
13
23
14
diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118
24
diff --git a/block/qcow2.c b/block/qcow2.c
15
index XXXXXXX..XXXXXXX 100755
25
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/qemu-iotests/118
26
--- a/block/qcow2.c
17
+++ b/tests/qemu-iotests/118
27
+++ b/block/qcow2.c
18
@@ -XXX,XX +XXX,XX @@ class GeneralChangeTestsBaseClass(ChangeBaseClass):
28
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
19
class TestInitiallyFilled(GeneralChangeTestsBaseClass):
29
/* Allocate the data area */
20
was_empty = False
30
new_file_size = allocation_start +
21
31
nb_new_data_clusters * s->cluster_size;
22
- def setUp(self, media, interface):
32
- /* Image file grows, so @exact does not matter */
23
+ def setUp(self):
33
- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
24
qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k')
34
- errp);
25
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
35
+ /*
26
self.vm = iotests.VM()
36
+ * Image file grows, so @exact does not matter.
27
- self.vm.add_drive(old_img, 'media=%s' % media, 'none')
37
+ *
28
- if interface == 'scsi':
38
+ * If we need to zero out the new area, try first whether the protocol
29
+ self.vm.add_drive(old_img, 'media=%s' % self.media, 'none')
39
+ * driver can already take care of this.
30
+ if self.interface == 'scsi':
40
+ */
31
self.vm.add_device('virtio-scsi-pci')
41
+ if (flags & BDRV_REQ_ZERO_WRITE) {
32
self.vm.add_device('%s,drive=drive0,id=%s' %
42
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc,
33
- (interface_to_device_name(interface),
43
+ BDRV_REQ_ZERO_WRITE, NULL);
34
+ (interface_to_device_name(self.interface),
44
+ if (ret >= 0) {
35
self.device_name))
45
+ flags &= ~BDRV_REQ_ZERO_WRITE;
36
self.vm.launch()
46
+ }
37
47
+ } else {
38
@@ -XXX,XX +XXX,XX @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass):
48
+ ret = -1;
39
class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
49
+ }
40
was_empty = True
50
+ if (ret < 0) {
41
51
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
42
- def setUp(self, media, interface):
52
+ errp);
43
+ def setUp(self):
53
+ }
44
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
54
if (ret < 0) {
45
- self.vm = iotests.VM().add_drive(None, 'media=%s' % media, 'none')
55
error_prepend(errp, "Failed to resize underlying file: ");
46
- if interface == 'scsi':
56
qcow2_free_clusters(bs, allocation_start,
47
+ self.vm = iotests.VM().add_drive(None, 'media=%s' % self.media, 'none')
57
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
48
+ if self.interface == 'scsi':
58
index XXXXXXX..XXXXXXX 100644
49
self.vm.add_device('virtio-scsi-pci')
59
--- a/tests/qemu-iotests/274.out
50
self.vm.add_device('%s,drive=drive0,id=%s' %
60
+++ b/tests/qemu-iotests/274.out
51
- (interface_to_device_name(interface),
61
@@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 9437184
52
+ (interface_to_device_name(self.interface),
62
10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000)
53
self.device_name))
63
54
self.vm.launch()
64
[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false},
55
65
-{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}]
56
@@ -XXX,XX +XXX,XX @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
66
+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}]
57
# Should be a no-op
67
58
self.assert_qmp(result, 'return', {})
68
=== preallocation=full ===
59
69
Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
60
-class TestCDInitiallyFilled(TestInitiallyFilled):
70
@@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 11534336
61
- TestInitiallyFilled = TestInitiallyFilled
71
4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000)
62
- has_real_tray = True
72
63
-
73
[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false},
64
- def setUp(self):
74
-{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}]
65
- self.TestInitiallyFilled.setUp(self, 'cdrom', 'ide')
75
+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}]
66
-
76
67
-class TestCDInitiallyEmpty(TestInitiallyEmpty):
77
=== preallocation=off ===
68
- TestInitiallyEmpty = TestInitiallyEmpty
78
Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
69
- has_real_tray = True
70
-
71
- def setUp(self):
72
- self.TestInitiallyEmpty.setUp(self, 'cdrom', 'ide')
73
+# Do this in a function to avoid leaking variables like case into the global
74
+# name space (otherwise tests would be run for the abstract base classes)
75
+def create_basic_test_classes():
76
+ for (media, interface, has_real_tray) in [ ('cdrom', 'ide', True),
77
+ ('cdrom', 'scsi', True),
78
+ ('disk', 'floppy', False) ]:
79
80
-class TestSCSICDInitiallyFilled(TestInitiallyFilled):
81
- TestInitiallyFilled = TestInitiallyFilled
82
- has_real_tray = True
83
+ for case in [ TestInitiallyFilled, TestInitiallyEmpty ]:
84
85
- def setUp(self):
86
- self.TestInitiallyFilled.setUp(self, 'cdrom', 'scsi')
87
+ attr = { 'media': media,
88
+ 'interface': interface,
89
+ 'has_real_tray': has_real_tray }
90
91
-class TestSCSICDInitiallyEmpty(TestInitiallyEmpty):
92
- TestInitiallyEmpty = TestInitiallyEmpty
93
- has_real_tray = True
94
+ name = '%s_%s_%s' % (case.__name__, media, interface)
95
+ globals()[name] = type(name, (case, ), attr)
96
97
- def setUp(self):
98
- self.TestInitiallyEmpty.setUp(self, 'cdrom', 'scsi')
99
-
100
-class TestFloppyInitiallyFilled(TestInitiallyFilled):
101
- TestInitiallyFilled = TestInitiallyFilled
102
- has_real_tray = False
103
-
104
- def setUp(self):
105
- self.TestInitiallyFilled.setUp(self, 'disk', 'floppy')
106
-
107
-class TestFloppyInitiallyEmpty(TestInitiallyEmpty):
108
- TestInitiallyEmpty = TestInitiallyEmpty
109
- has_real_tray = False
110
-
111
- def setUp(self):
112
- self.TestInitiallyEmpty.setUp(self, 'disk', 'floppy')
113
- # FDDs not having a real tray and there not being a medium inside the
114
- # tray at startup means the tray will be considered open
115
- self.has_opened = True
116
+create_basic_test_classes()
117
118
class TestChangeReadOnly(ChangeBaseClass):
119
device_name = 'qdev0'
120
--
79
--
121
2.20.1
80
2.25.3
122
81
123
82
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
From: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
2
2
3
Currently, bdrv_replace_child_noperm() undrains the parent until it is
3
This patch introduces support for PMR that has been defined as part of NVMe 1.4
4
completely undrained, then re-drains it after attaching the new child
4
spec. The user can now specify a pmrdev option that should point to a HostMemoryBackend.
5
node.
5
The pmrdev memory region will subsequently be exposed as PCI BAR 2 in the emulated NVMe
6
device. The guest OS can perform MMIO reads and writes to the PMR region that will stay
7
persistent across system reboot.
6
8
7
This is a problem with bdrv_drop_intermediate(): We want to keep the
9
Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
8
whole subtree drained, including parents, while the operation is
10
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
9
under way. bdrv_replace_child_noperm() breaks this by allowing every
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
parent to become unquiesced briefly, and then redraining it.
12
Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com>
11
13
Reviewed-by: Keith Busch <kbusch@kernel.org>
12
In fact, there is no reason why the parent should become unquiesced and
13
be allowed to submit requests to the new child node if that new node is
14
supposed to be kept drained. So if anything, we have to drain the
15
parent before detaching the old child node. Conversely, we have to
16
undrain it only after attaching the new child node.
17
18
Thus, change the whole drain algorithm here: Calculate the number of
19
times we have to drain/undrain the parent before replacing the child
20
node, then drain it (if necessary), replace the child node, and then
21
undrain it.
22
23
Signed-off-by: Max Reitz <mreitz@redhat.com>
24
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
25
---
15
---
26
block.c | 49 +++++++++++++++++++++++++++++++++----------------
16
hw/block/nvme.h | 2 +
27
1 file changed, 33 insertions(+), 16 deletions(-)
17
include/block/nvme.h | 172 +++++++++++++++++++++++++++++++++++++++++
18
hw/block/nvme.c | 109 ++++++++++++++++++++++++++
19
hw/block/Makefile.objs | 2 +-
20
hw/block/trace-events | 4 +
21
5 files changed, 288 insertions(+), 1 deletion(-)
28
22
29
diff --git a/block.c b/block.c
23
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
30
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
31
--- a/block.c
25
--- a/hw/block/nvme.h
32
+++ b/block.c
26
+++ b/hw/block/nvme.h
33
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
27
@@ -XXX,XX +XXX,XX @@ typedef struct NvmeCtrl {
34
BlockDriverState *new_bs)
28
uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
35
{
29
36
BlockDriverState *old_bs = child->bs;
30
char *serial;
37
- int i;
31
+ HostMemoryBackend *pmrdev;
38
+ int new_bs_quiesce_counter;
32
+
39
+ int drain_saldo;
33
NvmeNamespace *namespaces;
40
34
NvmeSQueue **sq;
41
assert(!child->frozen);
35
NvmeCQueue **cq;
42
36
diff --git a/include/block/nvme.h b/include/block/nvme.h
43
if (old_bs && new_bs) {
37
index XXXXXXX..XXXXXXX 100644
44
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
38
--- a/include/block/nvme.h
39
+++ b/include/block/nvme.h
40
@@ -XXX,XX +XXX,XX @@ typedef struct NvmeBar {
41
uint64_t acq;
42
uint32_t cmbloc;
43
uint32_t cmbsz;
44
+ uint8_t padding[3520]; /* not used by QEMU */
45
+ uint32_t pmrcap;
46
+ uint32_t pmrctl;
47
+ uint32_t pmrsts;
48
+ uint32_t pmrebs;
49
+ uint32_t pmrswtp;
50
+ uint32_t pmrmsc;
51
} NvmeBar;
52
53
enum NvmeCapShift {
54
@@ -XXX,XX +XXX,XX @@ enum NvmeCapShift {
55
CAP_CSS_SHIFT = 37,
56
CAP_MPSMIN_SHIFT = 48,
57
CAP_MPSMAX_SHIFT = 52,
58
+ CAP_PMR_SHIFT = 56,
59
};
60
61
enum NvmeCapMask {
62
@@ -XXX,XX +XXX,XX @@ enum NvmeCapMask {
63
CAP_CSS_MASK = 0xff,
64
CAP_MPSMIN_MASK = 0xf,
65
CAP_MPSMAX_MASK = 0xf,
66
+ CAP_PMR_MASK = 0x1,
67
};
68
69
#define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK)
70
@@ -XXX,XX +XXX,XX @@ enum NvmeCapMask {
71
<< CAP_MPSMIN_SHIFT)
72
#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\
73
<< CAP_MPSMAX_SHIFT)
74
+#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\
75
+ << CAP_PMR_SHIFT)
76
77
enum NvmeCcShift {
78
CC_EN_SHIFT = 0,
79
@@ -XXX,XX +XXX,XX @@ enum NvmeCmbszMask {
80
#define NVME_CMBSZ_GETSIZE(cmbsz) \
81
(NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz))))
82
83
+enum NvmePmrcapShift {
84
+ PMRCAP_RDS_SHIFT = 3,
85
+ PMRCAP_WDS_SHIFT = 4,
86
+ PMRCAP_BIR_SHIFT = 5,
87
+ PMRCAP_PMRTU_SHIFT = 8,
88
+ PMRCAP_PMRWBM_SHIFT = 10,
89
+ PMRCAP_PMRTO_SHIFT = 16,
90
+ PMRCAP_CMSS_SHIFT = 24,
91
+};
92
+
93
+enum NvmePmrcapMask {
94
+ PMRCAP_RDS_MASK = 0x1,
95
+ PMRCAP_WDS_MASK = 0x1,
96
+ PMRCAP_BIR_MASK = 0x7,
97
+ PMRCAP_PMRTU_MASK = 0x3,
98
+ PMRCAP_PMRWBM_MASK = 0xf,
99
+ PMRCAP_PMRTO_MASK = 0xff,
100
+ PMRCAP_CMSS_MASK = 0x1,
101
+};
102
+
103
+#define NVME_PMRCAP_RDS(pmrcap) \
104
+ ((pmrcap >> PMRCAP_RDS_SHIFT) & PMRCAP_RDS_MASK)
105
+#define NVME_PMRCAP_WDS(pmrcap) \
106
+ ((pmrcap >> PMRCAP_WDS_SHIFT) & PMRCAP_WDS_MASK)
107
+#define NVME_PMRCAP_BIR(pmrcap) \
108
+ ((pmrcap >> PMRCAP_BIR_SHIFT) & PMRCAP_BIR_MASK)
109
+#define NVME_PMRCAP_PMRTU(pmrcap) \
110
+ ((pmrcap >> PMRCAP_PMRTU_SHIFT) & PMRCAP_PMRTU_MASK)
111
+#define NVME_PMRCAP_PMRWBM(pmrcap) \
112
+ ((pmrcap >> PMRCAP_PMRWBM_SHIFT) & PMRCAP_PMRWBM_MASK)
113
+#define NVME_PMRCAP_PMRTO(pmrcap) \
114
+ ((pmrcap >> PMRCAP_PMRTO_SHIFT) & PMRCAP_PMRTO_MASK)
115
+#define NVME_PMRCAP_CMSS(pmrcap) \
116
+ ((pmrcap >> PMRCAP_CMSS_SHIFT) & PMRCAP_CMSS_MASK)
117
+
118
+#define NVME_PMRCAP_SET_RDS(pmrcap, val) \
119
+ (pmrcap |= (uint64_t)(val & PMRCAP_RDS_MASK) << PMRCAP_RDS_SHIFT)
120
+#define NVME_PMRCAP_SET_WDS(pmrcap, val) \
121
+ (pmrcap |= (uint64_t)(val & PMRCAP_WDS_MASK) << PMRCAP_WDS_SHIFT)
122
+#define NVME_PMRCAP_SET_BIR(pmrcap, val) \
123
+ (pmrcap |= (uint64_t)(val & PMRCAP_BIR_MASK) << PMRCAP_BIR_SHIFT)
124
+#define NVME_PMRCAP_SET_PMRTU(pmrcap, val) \
125
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRTU_MASK) << PMRCAP_PMRTU_SHIFT)
126
+#define NVME_PMRCAP_SET_PMRWBM(pmrcap, val) \
127
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRWBM_MASK) << PMRCAP_PMRWBM_SHIFT)
128
+#define NVME_PMRCAP_SET_PMRTO(pmrcap, val) \
129
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRTO_MASK) << PMRCAP_PMRTO_SHIFT)
130
+#define NVME_PMRCAP_SET_CMSS(pmrcap, val) \
131
+ (pmrcap |= (uint64_t)(val & PMRCAP_CMSS_MASK) << PMRCAP_CMSS_SHIFT)
132
+
133
+enum NvmePmrctlShift {
134
+ PMRCTL_EN_SHIFT = 0,
135
+};
136
+
137
+enum NvmePmrctlMask {
138
+ PMRCTL_EN_MASK = 0x1,
139
+};
140
+
141
+#define NVME_PMRCTL_EN(pmrctl) ((pmrctl >> PMRCTL_EN_SHIFT) & PMRCTL_EN_MASK)
142
+
143
+#define NVME_PMRCTL_SET_EN(pmrctl, val) \
144
+ (pmrctl |= (uint64_t)(val & PMRCTL_EN_MASK) << PMRCTL_EN_SHIFT)
145
+
146
+enum NvmePmrstsShift {
147
+ PMRSTS_ERR_SHIFT = 0,
148
+ PMRSTS_NRDY_SHIFT = 8,
149
+ PMRSTS_HSTS_SHIFT = 9,
150
+ PMRSTS_CBAI_SHIFT = 12,
151
+};
152
+
153
+enum NvmePmrstsMask {
154
+ PMRSTS_ERR_MASK = 0xff,
155
+ PMRSTS_NRDY_MASK = 0x1,
156
+ PMRSTS_HSTS_MASK = 0x7,
157
+ PMRSTS_CBAI_MASK = 0x1,
158
+};
159
+
160
+#define NVME_PMRSTS_ERR(pmrsts) \
161
+ ((pmrsts >> PMRSTS_ERR_SHIFT) & PMRSTS_ERR_MASK)
162
+#define NVME_PMRSTS_NRDY(pmrsts) \
163
+ ((pmrsts >> PMRSTS_NRDY_SHIFT) & PMRSTS_NRDY_MASK)
164
+#define NVME_PMRSTS_HSTS(pmrsts) \
165
+ ((pmrsts >> PMRSTS_HSTS_SHIFT) & PMRSTS_HSTS_MASK)
166
+#define NVME_PMRSTS_CBAI(pmrsts) \
167
+ ((pmrsts >> PMRSTS_CBAI_SHIFT) & PMRSTS_CBAI_MASK)
168
+
169
+#define NVME_PMRSTS_SET_ERR(pmrsts, val) \
170
+ (pmrsts |= (uint64_t)(val & PMRSTS_ERR_MASK) << PMRSTS_ERR_SHIFT)
171
+#define NVME_PMRSTS_SET_NRDY(pmrsts, val) \
172
+ (pmrsts |= (uint64_t)(val & PMRSTS_NRDY_MASK) << PMRSTS_NRDY_SHIFT)
173
+#define NVME_PMRSTS_SET_HSTS(pmrsts, val) \
174
+ (pmrsts |= (uint64_t)(val & PMRSTS_HSTS_MASK) << PMRSTS_HSTS_SHIFT)
175
+#define NVME_PMRSTS_SET_CBAI(pmrsts, val) \
176
+ (pmrsts |= (uint64_t)(val & PMRSTS_CBAI_MASK) << PMRSTS_CBAI_SHIFT)
177
+
178
+enum NvmePmrebsShift {
179
+ PMREBS_PMRSZU_SHIFT = 0,
180
+ PMREBS_RBB_SHIFT = 4,
181
+ PMREBS_PMRWBZ_SHIFT = 8,
182
+};
183
+
184
+enum NvmePmrebsMask {
185
+ PMREBS_PMRSZU_MASK = 0xf,
186
+ PMREBS_RBB_MASK = 0x1,
187
+ PMREBS_PMRWBZ_MASK = 0xffffff,
188
+};
189
+
190
+#define NVME_PMREBS_PMRSZU(pmrebs) \
191
+ ((pmrebs >> PMREBS_PMRSZU_SHIFT) & PMREBS_PMRSZU_MASK)
192
+#define NVME_PMREBS_RBB(pmrebs) \
193
+ ((pmrebs >> PMREBS_RBB_SHIFT) & PMREBS_RBB_MASK)
194
+#define NVME_PMREBS_PMRWBZ(pmrebs) \
195
+ ((pmrebs >> PMREBS_PMRWBZ_SHIFT) & PMREBS_PMRWBZ_MASK)
196
+
197
+#define NVME_PMREBS_SET_PMRSZU(pmrebs, val) \
198
+ (pmrebs |= (uint64_t)(val & PMREBS_PMRSZU_MASK) << PMREBS_PMRSZU_SHIFT)
199
+#define NVME_PMREBS_SET_RBB(pmrebs, val) \
200
+ (pmrebs |= (uint64_t)(val & PMREBS_RBB_MASK) << PMREBS_RBB_SHIFT)
201
+#define NVME_PMREBS_SET_PMRWBZ(pmrebs, val) \
202
+ (pmrebs |= (uint64_t)(val & PMREBS_PMRWBZ_MASK) << PMREBS_PMRWBZ_SHIFT)
203
+
204
+enum NvmePmrswtpShift {
205
+ PMRSWTP_PMRSWTU_SHIFT = 0,
206
+ PMRSWTP_PMRSWTV_SHIFT = 8,
207
+};
208
+
209
+enum NvmePmrswtpMask {
210
+ PMRSWTP_PMRSWTU_MASK = 0xf,
211
+ PMRSWTP_PMRSWTV_MASK = 0xffffff,
212
+};
213
+
214
+#define NVME_PMRSWTP_PMRSWTU(pmrswtp) \
215
+ ((pmrswtp >> PMRSWTP_PMRSWTU_SHIFT) & PMRSWTP_PMRSWTU_MASK)
216
+#define NVME_PMRSWTP_PMRSWTV(pmrswtp) \
217
+ ((pmrswtp >> PMRSWTP_PMRSWTV_SHIFT) & PMRSWTP_PMRSWTV_MASK)
218
+
219
+#define NVME_PMRSWTP_SET_PMRSWTU(pmrswtp, val) \
220
+ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTU_MASK) << PMRSWTP_PMRSWTU_SHIFT)
221
+#define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \
222
+ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT)
223
+
224
+enum NvmePmrmscShift {
225
+ PMRMSC_CMSE_SHIFT = 1,
226
+ PMRMSC_CBA_SHIFT = 12,
227
+};
228
+
229
+enum NvmePmrmscMask {
230
+ PMRMSC_CMSE_MASK = 0x1,
231
+ PMRMSC_CBA_MASK = 0xfffffffffffff,
232
+};
233
+
234
+#define NVME_PMRMSC_CMSE(pmrmsc) \
235
+ ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK)
236
+#define NVME_PMRMSC_CBA(pmrmsc) \
237
+ ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK)
238
+
239
+#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \
240
+ (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT)
241
+#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \
242
+ (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
243
+
244
typedef struct NvmeCmd {
245
uint8_t opcode;
246
uint8_t fuse;
247
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
248
index XXXXXXX..XXXXXXX 100644
249
--- a/hw/block/nvme.c
250
+++ b/hw/block/nvme.c
251
@@ -XXX,XX +XXX,XX @@
252
* -drive file=<file>,if=none,id=<drive_id>
253
* -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
254
* cmb_size_mb=<cmb_size_mb[optional]>, \
255
+ * [pmrdev=<mem_backend_file_id>,] \
256
* num_queues=<N[optional]>
257
*
258
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
259
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
260
+ *
261
+ * cmb_size_mb= and pmrdev= options are mutually exclusive due to limitation
262
+ * in available BARs. cmb_size_mb= will take precedence over pmrdev= when
263
+ * both are provided.
264
+ * Enabling pmr emulation can be achieved by pointing to memory-backend-file.
265
+ * For example:
266
+ * -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \
267
+ * size=<size> .... -device nvme,...,pmrdev=<mem_id>
268
*/
269
270
#include "qemu/osdep.h"
271
@@ -XXX,XX +XXX,XX @@
272
#include "sysemu/sysemu.h"
273
#include "qapi/error.h"
274
#include "qapi/visitor.h"
275
+#include "sysemu/hostmem.h"
276
#include "sysemu/block-backend.h"
277
+#include "exec/ram_addr.h"
278
279
#include "qemu/log.h"
280
#include "qemu/module.h"
281
@@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
282
NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
283
"invalid write to read only CMBSZ, ignored");
284
return;
285
+ case 0xE00: /* PMRCAP */
286
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly,
287
+ "invalid write to PMRCAP register, ignored");
288
+ return;
289
+ case 0xE04: /* TODO PMRCTL */
290
+ break;
291
+ case 0xE08: /* PMRSTS */
292
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly,
293
+ "invalid write to PMRSTS register, ignored");
294
+ return;
295
+ case 0xE0C: /* PMREBS */
296
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly,
297
+ "invalid write to PMREBS register, ignored");
298
+ return;
299
+ case 0xE10: /* PMRSWTP */
300
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly,
301
+ "invalid write to PMRSWTP register, ignored");
302
+ return;
303
+ case 0xE14: /* TODO PMRMSC */
304
+ break;
305
default:
306
NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
307
"invalid MMIO write,"
308
@@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
45
}
309
}
46
+
310
47
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
311
if (addr < sizeof(n->bar)) {
48
+ drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
312
+ /*
49
+
313
+ * When PMRWBM bit 1 is set then read from
50
+ /*
314
+ * PMRSTS should ensure prior writes
51
+ * If the new child node is drained but the old one was not, flush
315
+ * made it to persistent media
52
+ * all outstanding requests to the old child node.
316
+ */
53
+ */
317
+ if (addr == 0xE08 &&
54
+ while (drain_saldo > 0 && child->role->drained_begin) {
318
+ (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
55
+ bdrv_parent_drained_begin_single(child, true);
319
+ qemu_ram_writeback(n->pmrdev->mr.ram_block,
56
+ drain_saldo--;
320
+ 0, n->pmrdev->size);
321
+ }
322
memcpy(&val, ptr + addr, size);
323
} else {
324
NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
325
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
326
error_setg(errp, "serial property not set");
327
return;
328
}
329
+
330
+ if (!n->cmb_size_mb && n->pmrdev) {
331
+ if (host_memory_backend_is_mapped(n->pmrdev)) {
332
+ char *path = object_get_canonical_path_component(OBJECT(n->pmrdev));
333
+ error_setg(errp, "can't use already busy memdev: %s", path);
334
+ g_free(path);
335
+ return;
336
+ }
337
+
338
+ if (!is_power_of_2(n->pmrdev->size)) {
339
+ error_setg(errp, "pmr backend size needs to be power of 2 in size");
340
+ return;
341
+ }
342
+
343
+ host_memory_backend_set_mapped(n->pmrdev, true);
57
+ }
344
+ }
58
+
345
+
59
if (old_bs) {
346
blkconf_blocksizes(&n->conf);
60
/* Detach first so that the recursive drain sections coming from @child
347
if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
61
* are already gone and we only end the drain sections that came from
348
false, errp)) {
62
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
349
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
63
if (child->role->detach) {
350
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
64
child->role->detach(child);
351
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
65
}
352
66
- while (child->parent_quiesce_counter) {
353
+ } else if (n->pmrdev) {
67
- bdrv_parent_drained_end_single(child);
354
+ /* Controller Capabilities register */
68
- }
355
+ NVME_CAP_SET_PMRS(n->bar.cap, 1);
69
QLIST_REMOVE(child, next_parent);
356
+
70
- } else {
357
+ /* PMR Capabities register */
71
- assert(child->parent_quiesce_counter == 0);
358
+ n->bar.pmrcap = 0;
359
+ NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
360
+ NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
361
+ NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2);
362
+ NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
363
+ /* Turn on bit 1 support */
364
+ NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);
365
+ NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
366
+ NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);
367
+
368
+ /* PMR Control register */
369
+ n->bar.pmrctl = 0;
370
+ NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);
371
+
372
+ /* PMR Status register */
373
+ n->bar.pmrsts = 0;
374
+ NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
375
+ NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
376
+ NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
377
+ NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);
378
+
379
+ /* PMR Elasticity Buffer Size register */
380
+ n->bar.pmrebs = 0;
381
+ NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
382
+ NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
383
+ NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);
384
+
385
+ /* PMR Sustained Write Throughput register */
386
+ n->bar.pmrswtp = 0;
387
+ NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
388
+ NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);
389
+
390
+ /* PMR Memory Space Control register */
391
+ n->bar.pmrmsc = 0;
392
+ NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
393
+ NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);
394
+
395
+ pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
396
+ PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
397
+ PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr);
72
}
398
}
73
399
74
child->bs = new_bs;
400
for (i = 0; i < n->num_namespaces; i++) {
75
401
@@ -XXX,XX +XXX,XX @@ static void nvme_exit(PCIDevice *pci_dev)
76
if (new_bs) {
402
if (n->cmb_size_mb) {
77
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
403
g_free(n->cmbuf);
78
- if (new_bs->quiesce_counter) {
79
- int num = new_bs->quiesce_counter;
80
- if (child->role->parent_is_bds) {
81
- num -= bdrv_drain_all_count;
82
- }
83
- assert(num >= 0);
84
- for (i = 0; i < num; i++) {
85
- bdrv_parent_drained_begin_single(child, true);
86
- }
87
- }
88
+
89
+ /*
90
+ * Detaching the old node may have led to the new node's
91
+ * quiesce_counter having been decreased. Not a problem, we
92
+ * just need to recognize this here and then invoke
93
+ * drained_end appropriately more often.
94
+ */
95
+ assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
96
+ drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
97
98
/* Attach only after starting new drained sections, so that recursive
99
* drain sections coming from @child don't get an extra .drained_begin
100
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
101
child->role->attach(child);
102
}
103
}
404
}
104
+
405
+
105
+ /*
406
+ if (n->pmrdev) {
106
+ * If the old child node was drained but the new one is not, allow
407
+ host_memory_backend_set_mapped(n->pmrdev, false);
107
+ * requests to come in only after the new node has been attached.
108
+ */
109
+ while (drain_saldo < 0 && child->role->drained_end) {
110
+ bdrv_parent_drained_end_single(child);
111
+ drain_saldo++;
112
+ }
408
+ }
409
msix_uninit_exclusive_bar(pci_dev);
113
}
410
}
114
411
115
/*
412
static Property nvme_props[] = {
413
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
414
+ DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND,
415
+ HostMemoryBackend *),
416
DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
417
DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
418
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
419
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
420
index XXXXXXX..XXXXXXX 100644
421
--- a/hw/block/Makefile.objs
422
+++ b/hw/block/Makefile.objs
423
@@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
424
common-obj-$(CONFIG_XEN) += xen-block.o
425
common-obj-$(CONFIG_ECC) += ecc.o
426
common-obj-$(CONFIG_ONENAND) += onenand.o
427
-common-obj-$(CONFIG_NVME_PCI) += nvme.o
428
common-obj-$(CONFIG_SWIM) += swim.o
429
430
common-obj-$(CONFIG_SH4) += tc58128.o
431
432
obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o
433
obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
434
+obj-$(CONFIG_NVME_PCI) += nvme.o
435
436
obj-y += dataplane/
437
diff --git a/hw/block/trace-events b/hw/block/trace-events
438
index XXXXXXX..XXXXXXX 100644
439
--- a/hw/block/trace-events
440
+++ b/hw/block/trace-events
441
@@ -XXX,XX +XXX,XX @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA
442
nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
443
nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
444
nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
445
+nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored"
446
+nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored"
447
+nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored"
448
+nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored"
449
nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
450
nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
451
nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
116
--
452
--
117
2.20.1
453
2.25.3
118
454
119
455
diff view generated by jsdifflib
1
The same change as commit 2b23f28639 ('block/copy-on-read: Fix
1
The QMP handler qmp_object_add() and the implementation of --object in
2
permissions for inactive node') made for the copy-on-read driver can be
2
qemu-storage-daemon can share most of the code. Currently,
3
made for bdrv_filter_default_perms(): Retaining the old permissions from
3
qemu-storage-daemon calls qmp_object_add(), but this is not correct
4
the BdrvChild if it is given complicates things unnecessarily when in
4
because different visitors need to be used.
5
the end this only means that the options set in the c == NULL case (i.e.
5
6
during child creation) are retained.
6
As a first step towards a fix, make qmp_object_add() a wrapper around a
7
new function user_creatable_add_dict() that can get an additional
8
parameter. The handling of "props" is only required for compatibility
9
and not required for the qemu-storage-daemon command line, so it stays
10
in qmp_object_add().
7
11
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
---
13
---
12
block.c | 12 ++----------
14
include/qom/object_interfaces.h | 12 ++++++++++++
13
1 file changed, 2 insertions(+), 10 deletions(-)
15
qom/object_interfaces.c | 27 +++++++++++++++++++++++++++
16
qom/qom-qmp-cmds.c | 24 +-----------------------
17
3 files changed, 40 insertions(+), 23 deletions(-)
14
18
15
diff --git a/block.c b/block.c
19
diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h
16
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
17
--- a/block.c
21
--- a/include/qom/object_interfaces.h
18
+++ b/block.c
22
+++ b/include/qom/object_interfaces.h
19
@@ -XXX,XX +XXX,XX @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
23
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
20
uint64_t perm, uint64_t shared,
24
const QDict *qdict,
21
uint64_t *nperm, uint64_t *nshared)
25
Visitor *v, Error **errp);
26
27
+/**
28
+ * user_creatable_add_dict:
29
+ * @qdict: the object definition
30
+ * @errp: if an error occurs, a pointer to an area to store the error
31
+ *
32
+ * Create an instance of the user creatable object that is defined by
33
+ * @qdict. The object type is taken from the QDict key 'qom-type', its
34
+ * ID from the key 'id'. The remaining entries in @qdict are used to
35
+ * initialize the object properties.
36
+ */
37
+void user_creatable_add_dict(QDict *qdict, Error **errp);
38
+
39
/**
40
* user_creatable_add_opts:
41
* @opts: the object definition
42
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/qom/object_interfaces.c
45
+++ b/qom/object_interfaces.c
46
@@ -XXX,XX +XXX,XX @@
47
#include "qapi/qmp/qerror.h"
48
#include "qapi/qmp/qjson.h"
49
#include "qapi/qmp/qstring.h"
50
+#include "qapi/qobject-input-visitor.h"
51
#include "qom/object_interfaces.h"
52
#include "qemu/help_option.h"
53
#include "qemu/module.h"
54
@@ -XXX,XX +XXX,XX @@ out:
55
return obj;
56
}
57
58
+void user_creatable_add_dict(QDict *qdict, Error **errp)
59
+{
60
+ Visitor *v;
61
+ Object *obj;
62
+ g_autofree char *type = NULL;
63
+ g_autofree char *id = NULL;
64
+
65
+ type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
66
+ if (!type) {
67
+ error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
68
+ return;
69
+ }
70
+ qdict_del(qdict, "qom-type");
71
+
72
+ id = g_strdup(qdict_get_try_str(qdict, "id"));
73
+ if (!id) {
74
+ error_setg(errp, QERR_MISSING_PARAMETER, "id");
75
+ return;
76
+ }
77
+ qdict_del(qdict, "id");
78
+
79
+ v = qobject_input_visitor_new(QOBJECT(qdict));
80
+ obj = user_creatable_add_type(type, id, qdict, v, errp);
81
+ visit_free(v);
82
+ object_unref(obj);
83
+}
84
85
Object *user_creatable_add_opts(QemuOpts *opts, Error **errp)
22
{
86
{
23
- if (c == NULL) {
87
diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
24
- *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
88
index XXXXXXX..XXXXXXX 100644
25
- *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
89
--- a/qom/qom-qmp-cmds.c
90
+++ b/qom/qom-qmp-cmds.c
91
@@ -XXX,XX +XXX,XX @@
92
#include "qapi/qapi-commands-qom.h"
93
#include "qapi/qmp/qdict.h"
94
#include "qapi/qmp/qerror.h"
95
-#include "qapi/qobject-input-visitor.h"
96
#include "qemu/cutils.h"
97
#include "qom/object_interfaces.h"
98
#include "qom/qom-qobject.h"
99
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
100
{
101
QObject *props;
102
QDict *pdict;
103
- Visitor *v;
104
- Object *obj;
105
- g_autofree char *type = NULL;
106
- g_autofree char *id = NULL;
107
-
108
- type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
109
- if (!type) {
110
- error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
26
- return;
111
- return;
27
- }
112
- }
113
- qdict_del(qdict, "qom-type");
28
-
114
-
29
- *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
115
- id = g_strdup(qdict_get_try_str(qdict, "id"));
30
- (c->perm & DEFAULT_PERM_UNCHANGED);
116
- if (!id) {
31
- *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
117
- error_setg(errp, QERR_MISSING_PARAMETER, "id");
32
- (c->shared_perm & DEFAULT_PERM_UNCHANGED);
118
- return;
33
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
119
- }
34
+ *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
120
- qdict_del(qdict, "id");
121
122
props = qdict_get(qdict, "props");
123
if (props) {
124
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
125
qobject_unref(pdict);
126
}
127
128
- v = qobject_input_visitor_new(QOBJECT(qdict));
129
- obj = user_creatable_add_type(type, id, qdict, v, errp);
130
- visit_free(v);
131
- object_unref(obj);
132
+ user_creatable_add_dict(qdict, errp);
35
}
133
}
36
134
37
void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
135
void qmp_object_del(const char *id, Error **errp)
38
--
136
--
39
2.20.1
137
2.25.3
40
138
41
139
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
After processing the option string with the keyval parser, we get a
2
QDict that contains only strings. This QDict must be fed to a keyval
3
visitor which converts the strings into the right data types.
2
4
3
Signed-off-by: Max Reitz <mreitz@redhat.com>
5
qmp_object_add(), however, uses the normal QObject input visitor, which
6
expects a QDict where all properties already have the QType that matches
7
the data type required by the QOM object type.
8
9
Change the --object implementation in qemu-storage-daemon so that it
10
doesn't call qmp_object_add(), but calls user_creatable_add_dict()
11
directly instead and passes it a new keyval boolean that decides which
12
visitor must be used.
13
14
Reported-by: Coiby Xu <coiby.xu@gmail.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
---
16
---
6
tests/test-bdrv-drain.c | 167 ++++++++++++++++++++++++++++++++++++++++
17
include/qom/object_interfaces.h | 6 +++++-
7
1 file changed, 167 insertions(+)
18
qemu-storage-daemon.c | 4 +---
19
qom/object_interfaces.c | 8 ++++++--
20
qom/qom-qmp-cmds.c | 2 +-
21
4 files changed, 13 insertions(+), 7 deletions(-)
8
22
9
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
23
diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h
10
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
11
--- a/tests/test-bdrv-drain.c
25
--- a/include/qom/object_interfaces.h
12
+++ b/tests/test-bdrv-drain.c
26
+++ b/include/qom/object_interfaces.h
13
@@ -XXX,XX +XXX,XX @@ static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c,
27
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
14
nperm, nshared);
28
/**
29
* user_creatable_add_dict:
30
* @qdict: the object definition
31
+ * @keyval: if true, use a keyval visitor for processing @qdict (i.e.
32
+ * assume that all @qdict values are strings); otherwise, use
33
+ * the normal QObject visitor (i.e. assume all @qdict values
34
+ * have the QType expected by the QOM object type)
35
* @errp: if an error occurs, a pointer to an area to store the error
36
*
37
* Create an instance of the user creatable object that is defined by
38
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
39
* ID from the key 'id'. The remaining entries in @qdict are used to
40
* initialize the object properties.
41
*/
42
-void user_creatable_add_dict(QDict *qdict, Error **errp);
43
+void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp);
44
45
/**
46
* user_creatable_add_opts:
47
diff --git a/qemu-storage-daemon.c b/qemu-storage-daemon.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/qemu-storage-daemon.c
50
+++ b/qemu-storage-daemon.c
51
@@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[])
52
QemuOpts *opts;
53
const char *type;
54
QDict *args;
55
- QObject *ret_data = NULL;
56
57
/* FIXME The keyval parser rejects 'help' arguments, so we must
58
* unconditionally try QemuOpts first. */
59
@@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[])
60
qemu_opts_del(opts);
61
62
args = keyval_parse(optarg, "qom-type", &error_fatal);
63
- qmp_object_add(args, &ret_data, &error_fatal);
64
+ user_creatable_add_dict(args, true, &error_fatal);
65
qobject_unref(args);
66
- qobject_unref(ret_data);
67
break;
68
}
69
default:
70
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/qom/object_interfaces.c
73
+++ b/qom/object_interfaces.c
74
@@ -XXX,XX +XXX,XX @@ out:
75
return obj;
15
}
76
}
16
77
17
+static int bdrv_test_change_backing_file(BlockDriverState *bs,
78
-void user_creatable_add_dict(QDict *qdict, Error **errp)
18
+ const char *backing_file,
79
+void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp)
19
+ const char *backing_fmt)
80
{
20
+{
81
Visitor *v;
21
+ return 0;
82
Object *obj;
22
+}
83
@@ -XXX,XX +XXX,XX @@ void user_creatable_add_dict(QDict *qdict, Error **errp)
23
+
84
}
24
static BlockDriver bdrv_test = {
85
qdict_del(qdict, "id");
25
.format_name = "test",
86
26
.instance_size = sizeof(BDRVTestState),
87
- v = qobject_input_visitor_new(QOBJECT(qdict));
27
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = {
88
+ if (keyval) {
28
.bdrv_co_drain_end = bdrv_test_co_drain_end,
89
+ v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
29
90
+ } else {
30
.bdrv_child_perm = bdrv_test_child_perm,
91
+ v = qobject_input_visitor_new(QOBJECT(qdict));
31
+
92
+ }
32
+ .bdrv_change_backing_file = bdrv_test_change_backing_file,
93
obj = user_creatable_add_type(type, id, qdict, v, errp);
33
};
94
visit_free(v);
34
95
object_unref(obj);
35
static void aio_ret_cb(void *opaque, int ret)
96
diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
36
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_commit_by_drained_end(void)
97
index XXXXXXX..XXXXXXX 100644
37
bdrv_unref(bs_child);
98
--- a/qom/qom-qmp-cmds.c
99
+++ b/qom/qom-qmp-cmds.c
100
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
101
qobject_unref(pdict);
102
}
103
104
- user_creatable_add_dict(qdict, errp);
105
+ user_creatable_add_dict(qdict, false, errp);
38
}
106
}
39
107
40
+
108
void qmp_object_del(const char *id, Error **errp)
41
+typedef struct TestSimpleBlockJob {
42
+ BlockJob common;
43
+ bool should_complete;
44
+ bool *did_complete;
45
+} TestSimpleBlockJob;
46
+
47
+static int coroutine_fn test_simple_job_run(Job *job, Error **errp)
48
+{
49
+ TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
50
+
51
+ while (!s->should_complete) {
52
+ job_sleep_ns(job, 0);
53
+ }
54
+
55
+ return 0;
56
+}
57
+
58
+static void test_simple_job_clean(Job *job)
59
+{
60
+ TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
61
+ *s->did_complete = true;
62
+}
63
+
64
+static const BlockJobDriver test_simple_job_driver = {
65
+ .job_driver = {
66
+ .instance_size = sizeof(TestSimpleBlockJob),
67
+ .free = block_job_free,
68
+ .user_resume = block_job_user_resume,
69
+ .drain = block_job_drain,
70
+ .run = test_simple_job_run,
71
+ .clean = test_simple_job_clean,
72
+ },
73
+};
74
+
75
+static int drop_intermediate_poll_update_filename(BdrvChild *child,
76
+ BlockDriverState *new_base,
77
+ const char *filename,
78
+ Error **errp)
79
+{
80
+ /*
81
+ * We are free to poll here, which may change the block graph, if
82
+ * it is not drained.
83
+ */
84
+
85
+ /* If the job is not drained: Complete it, schedule job_exit() */
86
+ aio_poll(qemu_get_current_aio_context(), false);
87
+ /* If the job is not drained: Run job_exit(), finish the job */
88
+ aio_poll(qemu_get_current_aio_context(), false);
89
+
90
+ return 0;
91
+}
92
+
93
+/**
94
+ * Test a poll in the midst of bdrv_drop_intermediate().
95
+ *
96
+ * bdrv_drop_intermediate() calls BdrvChildRole.update_filename(),
97
+ * which can yield or poll. This may lead to graph changes, unless
98
+ * the whole subtree in question is drained.
99
+ *
100
+ * We test this on the following graph:
101
+ *
102
+ * Job
103
+ *
104
+ * |
105
+ * job-node
106
+ * |
107
+ * v
108
+ *
109
+ * job-node
110
+ *
111
+ * |
112
+ * backing
113
+ * |
114
+ * v
115
+ *
116
+ * node-2 --chain--> node-1 --chain--> node-0
117
+ *
118
+ * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0).
119
+ *
120
+ * This first updates node-2's backing filename by invoking
121
+ * drop_intermediate_poll_update_filename(), which polls twice. This
122
+ * causes the job to finish, which in turn causes the job-node to be
123
+ * deleted.
124
+ *
125
+ * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it
126
+ * already has a pointer to the BdrvChild edge between job-node and
127
+ * node-1. When it tries to handle that edge, we probably get a
128
+ * segmentation fault because the object no longer exists.
129
+ *
130
+ *
131
+ * The solution is for bdrv_drop_intermediate() to drain top's
132
+ * subtree. This prevents graph changes from happening just because
133
+ * BdrvChildRole.update_filename() yields or polls. Thus, the block
134
+ * job is paused during that drained section and must finish before or
135
+ * after.
136
+ *
137
+ * (In addition, bdrv_replace_child() must keep the job paused.)
138
+ */
139
+static void test_drop_intermediate_poll(void)
140
+{
141
+ static BdrvChildRole chain_child_role;
142
+ BlockDriverState *chain[3];
143
+ TestSimpleBlockJob *job;
144
+ BlockDriverState *job_node;
145
+ bool job_has_completed = false;
146
+ int i;
147
+ int ret;
148
+
149
+ chain_child_role = child_backing;
150
+ chain_child_role.update_filename = drop_intermediate_poll_update_filename;
151
+
152
+ for (i = 0; i < 3; i++) {
153
+ char name[32];
154
+ snprintf(name, 32, "node-%i", i);
155
+
156
+ chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort);
157
+ }
158
+
159
+ job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR,
160
+ &error_abort);
161
+ bdrv_set_backing_hd(job_node, chain[1], &error_abort);
162
+
163
+ /*
164
+ * Establish the chain last, so the chain links are the first
165
+ * elements in the BDS.parents lists
166
+ */
167
+ for (i = 0; i < 3; i++) {
168
+ if (i) {
169
+ /* Takes the reference to chain[i - 1] */
170
+ chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1],
171
+ "chain", &chain_child_role,
172
+ &error_abort);
173
+ }
174
+ }
175
+
176
+ job = block_job_create("job", &test_simple_job_driver, NULL, job_node,
177
+ 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort);
178
+
179
+ /* The job has a reference now */
180
+ bdrv_unref(job_node);
181
+
182
+ job->did_complete = &job_has_completed;
183
+
184
+ job_start(&job->common.job);
185
+ job->should_complete = true;
186
+
187
+ g_assert(!job_has_completed);
188
+ ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
189
+ g_assert(ret == 0);
190
+ g_assert(job_has_completed);
191
+
192
+ bdrv_unref(chain[2]);
193
+}
194
+
195
int main(int argc, char **argv)
196
{
197
int ret;
198
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
199
g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end",
200
test_blockjob_commit_by_drained_end);
201
202
+ g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll",
203
+ test_drop_intermediate_poll);
204
+
205
ret = g_test_run();
206
qemu_event_destroy(&done_event);
207
return ret;
208
--
109
--
209
2.20.1
110
2.25.3
210
111
211
112
diff view generated by jsdifflib