1
The following changes since commit 9e06029aea3b2eca1d5261352e695edc1e7d7b8b:
1
The following changes since commit 281f327487c9c9b1599f93c589a408bbf4a651b8:
2
2
3
Update version for v4.1.0 release (2019-08-15 13:03:37 +0100)
3
Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.12-pull-request' into staging (2017-12-22 00:11:36 +0000)
4
4
5
are available in the Git repository at:
5
are available in the git repository at:
6
6
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
8
8
9
for you to fetch changes up to a6b257a08e3d72219f03e461a52152672fec0612:
9
for you to fetch changes up to 1a63a907507fbbcfaee3f622907ec244b7eabda8:
10
10
11
file-posix: Handle undetectable alignment (2019-08-16 11:29:11 +0200)
11
block: Keep nodes drained between reopen_queue/multiple (2017-12-22 15:05:32 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches:
14
Block layer patches
15
16
- file-posix: Fix O_DIRECT alignment detection
17
- Fixes for concurrent block jobs
18
- block-backend: Queue requests while drained (fix IDE vs. job crashes)
19
- qemu-img convert: Deprecate using -n and -o together
20
- iotests: Migration tests with filter nodes
21
- iotests: More media change tests
22
15
23
----------------------------------------------------------------
16
----------------------------------------------------------------
24
Kevin Wolf (10):
17
Doug Gale (1):
25
iotests/118: Test media change for scsi-cd
18
nvme: Add tracing
26
iotests/118: Create test classes dynamically
27
iotests/118: Add -blockdev based tests
28
iotests: Move migration helpers to iotests.py
29
iotests: Test migration with all kinds of filter nodes
30
block: Simplify bdrv_filter_default_perms()
31
block: Remove blk_pread_unthrottled()
32
mirror: Keep mirror_top_bs drained after dropping permissions
33
block-backend: Queue requests while drained
34
qemu-img convert: Deprecate using -n and -o together
35
19
36
Max Reitz (5):
20
Edgar Kaziakhmedov (1):
37
block: Keep subtree drained in drop_intermediate
21
qcow2: get rid of qcow2_backing_read1 routine
38
block: Reduce (un)drains when replacing a child
39
tests: Test polling in bdrv_drop_intermediate()
40
tests: Test mid-drain bdrv_replace_child_noperm()
41
iotests: Add test for concurrent stream/commit
42
22
43
Nir Soffer (1):
23
Fam Zheng (2):
44
file-posix: Handle undetectable alignment
24
block: Open backing image in force share mode for size probe
25
block: Remove unused bdrv_requests_pending
45
26
46
include/sysemu/block-backend.h | 3 +-
27
John Snow (1):
47
block.c | 63 +++---
28
iotests: fix 197 for vpc
48
block/backup.c | 1 +
49
block/block-backend.c | 69 ++++--
50
block/commit.c | 2 +
51
block/file-posix.c | 36 +++-
52
block/mirror.c | 7 +-
53
blockjob.c | 3 +
54
hw/block/hd-geometry.c | 7 +-
55
qemu-img.c | 5 +
56
tests/test-bdrv-drain.c | 476 +++++++++++++++++++++++++++++++++++++++++
57
qemu-deprecated.texi | 7 +
58
tests/qemu-iotests/118 | 84 ++++----
59
tests/qemu-iotests/118.out | 4 +-
60
tests/qemu-iotests/234 | 30 +--
61
tests/qemu-iotests/258 | 163 ++++++++++++++
62
tests/qemu-iotests/258.out | 33 +++
63
tests/qemu-iotests/262 | 82 +++++++
64
tests/qemu-iotests/262.out | 17 ++
65
tests/qemu-iotests/group | 2 +
66
tests/qemu-iotests/iotests.py | 16 ++
67
21 files changed, 983 insertions(+), 127 deletions(-)
68
create mode 100755 tests/qemu-iotests/258
69
create mode 100644 tests/qemu-iotests/258.out
70
create mode 100755 tests/qemu-iotests/262
71
create mode 100644 tests/qemu-iotests/262.out
72
29
30
Kevin Wolf (27):
31
block: Formats don't need CONSISTENT_READ with NO_IO
32
block: Make bdrv_drain_invoke() recursive
33
block: Call .drain_begin only once in bdrv_drain_all_begin()
34
test-bdrv-drain: Test BlockDriver callbacks for drain
35
block: bdrv_drain_recurse(): Remove unused begin parameter
36
block: Don't wait for requests in bdrv_drain*_end()
37
block: Unify order in drain functions
38
block: Don't acquire AioContext in hmp_qemu_io()
39
block: Document that x-blockdev-change breaks quorum children list
40
block: Assert drain_all is only called from main AioContext
41
block: Make bdrv_drain() driver callbacks non-recursive
42
test-bdrv-drain: Test callback for bdrv_drain
43
test-bdrv-drain: Test bs->quiesce_counter
44
blockjob: Pause job on draining any job BDS
45
test-bdrv-drain: Test drain vs. block jobs
46
block: Don't block_job_pause_all() in bdrv_drain_all()
47
block: Nested drain_end must still call callbacks
48
test-bdrv-drain: Test nested drain sections
49
block: Don't notify parents in drain call chain
50
block: Add bdrv_subtree_drained_begin/end()
51
test-bdrv-drain: Tests for bdrv_subtree_drain
52
test-bdrv-drain: Test behaviour in coroutine context
53
test-bdrv-drain: Recursive draining with multiple parents
54
block: Allow graph changes in subtree drained section
55
test-bdrv-drain: Test graph changes in drained section
56
commit: Simplify reopen of base
57
block: Keep nodes drained between reopen_queue/multiple
58
59
Thomas Huth (3):
60
block: Remove the obsolete -drive boot=on|off parameter
61
block: Remove the deprecated -hdachs option
62
block: Mention -drive cyls/heads/secs/trans/serial/addr in deprecation chapter
63
64
qapi/block-core.json | 4 +
65
block/qcow2.h | 3 -
66
include/block/block.h | 15 +-
67
include/block/block_int.h | 6 +-
68
block.c | 75 ++++-
69
block/commit.c | 8 +-
70
block/io.c | 164 +++++++---
71
block/qcow2.c | 51 +--
72
block/replication.c | 6 +
73
blockdev.c | 11 -
74
blockjob.c | 22 +-
75
hmp.c | 6 -
76
hw/block/nvme.c | 349 +++++++++++++++++----
77
qemu-io-cmds.c | 3 +
78
tests/test-bdrv-drain.c | 651 +++++++++++++++++++++++++++++++++++++++
79
vl.c | 86 +-----
80
hw/block/trace-events | 93 ++++++
81
qemu-doc.texi | 29 +-
82
qemu-options.hx | 19 +-
83
tests/Makefile.include | 2 +
84
tests/qemu-iotests/197 | 4 +
85
tests/qemu-iotests/common.filter | 3 +-
86
22 files changed, 1294 insertions(+), 316 deletions(-)
87
create mode 100644 tests/test-bdrv-drain.c
88
diff view generated by jsdifflib
New patch
1
Commit 1f4ad7d fixed 'qemu-img info' for raw images that are currently
2
in use as a mirror target. It is not enough for image formats, though,
3
as these still unconditionally request BLK_PERM_CONSISTENT_READ.
1
4
5
As this permission is geared towards whether the guest-visible data is
6
consistent, and has no impact on whether the metadata is sane, and
7
'qemu-img info' does not read guest-visible data (except for the raw
8
format), it makes sense to not require BLK_PERM_CONSISTENT_READ if there
9
is not going to be any guest I/O performed, regardless of image format.
10
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
block.c | 6 +++++-
14
1 file changed, 5 insertions(+), 1 deletion(-)
15
16
diff --git a/block.c b/block.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block.c
19
+++ b/block.c
20
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
21
assert(role == &child_backing || role == &child_file);
22
23
if (!backing) {
24
+ int flags = bdrv_reopen_get_flags(reopen_queue, bs);
25
+
26
/* Apart from the modifications below, the same permissions are
27
* forwarded and left alone as for filters */
28
bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
29
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
30
31
/* bs->file always needs to be consistent because of the metadata. We
32
* can never allow other users to resize or write to it. */
33
- perm |= BLK_PERM_CONSISTENT_READ;
34
+ if (!(flags & BDRV_O_NO_IO)) {
35
+ perm |= BLK_PERM_CONSISTENT_READ;
36
+ }
37
shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
38
} else {
39
/* We want consistent read from backing files if the parent needs it.
40
--
41
2.13.6
42
43
diff view generated by jsdifflib
New patch
1
From: John Snow <jsnow@redhat.com>
1
2
3
VPC has some difficulty creating geometries of particular size.
4
However, we can indeed force it to use a literal one, so let's
5
do that for the sake of test 197, which is testing some specific
6
offsets.
7
8
Signed-off-by: John Snow <jsnow@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Reviewed-by: Lukáš Doktor <ldoktor@redhat.com>
13
---
14
tests/qemu-iotests/197 | 4 ++++
15
tests/qemu-iotests/common.filter | 3 ++-
16
2 files changed, 6 insertions(+), 1 deletion(-)
17
18
diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
19
index XXXXXXX..XXXXXXX 100755
20
--- a/tests/qemu-iotests/197
21
+++ b/tests/qemu-iotests/197
22
@@ -XXX,XX +XXX,XX @@ echo '=== Copy-on-read ==='
23
echo
24
25
# Prep the images
26
+# VPC rounds image sizes to a specific geometry, force a specific size.
27
+if [ "$IMGFMT" = "vpc" ]; then
28
+ IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
29
+fi
30
_make_test_img 4G
31
$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
32
IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
33
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tests/qemu-iotests/common.filter
36
+++ b/tests/qemu-iotests/common.filter
37
@@ -XXX,XX +XXX,XX @@ _filter_img_create()
38
-e "s# log_size=[0-9]\\+##g" \
39
-e "s# refcount_bits=[0-9]\\+##g" \
40
-e "s# key-secret=[a-zA-Z0-9]\\+##g" \
41
- -e "s# iter-time=[0-9]\\+##g"
42
+ -e "s# iter-time=[0-9]\\+##g" \
43
+ -e "s# force_size=\\(on\\|off\\)##g"
44
}
45
46
_filter_img_info()
47
--
48
2.13.6
49
50
diff view generated by jsdifflib
New patch
1
This change separates bdrv_drain_invoke(), which calls the BlockDriver
2
drain callbacks, from bdrv_drain_recurse(). Instead, the function
3
performs its own recursion now.
1
4
5
One reason for this is that bdrv_drain_recurse() can be called multiple
6
times by bdrv_drain_all_begin(), but the callbacks may only be called
7
once. The separation is necessary to fix this bug.
8
9
The other reason is that we intend to go to a model where we call all
10
driver callbacks first, and only then start polling. This is not fully
11
achieved yet with this patch, as bdrv_drain_invoke() contains a
12
BDRV_POLL_WHILE() loop for the block driver callbacks, which can still
13
call callbacks for any unrelated event. It's a step in this direction
14
anyway.
15
16
Cc: qemu-stable@nongnu.org
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
19
---
20
block/io.c | 14 +++++++++++---
21
1 file changed, 11 insertions(+), 3 deletions(-)
22
23
diff --git a/block/io.c b/block/io.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/block/io.c
26
+++ b/block/io.c
27
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
28
bdrv_wakeup(bs);
29
}
30
31
+/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
32
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
33
{
34
+ BdrvChild *child, *tmp;
35
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
36
37
if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
38
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
39
data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
40
bdrv_coroutine_enter(bs, data.co);
41
BDRV_POLL_WHILE(bs, !data.done);
42
+
43
+ QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
44
+ bdrv_drain_invoke(child->bs, begin);
45
+ }
46
}
47
48
static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
49
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
50
BdrvChild *child, *tmp;
51
bool waited;
52
53
- /* Ensure any pending metadata writes are submitted to bs->file. */
54
- bdrv_drain_invoke(bs, begin);
55
-
56
/* Wait for drained requests to finish */
57
waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
58
59
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
60
bdrv_parent_drained_begin(bs);
61
}
62
63
+ bdrv_drain_invoke(bs, true);
64
bdrv_drain_recurse(bs, true);
65
}
66
67
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
68
}
69
70
bdrv_parent_drained_end(bs);
71
+ bdrv_drain_invoke(bs, false);
72
bdrv_drain_recurse(bs, false);
73
aio_enable_external(bdrv_get_aio_context(bs));
74
}
75
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
76
aio_context_acquire(aio_context);
77
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
78
if (aio_context == bdrv_get_aio_context(bs)) {
79
+ /* FIXME Calling this multiple times is wrong */
80
+ bdrv_drain_invoke(bs, true);
81
waited |= bdrv_drain_recurse(bs, true);
82
}
83
}
84
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
85
aio_context_acquire(aio_context);
86
aio_enable_external(aio_context);
87
bdrv_parent_drained_end(bs);
88
+ bdrv_drain_invoke(bs, false);
89
bdrv_drain_recurse(bs, false);
90
aio_context_release(aio_context);
91
}
92
--
93
2.13.6
94
95
diff view generated by jsdifflib
1
mirror_top_bs is currently implicitly drained through its connection to
1
bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver
2
the source or the target node. However, the drain section for target_bs
2
callback inside its polling loop. This means that how many times it got
3
ends early after moving mirror_top_bs from src to target_bs, so that
3
called for each node depended on long it had to poll the event loop.
4
requests can already be restarted while mirror_top_bs is still present
5
in the chain, but has dropped all permissions and therefore runs into an
6
assertion failure like this:
7
4
8
qemu-system-x86_64: block/io.c:1634: bdrv_co_write_req_prepare:
5
This is obviously not right and results in nodes that stay drained even
9
Assertion `child->perm & BLK_PERM_WRITE' failed.
6
after bdrv_drain_all_end(), which calls .bdrv_co_drain_begin() once per
7
node.
10
8
11
Keep mirror_top_bs drained until all graph changes have completed.
9
Fix bdrv_drain_all_begin() to call the callback only once, too.
12
10
13
Cc: qemu-stable@nongnu.org
11
Cc: qemu-stable@nongnu.org
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
14
---
17
block/mirror.c | 6 +++++-
15
block/io.c | 3 +--
18
1 file changed, 5 insertions(+), 1 deletion(-)
16
1 file changed, 1 insertion(+), 2 deletions(-)
19
17
20
diff --git a/block/mirror.c b/block/mirror.c
18
diff --git a/block/io.c b/block/io.c
21
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
22
--- a/block/mirror.c
20
--- a/block/io.c
23
+++ b/block/mirror.c
21
+++ b/block/io.c
24
@@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job)
22
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
25
s->target = NULL;
23
aio_context_acquire(aio_context);
26
24
bdrv_parent_drained_begin(bs);
27
/* We don't access the source any more. Dropping any WRITE/RESIZE is
25
aio_disable_external(aio_context);
28
- * required before it could become a backing file of target_bs. */
26
+ bdrv_drain_invoke(bs, true);
29
+ * required before it could become a backing file of target_bs. Not having
27
aio_context_release(aio_context);
30
+ * these permissions any more means that we can't allow any new requests on
28
31
+ * mirror_top_bs from now on, so keep it drained. */
29
if (!g_slist_find(aio_ctxs, aio_context)) {
32
+ bdrv_drained_begin(mirror_top_bs);
30
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
33
bs_opaque->stop = true;
31
aio_context_acquire(aio_context);
34
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
32
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
35
&error_abort);
33
if (aio_context == bdrv_get_aio_context(bs)) {
36
@@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job)
34
- /* FIXME Calling this multiple times is wrong */
37
bs_opaque->job = NULL;
35
- bdrv_drain_invoke(bs, true);
38
36
waited |= bdrv_drain_recurse(bs, true);
39
bdrv_drained_end(src);
37
}
40
+ bdrv_drained_end(mirror_top_bs);
38
}
41
s->in_drain = false;
42
bdrv_unref(mirror_top_bs);
43
bdrv_unref(src);
44
--
39
--
45
2.20.1
40
2.13.6
46
41
47
42
diff view generated by jsdifflib
1
This test case is motivated by commit 2b23f28639 ('block/copy-on-read:
1
This adds a test case that the BlockDriver callbacks for drain are
2
Fix permissions for inactive node'). Instead of just testing
2
called in bdrv_drained_all_begin/end(), and that both of them are called
3
copy-on-read on migration, let's stack all sorts of filter nodes on top
3
exactly once.
4
of each other and try if the resulting VM can still migrate
5
successfully. For good measure, put everything into an iothread, because
6
why not?
7
4
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
10
---
8
---
11
tests/qemu-iotests/262 | 82 ++++++++++++++++++++++++++++++++++++++
9
tests/test-bdrv-drain.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++
12
tests/qemu-iotests/262.out | 17 ++++++++
10
tests/Makefile.include | 2 +
13
tests/qemu-iotests/group | 1 +
11
2 files changed, 139 insertions(+)
14
3 files changed, 100 insertions(+)
12
create mode 100644 tests/test-bdrv-drain.c
15
create mode 100755 tests/qemu-iotests/262
16
create mode 100644 tests/qemu-iotests/262.out
17
13
18
diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262
14
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
19
new file mode 100755
20
index XXXXXXX..XXXXXXX
21
--- /dev/null
22
+++ b/tests/qemu-iotests/262
23
@@ -XXX,XX +XXX,XX @@
24
+#!/usr/bin/env python
25
+#
26
+# Copyright (C) 2019 Red Hat, Inc.
27
+#
28
+# This program is free software; you can redistribute it and/or modify
29
+# it under the terms of the GNU General Public License as published by
30
+# the Free Software Foundation; either version 2 of the License, or
31
+# (at your option) any later version.
32
+#
33
+# This program is distributed in the hope that it will be useful,
34
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
35
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36
+# GNU General Public License for more details.
37
+#
38
+# You should have received a copy of the GNU General Public License
39
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
40
+#
41
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
42
+#
43
+# Test migration with filter drivers present. Keep everything in an
44
+# iothread just for fun.
45
+
46
+import iotests
47
+import os
48
+
49
+iotests.verify_image_format(supported_fmts=['qcow2'])
50
+iotests.verify_platform(['linux'])
51
+
52
+with iotests.FilePath('img') as img_path, \
53
+ iotests.FilePath('mig_fifo') as fifo, \
54
+ iotests.VM(path_suffix='a') as vm_a, \
55
+ iotests.VM(path_suffix='b') as vm_b:
56
+
57
+ def add_opts(vm):
58
+ vm.add_object('iothread,id=iothread0')
59
+ vm.add_object('throttle-group,id=tg0,x-bps-total=65536')
60
+ vm.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path))
61
+ vm.add_blockdev('%s,file=drive0-file,node-name=drive0-fmt' % (iotests.imgfmt))
62
+ vm.add_blockdev('copy-on-read,file=drive0-fmt,node-name=drive0-cor')
63
+ vm.add_blockdev('throttle,file=drive0-cor,node-name=drive0-throttle,throttle-group=tg0')
64
+ vm.add_blockdev('blkdebug,image=drive0-throttle,node-name=drive0-dbg')
65
+ vm.add_blockdev('null-co,node-name=null,read-zeroes=on')
66
+ vm.add_blockdev('blkverify,test=drive0-dbg,raw=null,node-name=drive0-verify')
67
+
68
+ if iotests.supports_quorum():
69
+ vm.add_blockdev('quorum,children.0=drive0-verify,vote-threshold=1,node-name=drive0-quorum')
70
+ root = "drive0-quorum"
71
+ else:
72
+ root = "drive0-verify"
73
+
74
+ vm.add_device('virtio-blk,drive=%s,iothread=iothread0' % root)
75
+
76
+ iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, img_path, '64M')
77
+
78
+ os.mkfifo(fifo)
79
+
80
+ iotests.log('Launching source VM...')
81
+ add_opts(vm_a)
82
+ vm_a.launch()
83
+
84
+ vm_a.enable_migration_events('A')
85
+
86
+ iotests.log('Launching destination VM...')
87
+ add_opts(vm_b)
88
+ vm_b.add_incoming("exec: cat '%s'" % (fifo))
89
+ vm_b.launch()
90
+
91
+ vm_b.enable_migration_events('B')
92
+
93
+ iotests.log('Starting migration to B...')
94
+ iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo)))
95
+ with iotests.Timeout(3, 'Migration does not complete'):
96
+ # Wait for the source first (which includes setup=setup)
97
+ vm_a.wait_migration()
98
+ # Wait for the destination second (which does not)
99
+ vm_b.wait_migration()
100
+
101
+ iotests.log(vm_a.qmp('query-migrate')['return']['status'])
102
+ iotests.log(vm_b.qmp('query-migrate')['return']['status'])
103
+
104
+ iotests.log(vm_a.qmp('query-status'))
105
+ iotests.log(vm_b.qmp('query-status'))
106
diff --git a/tests/qemu-iotests/262.out b/tests/qemu-iotests/262.out
107
new file mode 100644
15
new file mode 100644
108
index XXXXXXX..XXXXXXX
16
index XXXXXXX..XXXXXXX
109
--- /dev/null
17
--- /dev/null
110
+++ b/tests/qemu-iotests/262.out
18
+++ b/tests/test-bdrv-drain.c
111
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@
112
+Launching source VM...
20
+/*
113
+Enabling migration QMP events on A...
21
+ * Block node draining tests
114
+{"return": {}}
22
+ *
115
+Launching destination VM...
23
+ * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
116
+Enabling migration QMP events on B...
24
+ *
117
+{"return": {}}
25
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
118
+Starting migration to B...
26
+ * of this software and associated documentation files (the "Software"), to deal
119
+{"return": {}}
27
+ * in the Software without restriction, including without limitation the rights
120
+{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
28
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
121
+{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
29
+ * copies of the Software, and to permit persons to whom the Software is
122
+{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
30
+ * furnished to do so, subject to the following conditions:
123
+{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
31
+ *
124
+{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
32
+ * The above copyright notice and this permission notice shall be included in
125
+completed
33
+ * all copies or substantial portions of the Software.
126
+completed
34
+ *
127
+{"return": {"running": false, "singlestep": false, "status": "postmigrate"}}
35
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
128
+{"return": {"running": true, "singlestep": false, "status": "running"}}
36
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
129
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
37
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
38
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
39
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
40
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
41
+ * THE SOFTWARE.
42
+ */
43
+
44
+#include "qemu/osdep.h"
45
+#include "block/block.h"
46
+#include "sysemu/block-backend.h"
47
+#include "qapi/error.h"
48
+
49
+typedef struct BDRVTestState {
50
+ int drain_count;
51
+} BDRVTestState;
52
+
53
+static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
54
+{
55
+ BDRVTestState *s = bs->opaque;
56
+ s->drain_count++;
57
+}
58
+
59
+static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
60
+{
61
+ BDRVTestState *s = bs->opaque;
62
+ s->drain_count--;
63
+}
64
+
65
+static void bdrv_test_close(BlockDriverState *bs)
66
+{
67
+ BDRVTestState *s = bs->opaque;
68
+ g_assert_cmpint(s->drain_count, >, 0);
69
+}
70
+
71
+static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
72
+ uint64_t offset, uint64_t bytes,
73
+ QEMUIOVector *qiov, int flags)
74
+{
75
+ /* We want this request to stay until the polling loop in drain waits for
76
+ * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
77
+ * first and polls its result, too, but it shouldn't accidentally complete
78
+ * this request yet. */
79
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
80
+
81
+ return 0;
82
+}
83
+
84
+static BlockDriver bdrv_test = {
85
+ .format_name = "test",
86
+ .instance_size = sizeof(BDRVTestState),
87
+
88
+ .bdrv_close = bdrv_test_close,
89
+ .bdrv_co_preadv = bdrv_test_co_preadv,
90
+
91
+ .bdrv_co_drain_begin = bdrv_test_co_drain_begin,
92
+ .bdrv_co_drain_end = bdrv_test_co_drain_end,
93
+};
94
+
95
+static void aio_ret_cb(void *opaque, int ret)
96
+{
97
+ int *aio_ret = opaque;
98
+ *aio_ret = ret;
99
+}
100
+
101
+static void test_drv_cb_drain_all(void)
102
+{
103
+ BlockBackend *blk;
104
+ BlockDriverState *bs;
105
+ BDRVTestState *s;
106
+ BlockAIOCB *acb;
107
+ int aio_ret;
108
+
109
+ QEMUIOVector qiov;
110
+ struct iovec iov = {
111
+ .iov_base = NULL,
112
+ .iov_len = 0,
113
+ };
114
+ qemu_iovec_init_external(&qiov, &iov, 1);
115
+
116
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
117
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
118
+ &error_abort);
119
+ s = bs->opaque;
120
+ blk_insert_bs(blk, bs, &error_abort);
121
+
122
+ /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
123
+ g_assert_cmpint(s->drain_count, ==, 0);
124
+ bdrv_drain_all_begin();
125
+ g_assert_cmpint(s->drain_count, ==, 1);
126
+ bdrv_drain_all_end();
127
+ g_assert_cmpint(s->drain_count, ==, 0);
128
+
129
+ /* Now do the same while a request is pending */
130
+ aio_ret = -EINPROGRESS;
131
+ acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
132
+ g_assert(acb != NULL);
133
+ g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
134
+
135
+ g_assert_cmpint(s->drain_count, ==, 0);
136
+ bdrv_drain_all_begin();
137
+ g_assert_cmpint(aio_ret, ==, 0);
138
+ g_assert_cmpint(s->drain_count, ==, 1);
139
+ bdrv_drain_all_end();
140
+ g_assert_cmpint(s->drain_count, ==, 0);
141
+
142
+ bdrv_unref(bs);
143
+ blk_unref(blk);
144
+}
145
+
146
+int main(int argc, char **argv)
147
+{
148
+ bdrv_init();
149
+ qemu_init_main_loop(&error_abort);
150
+
151
+ g_test_init(&argc, &argv, NULL);
152
+
153
+ g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
154
+
155
+ return g_test_run();
156
+}
157
diff --git a/tests/Makefile.include b/tests/Makefile.include
130
index XXXXXXX..XXXXXXX 100644
158
index XXXXXXX..XXXXXXX 100644
131
--- a/tests/qemu-iotests/group
159
--- a/tests/Makefile.include
132
+++ b/tests/qemu-iotests/group
160
+++ b/tests/Makefile.include
133
@@ -XXX,XX +XXX,XX @@
161
@@ -XXX,XX +XXX,XX @@ gcov-files-test-thread-pool-y = thread-pool.c
134
254 rw backing quick
162
gcov-files-test-hbitmap-y = util/hbitmap.c
135
255 rw quick
163
check-unit-y += tests/test-hbitmap$(EXESUF)
136
256 rw quick
164
gcov-files-test-hbitmap-y = blockjob.c
137
+262 rw quick migration
165
+check-unit-y += tests/test-bdrv-drain$(EXESUF)
166
check-unit-y += tests/test-blockjob$(EXESUF)
167
check-unit-y += tests/test-blockjob-txn$(EXESUF)
168
check-unit-y += tests/test-x86-cpuid$(EXESUF)
169
@@ -XXX,XX +XXX,XX @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
170
tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
171
tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
172
tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
173
+tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y)
174
tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
175
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
176
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
138
--
177
--
139
2.20.1
178
2.13.6
140
179
141
180
diff view generated by jsdifflib
New patch
1
Now that the bdrv_drain_invoke() calls are pulled up to the callers of
2
bdrv_drain_recurse(), the 'begin' parameter isn't needed any more.
1
3
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
block/io.c | 12 ++++++------
8
1 file changed, 6 insertions(+), 6 deletions(-)
9
10
diff --git a/block/io.c b/block/io.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/block/io.c
13
+++ b/block/io.c
14
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
15
}
16
}
17
18
-static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
19
+static bool bdrv_drain_recurse(BlockDriverState *bs)
20
{
21
BdrvChild *child, *tmp;
22
bool waited;
23
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
24
*/
25
bdrv_ref(bs);
26
}
27
- waited |= bdrv_drain_recurse(bs, begin);
28
+ waited |= bdrv_drain_recurse(bs);
29
if (in_main_loop) {
30
bdrv_unref(bs);
31
}
32
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
33
}
34
35
bdrv_drain_invoke(bs, true);
36
- bdrv_drain_recurse(bs, true);
37
+ bdrv_drain_recurse(bs);
38
}
39
40
void bdrv_drained_end(BlockDriverState *bs)
41
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
42
43
bdrv_parent_drained_end(bs);
44
bdrv_drain_invoke(bs, false);
45
- bdrv_drain_recurse(bs, false);
46
+ bdrv_drain_recurse(bs);
47
aio_enable_external(bdrv_get_aio_context(bs));
48
}
49
50
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
51
aio_context_acquire(aio_context);
52
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
53
if (aio_context == bdrv_get_aio_context(bs)) {
54
- waited |= bdrv_drain_recurse(bs, true);
55
+ waited |= bdrv_drain_recurse(bs);
56
}
57
}
58
aio_context_release(aio_context);
59
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
60
aio_enable_external(aio_context);
61
bdrv_parent_drained_end(bs);
62
bdrv_drain_invoke(bs, false);
63
- bdrv_drain_recurse(bs, false);
64
+ bdrv_drain_recurse(bs);
65
aio_context_release(aio_context);
66
}
67
68
--
69
2.13.6
70
71
diff view generated by jsdifflib
New patch
1
The device is drained, so there is no point in waiting for requests at
2
the end of the drained section. Remove the bdrv_drain_recurse() calls
3
there.
1
4
5
The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e
6
in order to call the .bdrv_co_drain_end() driver callback. This is now
7
done by a separate bdrv_drain_invoke() call.
8
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
block/io.c | 2 --
14
1 file changed, 2 deletions(-)
15
16
diff --git a/block/io.c b/block/io.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/io.c
19
+++ b/block/io.c
20
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
21
22
bdrv_parent_drained_end(bs);
23
bdrv_drain_invoke(bs, false);
24
- bdrv_drain_recurse(bs);
25
aio_enable_external(bdrv_get_aio_context(bs));
26
}
27
28
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
29
aio_enable_external(aio_context);
30
bdrv_parent_drained_end(bs);
31
bdrv_drain_invoke(bs, false);
32
- bdrv_drain_recurse(bs);
33
aio_context_release(aio_context);
34
}
35
36
--
37
2.13.6
38
39
diff view generated by jsdifflib
New patch
1
Drain requests are propagated to child nodes, parent nodes and directly
2
to the AioContext. The order in which this happened was different
3
between all combinations of drain/drain_all and begin/end.
1
4
5
The correct order is to keep children only drained when their parents
6
are also drained. This means that at the start of a drained section, the
7
AioContext needs to be drained first, the parents second and only then
8
the children. The correct order for the end of a drained section is the
9
opposite.
10
11
This patch changes the three other functions to follow the example of
12
bdrv_drained_begin(), which is the only one that got it right.
13
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
17
block/io.c | 12 ++++++++----
18
1 file changed, 8 insertions(+), 4 deletions(-)
19
20
diff --git a/block/io.c b/block/io.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/block/io.c
23
+++ b/block/io.c
24
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
25
return;
26
}
27
28
+ /* Stop things in parent-to-child order */
29
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
30
aio_disable_external(bdrv_get_aio_context(bs));
31
bdrv_parent_drained_begin(bs);
32
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
33
return;
34
}
35
36
- bdrv_parent_drained_end(bs);
37
+ /* Re-enable things in child-to-parent order */
38
bdrv_drain_invoke(bs, false);
39
+ bdrv_parent_drained_end(bs);
40
aio_enable_external(bdrv_get_aio_context(bs));
41
}
42
43
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
44
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
45
AioContext *aio_context = bdrv_get_aio_context(bs);
46
47
+ /* Stop things in parent-to-child order */
48
aio_context_acquire(aio_context);
49
- bdrv_parent_drained_begin(bs);
50
aio_disable_external(aio_context);
51
+ bdrv_parent_drained_begin(bs);
52
bdrv_drain_invoke(bs, true);
53
aio_context_release(aio_context);
54
55
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
56
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
57
AioContext *aio_context = bdrv_get_aio_context(bs);
58
59
+ /* Re-enable things in child-to-parent order */
60
aio_context_acquire(aio_context);
61
- aio_enable_external(aio_context);
62
- bdrv_parent_drained_end(bs);
63
bdrv_drain_invoke(bs, false);
64
+ bdrv_parent_drained_end(bs);
65
+ aio_enable_external(aio_context);
66
aio_context_release(aio_context);
67
}
68
69
--
70
2.13.6
71
72
diff view generated by jsdifflib
1
bdrv_create options specified with -o have no effect when skipping image
1
Commit 15afd94a047 added code to acquire and release the AioContext in
2
creation with -n, so this doesn't make sense. Warn against the misuse
2
qemuio_command(). This means that the lock is taken twice now in the
3
and deprecate the combination so we can make it a hard error later.
3
call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for
4
any requests issued to nodes in a non-mainloop AioContext.
5
6
Dropping the first locking from hmp_qemu_io() fixes the problem.
4
7
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Reviewed-by: Max Reitz <mreitz@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: John Snow <jsnow@redhat.com>
8
Reviewed-by: Eric Blake <eblake@redhat.com>
9
---
10
---
10
qemu-img.c | 5 +++++
11
hmp.c | 6 ------
11
qemu-deprecated.texi | 7 +++++++
12
1 file changed, 6 deletions(-)
12
2 files changed, 12 insertions(+)
13
13
14
diff --git a/qemu-img.c b/qemu-img.c
14
diff --git a/hmp.c b/hmp.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/qemu-img.c
16
--- a/hmp.c
17
+++ b/qemu-img.c
17
+++ b/hmp.c
18
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
18
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
19
goto fail_getopt;
19
{
20
BlockBackend *blk;
21
BlockBackend *local_blk = NULL;
22
- AioContext *aio_context;
23
const char* device = qdict_get_str(qdict, "device");
24
const char* command = qdict_get_str(qdict, "command");
25
Error *err = NULL;
26
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
27
}
20
}
28
}
21
29
22
+ if (skip_create && options) {
30
- aio_context = blk_get_aio_context(blk);
23
+ warn_report("-o has no effect when skipping image creation");
31
- aio_context_acquire(aio_context);
24
+ warn_report("This will become an error in future QEMU versions.");
32
-
25
+ }
33
/*
26
+
34
* Notably absent: Proper permission management. This is sad, but it seems
27
s.src_num = argc - optind - 1;
35
* almost impossible to achieve without changing the semantics and thereby
28
out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
36
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
29
37
*/
30
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
38
qemuio_command(blk, command);
31
index XXXXXXX..XXXXXXX 100644
39
32
--- a/qemu-deprecated.texi
40
- aio_context_release(aio_context);
33
+++ b/qemu-deprecated.texi
41
-
34
@@ -XXX,XX +XXX,XX @@ to just export the entire image and then mount only /dev/nbd0p1 than
42
fail:
35
it is to reinvoke @command{qemu-nbd -c /dev/nbd0} limited to just a
43
blk_unref(local_blk);
36
subset of the image.
44
hmp_handle_error(mon, &err);
37
38
+@subsection qemu-img convert -n -o (since 4.2.0)
39
+
40
+All options specified in @option{-o} are image creation options, so
41
+they have no effect when used with @option{-n} to skip image creation.
42
+Silently ignored options can be confusing, so this combination of
43
+options will be made an error in future versions.
44
+
45
@section Build system
46
47
@subsection Python 2 support (since 4.1.0)
48
--
45
--
49
2.20.1
46
2.13.6
50
47
51
48
diff view generated by jsdifflib
New patch
1
From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
1
2
3
Since bdrv_co_preadv does all neccessary checks including
4
reading after the end of the backing file, avoid duplication
5
of verification before bdrv_co_preadv call.
6
7
Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
12
block/qcow2.h | 3 ---
13
block/qcow2.c | 51 ++++++++-------------------------------------------
14
2 files changed, 8 insertions(+), 46 deletions(-)
15
16
diff --git a/block/qcow2.h b/block/qcow2.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/qcow2.h
19
+++ b/block/qcow2.h
20
@@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset)
21
}
22
23
/* qcow2.c functions */
24
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
25
- int64_t sector_num, int nb_sectors);
26
-
27
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
28
int refcount_order, bool generous_increase,
29
uint64_t *refblock_count);
30
diff --git a/block/qcow2.c b/block/qcow2.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/block/qcow2.c
33
+++ b/block/qcow2.c
34
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
35
return status;
36
}
37
38
-/* handle reading after the end of the backing file */
39
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
40
- int64_t offset, int bytes)
41
-{
42
- uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
43
- int n1;
44
-
45
- if ((offset + bytes) <= bs_size) {
46
- return bytes;
47
- }
48
-
49
- if (offset >= bs_size) {
50
- n1 = 0;
51
- } else {
52
- n1 = bs_size - offset;
53
- }
54
-
55
- qemu_iovec_memset(qiov, n1, 0, bytes - n1);
56
-
57
- return n1;
58
-}
59
-
60
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
61
uint64_t bytes, QEMUIOVector *qiov,
62
int flags)
63
{
64
BDRVQcow2State *s = bs->opaque;
65
- int offset_in_cluster, n1;
66
+ int offset_in_cluster;
67
int ret;
68
unsigned int cur_bytes; /* number of bytes in current iteration */
69
uint64_t cluster_offset = 0;
70
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
71
case QCOW2_CLUSTER_UNALLOCATED:
72
73
if (bs->backing) {
74
- /* read from the base image */
75
- n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
76
- offset, cur_bytes);
77
- if (n1 > 0) {
78
- QEMUIOVector local_qiov;
79
-
80
- qemu_iovec_init(&local_qiov, hd_qiov.niov);
81
- qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
82
-
83
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
84
- qemu_co_mutex_unlock(&s->lock);
85
- ret = bdrv_co_preadv(bs->backing, offset, n1,
86
- &local_qiov, 0);
87
- qemu_co_mutex_lock(&s->lock);
88
-
89
- qemu_iovec_destroy(&local_qiov);
90
-
91
- if (ret < 0) {
92
- goto fail;
93
- }
94
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
95
+ qemu_co_mutex_unlock(&s->lock);
96
+ ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
97
+ &hd_qiov, 0);
98
+ qemu_co_mutex_lock(&s->lock);
99
+ if (ret < 0) {
100
+ goto fail;
101
}
102
} else {
103
/* Note: in this case, no need to wait */
104
--
105
2.13.6
106
107
diff view generated by jsdifflib
New patch
1
Removing a quorum child node with x-blockdev-change results in a quorum
2
driver state that cannot be recreated with create options because it
3
would require a list with gaps. This causes trouble in at least
4
.bdrv_refresh_filename().
1
5
6
Document this problem so that we won't accidentally mark the command
7
stable without having addressed it.
8
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Alberto Garcia <berto@igalia.com>
11
---
12
qapi/block-core.json | 4 ++++
13
1 file changed, 4 insertions(+)
14
15
diff --git a/qapi/block-core.json b/qapi/block-core.json
16
index XXXXXXX..XXXXXXX 100644
17
--- a/qapi/block-core.json
18
+++ b/qapi/block-core.json
19
@@ -XXX,XX +XXX,XX @@
20
# does not support all kinds of operations, all kinds of children, nor
21
# all block drivers.
22
#
23
+# FIXME Removing children from a quorum node means introducing gaps in the
24
+# child indices. This cannot be represented in the 'children' list of
25
+# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename().
26
+#
27
# Warning: The data in a new quorum child MUST be consistent with that of
28
# the rest of the array.
29
#
30
--
31
2.13.6
32
33
diff view generated by jsdifflib
New patch
1
From: Doug Gale <doug16k@gmail.com>
1
2
3
Add trace output for commands, errors, and undefined behavior.
4
Add guest error log output for undefined behavior.
5
Report invalid undefined accesses to MMIO.
6
Annotate unlikely error checks with unlikely.
7
8
Signed-off-by: Doug Gale <doug16k@gmail.com>
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
hw/block/nvme.c | 349 ++++++++++++++++++++++++++++++++++++++++++--------
14
hw/block/trace-events | 93 ++++++++++++++
15
2 files changed, 390 insertions(+), 52 deletions(-)
16
17
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/block/nvme.c
20
+++ b/hw/block/nvme.c
21
@@ -XXX,XX +XXX,XX @@
22
#include "qapi/visitor.h"
23
#include "sysemu/block-backend.h"
24
25
+#include "qemu/log.h"
26
+#include "trace.h"
27
#include "nvme.h"
28
29
+#define NVME_GUEST_ERR(trace, fmt, ...) \
30
+ do { \
31
+ (trace_##trace)(__VA_ARGS__); \
32
+ qemu_log_mask(LOG_GUEST_ERROR, #trace \
33
+ " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
34
+ } while (0)
35
+
36
static void nvme_process_sq(void *opaque);
37
38
static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
39
@@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
40
{
41
if (cq->irq_enabled) {
42
if (msix_enabled(&(n->parent_obj))) {
43
+ trace_nvme_irq_msix(cq->vector);
44
msix_notify(&(n->parent_obj), cq->vector);
45
} else {
46
+ trace_nvme_irq_pin();
47
pci_irq_pulse(&n->parent_obj);
48
}
49
+ } else {
50
+ trace_nvme_irq_masked();
51
}
52
}
53
54
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
55
trans_len = MIN(len, trans_len);
56
int num_prps = (len >> n->page_bits) + 1;
57
58
- if (!prp1) {
59
+ if (unlikely(!prp1)) {
60
+ trace_nvme_err_invalid_prp();
61
return NVME_INVALID_FIELD | NVME_DNR;
62
} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
63
prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
64
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
65
}
66
len -= trans_len;
67
if (len) {
68
- if (!prp2) {
69
+ if (unlikely(!prp2)) {
70
+ trace_nvme_err_invalid_prp2_missing();
71
goto unmap;
72
}
73
if (len > n->page_size) {
74
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
75
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
76
77
if (i == n->max_prp_ents - 1 && len > n->page_size) {
78
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
79
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
80
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
81
goto unmap;
82
}
83
84
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
85
prp_ent = le64_to_cpu(prp_list[i]);
86
}
87
88
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
89
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
90
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
91
goto unmap;
92
}
93
94
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
95
i++;
96
}
97
} else {
98
- if (prp2 & (n->page_size - 1)) {
99
+ if (unlikely(prp2 & (n->page_size - 1))) {
100
+ trace_nvme_err_invalid_prp2_align(prp2);
101
goto unmap;
102
}
103
if (qsg->nsg) {
104
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
105
QEMUIOVector iov;
106
uint16_t status = NVME_SUCCESS;
107
108
+ trace_nvme_dma_read(prp1, prp2);
109
+
110
if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
111
return NVME_INVALID_FIELD | NVME_DNR;
112
}
113
if (qsg.nsg > 0) {
114
- if (dma_buf_read(ptr, len, &qsg)) {
115
+ if (unlikely(dma_buf_read(ptr, len, &qsg))) {
116
+ trace_nvme_err_invalid_dma();
117
status = NVME_INVALID_FIELD | NVME_DNR;
118
}
119
qemu_sglist_destroy(&qsg);
120
} else {
121
- if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
122
+ if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
123
+ trace_nvme_err_invalid_dma();
124
status = NVME_INVALID_FIELD | NVME_DNR;
125
}
126
qemu_iovec_destroy(&iov);
127
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
128
uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
129
uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);
130
131
- if (slba + nlb > ns->id_ns.nsze) {
132
+ if (unlikely(slba + nlb > ns->id_ns.nsze)) {
133
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
134
return NVME_LBA_RANGE | NVME_DNR;
135
}
136
137
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
138
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
139
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
140
141
- if ((slba + nlb) > ns->id_ns.nsze) {
142
+ trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
143
+
144
+ if (unlikely((slba + nlb) > ns->id_ns.nsze)) {
145
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
146
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
147
return NVME_LBA_RANGE | NVME_DNR;
148
}
149
150
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
151
NvmeNamespace *ns;
152
uint32_t nsid = le32_to_cpu(cmd->nsid);
153
154
- if (nsid == 0 || nsid > n->num_namespaces) {
155
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
156
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
157
return NVME_INVALID_NSID | NVME_DNR;
158
}
159
160
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
161
case NVME_CMD_READ:
162
return nvme_rw(n, ns, cmd, req);
163
default:
164
+ trace_nvme_err_invalid_opc(cmd->opcode);
165
return NVME_INVALID_OPCODE | NVME_DNR;
166
}
167
}
168
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
169
NvmeCQueue *cq;
170
uint16_t qid = le16_to_cpu(c->qid);
171
172
- if (!qid || nvme_check_sqid(n, qid)) {
173
+ if (unlikely(!qid || nvme_check_sqid(n, qid))) {
174
+ trace_nvme_err_invalid_del_sq(qid);
175
return NVME_INVALID_QID | NVME_DNR;
176
}
177
178
+ trace_nvme_del_sq(qid);
179
+
180
sq = n->sq[qid];
181
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
182
req = QTAILQ_FIRST(&sq->out_req_list);
183
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
184
uint16_t qflags = le16_to_cpu(c->sq_flags);
185
uint64_t prp1 = le64_to_cpu(c->prp1);
186
187
- if (!cqid || nvme_check_cqid(n, cqid)) {
188
+ trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);
189
+
190
+ if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
191
+ trace_nvme_err_invalid_create_sq_cqid(cqid);
192
return NVME_INVALID_CQID | NVME_DNR;
193
}
194
- if (!sqid || !nvme_check_sqid(n, sqid)) {
195
+ if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) {
196
+ trace_nvme_err_invalid_create_sq_sqid(sqid);
197
return NVME_INVALID_QID | NVME_DNR;
198
}
199
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
200
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
201
+ trace_nvme_err_invalid_create_sq_size(qsize);
202
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
203
}
204
- if (!prp1 || prp1 & (n->page_size - 1)) {
205
+ if (unlikely(!prp1 || prp1 & (n->page_size - 1))) {
206
+ trace_nvme_err_invalid_create_sq_addr(prp1);
207
return NVME_INVALID_FIELD | NVME_DNR;
208
}
209
- if (!(NVME_SQ_FLAGS_PC(qflags))) {
210
+ if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
211
+ trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
212
return NVME_INVALID_FIELD | NVME_DNR;
213
}
214
sq = g_malloc0(sizeof(*sq));
215
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
216
NvmeCQueue *cq;
217
uint16_t qid = le16_to_cpu(c->qid);
218
219
- if (!qid || nvme_check_cqid(n, qid)) {
220
+ if (unlikely(!qid || nvme_check_cqid(n, qid))) {
221
+ trace_nvme_err_invalid_del_cq_cqid(qid);
222
return NVME_INVALID_CQID | NVME_DNR;
223
}
224
225
cq = n->cq[qid];
226
- if (!QTAILQ_EMPTY(&cq->sq_list)) {
227
+ if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
228
+ trace_nvme_err_invalid_del_cq_notempty(qid);
229
return NVME_INVALID_QUEUE_DEL;
230
}
231
+ trace_nvme_del_cq(qid);
232
nvme_free_cq(cq, n);
233
return NVME_SUCCESS;
234
}
235
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
236
uint16_t qflags = le16_to_cpu(c->cq_flags);
237
uint64_t prp1 = le64_to_cpu(c->prp1);
238
239
- if (!cqid || !nvme_check_cqid(n, cqid)) {
240
+ trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
241
+ NVME_CQ_FLAGS_IEN(qflags) != 0);
242
+
243
+ if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) {
244
+ trace_nvme_err_invalid_create_cq_cqid(cqid);
245
return NVME_INVALID_CQID | NVME_DNR;
246
}
247
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
248
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
249
+ trace_nvme_err_invalid_create_cq_size(qsize);
250
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
251
}
252
- if (!prp1) {
253
+ if (unlikely(!prp1)) {
254
+ trace_nvme_err_invalid_create_cq_addr(prp1);
255
return NVME_INVALID_FIELD | NVME_DNR;
256
}
257
- if (vector > n->num_queues) {
258
+ if (unlikely(vector > n->num_queues)) {
259
+ trace_nvme_err_invalid_create_cq_vector(vector);
260
return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
261
}
262
- if (!(NVME_CQ_FLAGS_PC(qflags))) {
263
+ if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
264
+ trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
265
return NVME_INVALID_FIELD | NVME_DNR;
266
}
267
268
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
269
uint64_t prp1 = le64_to_cpu(c->prp1);
270
uint64_t prp2 = le64_to_cpu(c->prp2);
271
272
+ trace_nvme_identify_ctrl();
273
+
274
return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
275
prp1, prp2);
276
}
277
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
278
uint64_t prp1 = le64_to_cpu(c->prp1);
279
uint64_t prp2 = le64_to_cpu(c->prp2);
280
281
- if (nsid == 0 || nsid > n->num_namespaces) {
282
+ trace_nvme_identify_ns(nsid);
283
+
284
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
285
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
286
return NVME_INVALID_NSID | NVME_DNR;
287
}
288
289
ns = &n->namespaces[nsid - 1];
290
+
291
return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
292
prp1, prp2);
293
}
294
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
295
uint16_t ret;
296
int i, j = 0;
297
298
+ trace_nvme_identify_nslist(min_nsid);
299
+
300
list = g_malloc0(data_len);
301
for (i = 0; i < n->num_namespaces; i++) {
302
if (i < min_nsid) {
303
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
304
case 0x02:
305
return nvme_identify_nslist(n, c);
306
default:
307
+ trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
308
return NVME_INVALID_FIELD | NVME_DNR;
309
}
310
}
311
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
312
switch (dw10) {
313
case NVME_VOLATILE_WRITE_CACHE:
314
result = blk_enable_write_cache(n->conf.blk);
315
+ trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
316
break;
317
case NVME_NUMBER_OF_QUEUES:
318
result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
319
+ trace_nvme_getfeat_numq(result);
320
break;
321
default:
322
+ trace_nvme_err_invalid_getfeat(dw10);
323
return NVME_INVALID_FIELD | NVME_DNR;
324
}
325
326
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
327
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
328
break;
329
case NVME_NUMBER_OF_QUEUES:
330
+ trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
331
+ ((dw11 >> 16) & 0xFFFF) + 1,
332
+ n->num_queues - 1, n->num_queues - 1);
333
req->cqe.result =
334
cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
335
break;
336
default:
337
+ trace_nvme_err_invalid_setfeat(dw10);
338
return NVME_INVALID_FIELD | NVME_DNR;
339
}
340
return NVME_SUCCESS;
341
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
342
case NVME_ADM_CMD_GET_FEATURES:
343
return nvme_get_feature(n, cmd, req);
344
default:
345
+ trace_nvme_err_invalid_admin_opc(cmd->opcode);
346
return NVME_INVALID_OPCODE | NVME_DNR;
347
}
348
}
349
@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
350
uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
351
uint32_t page_size = 1 << page_bits;
352
353
- if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq ||
354
- n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) ||
355
- NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
356
- NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) ||
357
- NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
358
- NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
359
- NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
360
- NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
361
- !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) {
362
+ if (unlikely(n->cq[0])) {
363
+ trace_nvme_err_startfail_cq();
364
+ return -1;
365
+ }
366
+ if (unlikely(n->sq[0])) {
367
+ trace_nvme_err_startfail_sq();
368
+ return -1;
369
+ }
370
+ if (unlikely(!n->bar.asq)) {
371
+ trace_nvme_err_startfail_nbarasq();
372
+ return -1;
373
+ }
374
+ if (unlikely(!n->bar.acq)) {
375
+ trace_nvme_err_startfail_nbaracq();
376
+ return -1;
377
+ }
378
+ if (unlikely(n->bar.asq & (page_size - 1))) {
379
+ trace_nvme_err_startfail_asq_misaligned(n->bar.asq);
380
+ return -1;
381
+ }
382
+ if (unlikely(n->bar.acq & (page_size - 1))) {
383
+ trace_nvme_err_startfail_acq_misaligned(n->bar.acq);
384
+ return -1;
385
+ }
386
+ if (unlikely(NVME_CC_MPS(n->bar.cc) <
387
+ NVME_CAP_MPSMIN(n->bar.cap))) {
388
+ trace_nvme_err_startfail_page_too_small(
389
+ NVME_CC_MPS(n->bar.cc),
390
+ NVME_CAP_MPSMIN(n->bar.cap));
391
+ return -1;
392
+ }
393
+ if (unlikely(NVME_CC_MPS(n->bar.cc) >
394
+ NVME_CAP_MPSMAX(n->bar.cap))) {
395
+ trace_nvme_err_startfail_page_too_large(
396
+ NVME_CC_MPS(n->bar.cc),
397
+ NVME_CAP_MPSMAX(n->bar.cap));
398
+ return -1;
399
+ }
400
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) <
401
+ NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
402
+ trace_nvme_err_startfail_cqent_too_small(
403
+ NVME_CC_IOCQES(n->bar.cc),
404
+ NVME_CTRL_CQES_MIN(n->bar.cap));
405
+ return -1;
406
+ }
407
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) >
408
+ NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
409
+ trace_nvme_err_startfail_cqent_too_large(
410
+ NVME_CC_IOCQES(n->bar.cc),
411
+ NVME_CTRL_CQES_MAX(n->bar.cap));
412
+ return -1;
413
+ }
414
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) <
415
+ NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
416
+ trace_nvme_err_startfail_sqent_too_small(
417
+ NVME_CC_IOSQES(n->bar.cc),
418
+ NVME_CTRL_SQES_MIN(n->bar.cap));
419
+ return -1;
420
+ }
421
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) >
422
+ NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
423
+ trace_nvme_err_startfail_sqent_too_large(
424
+ NVME_CC_IOSQES(n->bar.cc),
425
+ NVME_CTRL_SQES_MAX(n->bar.cap));
426
+ return -1;
427
+ }
428
+ if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) {
429
+ trace_nvme_err_startfail_asqent_sz_zero();
430
+ return -1;
431
+ }
432
+ if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) {
433
+ trace_nvme_err_startfail_acqent_sz_zero();
434
return -1;
435
}
436
437
@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
438
static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
439
unsigned size)
440
{
441
+ if (unlikely(offset & (sizeof(uint32_t) - 1))) {
442
+ NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32,
443
+ "MMIO write not 32-bit aligned,"
444
+ " offset=0x%"PRIx64"", offset);
445
+ /* should be ignored, fall through for now */
446
+ }
447
+
448
+ if (unlikely(size < sizeof(uint32_t))) {
449
+ NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall,
450
+ "MMIO write smaller than 32-bits,"
451
+ " offset=0x%"PRIx64", size=%u",
452
+ offset, size);
453
+ /* should be ignored, fall through for now */
454
+ }
455
+
456
switch (offset) {
457
- case 0xc:
458
+ case 0xc: /* INTMS */
459
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
460
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
461
+ "undefined access to interrupt mask set"
462
+ " when MSI-X is enabled");
463
+ /* should be ignored, fall through for now */
464
+ }
465
n->bar.intms |= data & 0xffffffff;
466
n->bar.intmc = n->bar.intms;
467
+ trace_nvme_mmio_intm_set(data & 0xffffffff,
468
+ n->bar.intmc);
469
break;
470
- case 0x10:
471
+ case 0x10: /* INTMC */
472
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
473
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
474
+ "undefined access to interrupt mask clr"
475
+ " when MSI-X is enabled");
476
+ /* should be ignored, fall through for now */
477
+ }
478
n->bar.intms &= ~(data & 0xffffffff);
479
n->bar.intmc = n->bar.intms;
480
+ trace_nvme_mmio_intm_clr(data & 0xffffffff,
481
+ n->bar.intmc);
482
break;
483
- case 0x14:
484
+ case 0x14: /* CC */
485
+ trace_nvme_mmio_cfg(data & 0xffffffff);
486
/* Windows first sends data, then sends enable bit */
487
if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
488
!NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc))
489
@@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
490
491
if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
492
n->bar.cc = data;
493
- if (nvme_start_ctrl(n)) {
494
+ if (unlikely(nvme_start_ctrl(n))) {
495
+ trace_nvme_err_startfail();
496
n->bar.csts = NVME_CSTS_FAILED;
497
} else {
498
+ trace_nvme_mmio_start_success();
499
n->bar.csts = NVME_CSTS_READY;
500
}
501
} else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
502
+ trace_nvme_mmio_stopped();
503
nvme_clear_ctrl(n);
504
n->bar.csts &= ~NVME_CSTS_READY;
505
}
506
if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
507
- nvme_clear_ctrl(n);
508
- n->bar.cc = data;
509
- n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
510
+ trace_nvme_mmio_shutdown_set();
511
+ nvme_clear_ctrl(n);
512
+ n->bar.cc = data;
513
+ n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
514
} else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
515
- n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
516
- n->bar.cc = data;
517
+ trace_nvme_mmio_shutdown_cleared();
518
+ n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
519
+ n->bar.cc = data;
520
+ }
521
+ break;
522
+ case 0x1C: /* CSTS */
523
+ if (data & (1 << 4)) {
524
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported,
525
+ "attempted to W1C CSTS.NSSRO"
526
+ " but CAP.NSSRS is zero (not supported)");
527
+ } else if (data != 0) {
528
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts,
529
+ "attempted to set a read only bit"
530
+ " of controller status");
531
+ }
532
+ break;
533
+ case 0x20: /* NSSR */
534
+ if (data == 0x4E564D65) {
535
+ trace_nvme_ub_mmiowr_ssreset_unsupported();
536
+ } else {
537
+ /* The spec says that writes of other values have no effect */
538
+ return;
539
}
540
break;
541
- case 0x24:
542
+ case 0x24: /* AQA */
543
n->bar.aqa = data & 0xffffffff;
544
+ trace_nvme_mmio_aqattr(data & 0xffffffff);
545
break;
546
- case 0x28:
547
+ case 0x28: /* ASQ */
548
n->bar.asq = data;
549
+ trace_nvme_mmio_asqaddr(data);
550
break;
551
- case 0x2c:
552
+ case 0x2c: /* ASQ hi */
553
n->bar.asq |= data << 32;
554
+ trace_nvme_mmio_asqaddr_hi(data, n->bar.asq);
555
break;
556
- case 0x30:
557
+ case 0x30: /* ACQ */
558
+ trace_nvme_mmio_acqaddr(data);
559
n->bar.acq = data;
560
break;
561
- case 0x34:
562
+ case 0x34: /* ACQ hi */
563
n->bar.acq |= data << 32;
564
+ trace_nvme_mmio_acqaddr_hi(data, n->bar.acq);
565
break;
566
+ case 0x38: /* CMBLOC */
567
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved,
568
+ "invalid write to reserved CMBLOC"
569
+ " when CMBSZ is zero, ignored");
570
+ return;
571
+ case 0x3C: /* CMBSZ */
572
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
573
+ "invalid write to read only CMBSZ, ignored");
574
+ return;
575
default:
576
+ NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
577
+ "invalid MMIO write,"
578
+ " offset=0x%"PRIx64", data=%"PRIx64"",
579
+ offset, data);
580
break;
581
}
582
}
583
@@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
584
uint8_t *ptr = (uint8_t *)&n->bar;
585
uint64_t val = 0;
586
587
+ if (unlikely(addr & (sizeof(uint32_t) - 1))) {
588
+ NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32,
589
+ "MMIO read not 32-bit aligned,"
590
+ " offset=0x%"PRIx64"", addr);
591
+ /* should RAZ, fall through for now */
592
+ } else if (unlikely(size < sizeof(uint32_t))) {
593
+ NVME_GUEST_ERR(nvme_ub_mmiord_toosmall,
594
+ "MMIO read smaller than 32-bits,"
595
+ " offset=0x%"PRIx64"", addr);
596
+ /* should RAZ, fall through for now */
597
+ }
598
+
599
if (addr < sizeof(n->bar)) {
600
memcpy(&val, ptr + addr, size);
601
+ } else {
602
+ NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
603
+ "MMIO read beyond last register,"
604
+ " offset=0x%"PRIx64", returning 0", addr);
605
}
606
+
607
return val;
608
}
609
610
@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
611
{
612
uint32_t qid;
613
614
- if (addr & ((1 << 2) - 1)) {
615
+ if (unlikely(addr & ((1 << 2) - 1))) {
616
+ NVME_GUEST_ERR(nvme_ub_db_wr_misaligned,
617
+ "doorbell write not 32-bit aligned,"
618
+ " offset=0x%"PRIx64", ignoring", addr);
619
return;
620
}
621
622
if (((addr - 0x1000) >> 2) & 1) {
623
+ /* Completion queue doorbell write */
624
+
625
uint16_t new_head = val & 0xffff;
626
int start_sqs;
627
NvmeCQueue *cq;
628
629
qid = (addr - (0x1000 + (1 << 2))) >> 3;
630
- if (nvme_check_cqid(n, qid)) {
631
+ if (unlikely(nvme_check_cqid(n, qid))) {
632
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq,
633
+ "completion queue doorbell write"
634
+ " for nonexistent queue,"
635
+ " sqid=%"PRIu32", ignoring", qid);
636
return;
637
}
638
639
cq = n->cq[qid];
640
- if (new_head >= cq->size) {
641
+ if (unlikely(new_head >= cq->size)) {
642
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead,
643
+ "completion queue doorbell write value"
644
+ " beyond queue size, sqid=%"PRIu32","
645
+ " new_head=%"PRIu16", ignoring",
646
+ qid, new_head);
647
return;
648
}
649
650
@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
651
nvme_isr_notify(n, cq);
652
}
653
} else {
654
+ /* Submission queue doorbell write */
655
+
656
uint16_t new_tail = val & 0xffff;
657
NvmeSQueue *sq;
658
659
qid = (addr - 0x1000) >> 3;
660
- if (nvme_check_sqid(n, qid)) {
661
+ if (unlikely(nvme_check_sqid(n, qid))) {
662
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq,
663
+ "submission queue doorbell write"
664
+ " for nonexistent queue,"
665
+ " sqid=%"PRIu32", ignoring", qid);
666
return;
667
}
668
669
sq = n->sq[qid];
670
- if (new_tail >= sq->size) {
671
+ if (unlikely(new_tail >= sq->size)) {
672
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail,
673
+ "submission queue doorbell write value"
674
+ " beyond queue size, sqid=%"PRIu32","
675
+ " new_tail=%"PRIu16", ignoring",
676
+ qid, new_tail);
677
return;
678
}
679
680
diff --git a/hw/block/trace-events b/hw/block/trace-events
681
index XXXXXXX..XXXXXXX 100644
682
--- a/hw/block/trace-events
683
+++ b/hw/block/trace-events
684
@@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6
685
hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d"
686
hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d"
687
688
+# hw/block/nvme.c
689
+# nvme traces for successful events
690
+nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
691
+nvme_irq_pin(void) "pulsing IRQ pin"
692
+nvme_irq_masked(void) "IRQ is masked"
693
+nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
694
+nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
695
+nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
696
+nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
697
+nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
698
+nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
699
+nvme_identify_ctrl(void) "identify controller"
700
+nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
701
+nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
702
+nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s"
703
+nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
704
+nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
705
+nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
706
+nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
707
+nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
708
+nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64""
709
+nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64""
710
+nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64""
711
+nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
712
+nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
713
+nvme_mmio_start_success(void) "setting controller enable bit succeeded"
714
+nvme_mmio_stopped(void) "cleared controller enable bit"
715
+nvme_mmio_shutdown_set(void) "shutdown bit set"
716
+nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
717
+
718
+# nvme traces for error conditions
719
+nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
720
+nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
721
+nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
722
+nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred"
723
+nvme_err_invalid_field(void) "invalid field"
724
+nvme_err_invalid_prp(void) "invalid PRP"
725
+nvme_err_invalid_sgl(void) "invalid SGL"
726
+nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u"
727
+nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
728
+nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
729
+nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
730
+nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
731
+nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
732
+nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16""
733
+nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16""
734
+nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64""
735
+nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16""
736
+nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16""
737
+nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16""
738
+nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16""
739
+nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16""
740
+nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64""
741
+nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16""
742
+nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16""
743
+nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
744
+nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
745
+nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
746
+nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
747
+nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
748
+nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
749
+nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null"
750
+nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64""
751
+nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64""
752
+nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u"
753
+nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u"
754
+nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u"
755
+nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u"
756
+nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u"
757
+nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u"
758
+nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
759
+nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
760
+nvme_err_startfail(void) "setting controller enable bit failed"
761
+
762
+# Traces for undefined behavior
763
+nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
764
+nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u"
765
+nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled"
766
+nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status"
767
+nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)"
768
+nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
769
+nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
770
+nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
771
+nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
772
+nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
773
+nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
774
+nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0"
775
+nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring"
776
+nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring"
777
+nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring"
778
+nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring"
779
+nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring"
780
+
781
# hw/block/xen_disk.c
782
xen_disk_alloc(char *name) "%s"
783
xen_disk_init(char *name) "%s"
784
--
785
2.13.6
786
787
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
From: Fam Zheng <famz@redhat.com>
2
2
3
bdrv_drop_intermediate() calls BdrvChildRole.update_filename(). That
3
Management tools create overlays of running guests with qemu-img:
4
may poll, thus changing the graph, which potentially breaks the
5
QLIST_FOREACH_SAFE() loop.
6
4
7
Just keep the whole subtree drained. This is probably the right thing
5
$ qemu-img create -b /image/in/use.qcow2 -f qcow2 /overlay/image.qcow2
8
to do anyway (dropping nodes while the subtree is not drained seems
9
wrong).
10
6
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
7
but this doesn't work anymore due to image locking:
12
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
8
9
qemu-img: /overlay/image.qcow2: Failed to get shared "write" lock
10
Is another process using the image?
11
Could not open backing image to determine size.
12
Use the force share option to allow this use case again.
13
14
Cc: qemu-stable@nongnu.org
15
Signed-off-by: Fam Zheng <famz@redhat.com>
16
Reviewed-by: Eric Blake <eblake@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
18
---
15
block.c | 2 ++
19
block.c | 3 ++-
16
1 file changed, 2 insertions(+)
20
1 file changed, 2 insertions(+), 1 deletion(-)
17
21
18
diff --git a/block.c b/block.c
22
diff --git a/block.c b/block.c
19
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
20
--- a/block.c
24
--- a/block.c
21
+++ b/block.c
25
+++ b/block.c
22
@@ -XXX,XX +XXX,XX @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
26
@@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt,
23
int ret = -EIO;
27
back_flags = flags;
24
28
back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
25
bdrv_ref(top);
29
26
+ bdrv_subtree_drained_begin(top);
30
+ backing_options = qdict_new();
27
31
if (backing_fmt) {
28
if (!top->drv || !base->drv) {
32
- backing_options = qdict_new();
29
goto exit;
33
qdict_put_str(backing_options, "driver", backing_fmt);
30
@@ -XXX,XX +XXX,XX @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
34
}
31
35
+ qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
32
ret = 0;
36
33
exit:
37
bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
34
+ bdrv_subtree_drained_end(top);
38
&local_err);
35
bdrv_unref(top);
36
return ret;
37
}
38
--
39
--
39
2.20.1
40
2.13.6
40
41
41
42
diff view generated by jsdifflib
New patch
1
From: Thomas Huth <thuth@redhat.com>
1
2
3
It's not working anymore since QEMU v1.3.0 - time to remove it now.
4
5
Signed-off-by: Thomas Huth <thuth@redhat.com>
6
Reviewed-by: John Snow <jsnow@redhat.com>
7
Reviewed-by: Markus Armbruster <armbru@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
---
10
blockdev.c | 11 -----------
11
qemu-doc.texi | 6 ------
12
2 files changed, 17 deletions(-)
13
14
diff --git a/blockdev.c b/blockdev.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/blockdev.c
17
+++ b/blockdev.c
18
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_legacy_drive_opts = {
19
.type = QEMU_OPT_STRING,
20
.help = "chs translation (auto, lba, none)",
21
},{
22
- .name = "boot",
23
- .type = QEMU_OPT_BOOL,
24
- .help = "(deprecated, ignored)",
25
- },{
26
.name = "addr",
27
.type = QEMU_OPT_STRING,
28
.help = "pci address (virtio only)",
29
@@ -XXX,XX +XXX,XX @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
30
goto fail;
31
}
32
33
- /* Deprecated option boot=[on|off] */
34
- if (qemu_opt_get(legacy_opts, "boot") != NULL) {
35
- fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be "
36
- "ignored. Future versions will reject this parameter. Please "
37
- "update your scripts.\n");
38
- }
39
-
40
/* Other deprecated options */
41
if (!qtest_enabled()) {
42
for (i = 0; i < ARRAY_SIZE(deprecated); i++) {
43
diff --git a/qemu-doc.texi b/qemu-doc.texi
44
index XXXXXXX..XXXXXXX 100644
45
--- a/qemu-doc.texi
46
+++ b/qemu-doc.texi
47
@@ -XXX,XX +XXX,XX @@ deprecated.
48
49
@section System emulator command line arguments
50
51
-@subsection -drive boot=on|off (since 1.3.0)
52
-
53
-The ``boot=on|off'' option to the ``-drive'' argument is
54
-ignored. Applications should use the ``bootindex=N'' parameter
55
-to set an absolute ordering between devices instead.
56
-
57
@subsection -tdf (since 1.3.0)
58
59
The ``-tdf'' argument is ignored. The behaviour implemented
60
--
61
2.13.6
62
63
diff view generated by jsdifflib
New patch
1
1
From: Thomas Huth <thuth@redhat.com>
2
3
It's been marked as deprecated since QEMU v2.10.0, and so far nobody
4
complained that we should keep it, so let's remove this legacy option
5
now to simplify the code quite a bit.
6
7
Signed-off-by: Thomas Huth <thuth@redhat.com>
8
Reviewed-by: John Snow <jsnow@redhat.com>
9
Reviewed-by: Markus Armbruster <armbru@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
12
vl.c | 86 ++-------------------------------------------------------
13
qemu-doc.texi | 8 ------
14
qemu-options.hx | 19 ++-----------
15
3 files changed, 4 insertions(+), 109 deletions(-)
16
17
diff --git a/vl.c b/vl.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/vl.c
20
+++ b/vl.c
21
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
22
const char *boot_order = NULL;
23
const char *boot_once = NULL;
24
DisplayState *ds;
25
- int cyls, heads, secs, translation;
26
QemuOpts *opts, *machine_opts;
27
- QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL;
28
+ QemuOpts *icount_opts = NULL, *accel_opts = NULL;
29
QemuOptsList *olist;
30
int optind;
31
const char *optarg;
32
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
33
34
cpu_model = NULL;
35
snapshot = 0;
36
- cyls = heads = secs = 0;
37
- translation = BIOS_ATA_TRANSLATION_AUTO;
38
39
nb_nics = 0;
40
41
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
42
if (optind >= argc)
43
break;
44
if (argv[optind][0] != '-') {
45
- hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
46
+ drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
47
} else {
48
const QEMUOption *popt;
49
50
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
51
cpu_model = optarg;
52
break;
53
case QEMU_OPTION_hda:
54
- {
55
- char buf[256];
56
- if (cyls == 0)
57
- snprintf(buf, sizeof(buf), "%s", HD_OPTS);
58
- else
59
- snprintf(buf, sizeof(buf),
60
- "%s,cyls=%d,heads=%d,secs=%d%s",
61
- HD_OPTS , cyls, heads, secs,
62
- translation == BIOS_ATA_TRANSLATION_LBA ?
63
- ",trans=lba" :
64
- translation == BIOS_ATA_TRANSLATION_NONE ?
65
- ",trans=none" : "");
66
- drive_add(IF_DEFAULT, 0, optarg, buf);
67
- break;
68
- }
69
case QEMU_OPTION_hdb:
70
case QEMU_OPTION_hdc:
71
case QEMU_OPTION_hdd:
72
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
73
case QEMU_OPTION_snapshot:
74
snapshot = 1;
75
break;
76
- case QEMU_OPTION_hdachs:
77
- {
78
- const char *p;
79
- p = optarg;
80
- cyls = strtol(p, (char **)&p, 0);
81
- if (cyls < 1 || cyls > 16383)
82
- goto chs_fail;
83
- if (*p != ',')
84
- goto chs_fail;
85
- p++;
86
- heads = strtol(p, (char **)&p, 0);
87
- if (heads < 1 || heads > 16)
88
- goto chs_fail;
89
- if (*p != ',')
90
- goto chs_fail;
91
- p++;
92
- secs = strtol(p, (char **)&p, 0);
93
- if (secs < 1 || secs > 63)
94
- goto chs_fail;
95
- if (*p == ',') {
96
- p++;
97
- if (!strcmp(p, "large")) {
98
- translation = BIOS_ATA_TRANSLATION_LARGE;
99
- } else if (!strcmp(p, "rechs")) {
100
- translation = BIOS_ATA_TRANSLATION_RECHS;
101
- } else if (!strcmp(p, "none")) {
102
- translation = BIOS_ATA_TRANSLATION_NONE;
103
- } else if (!strcmp(p, "lba")) {
104
- translation = BIOS_ATA_TRANSLATION_LBA;
105
- } else if (!strcmp(p, "auto")) {
106
- translation = BIOS_ATA_TRANSLATION_AUTO;
107
- } else {
108
- goto chs_fail;
109
- }
110
- } else if (*p != '\0') {
111
- chs_fail:
112
- error_report("invalid physical CHS format");
113
- exit(1);
114
- }
115
- if (hda_opts != NULL) {
116
- qemu_opt_set_number(hda_opts, "cyls", cyls,
117
- &error_abort);
118
- qemu_opt_set_number(hda_opts, "heads", heads,
119
- &error_abort);
120
- qemu_opt_set_number(hda_opts, "secs", secs,
121
- &error_abort);
122
- if (translation == BIOS_ATA_TRANSLATION_LARGE) {
123
- qemu_opt_set(hda_opts, "trans", "large",
124
- &error_abort);
125
- } else if (translation == BIOS_ATA_TRANSLATION_RECHS) {
126
- qemu_opt_set(hda_opts, "trans", "rechs",
127
- &error_abort);
128
- } else if (translation == BIOS_ATA_TRANSLATION_LBA) {
129
- qemu_opt_set(hda_opts, "trans", "lba",
130
- &error_abort);
131
- } else if (translation == BIOS_ATA_TRANSLATION_NONE) {
132
- qemu_opt_set(hda_opts, "trans", "none",
133
- &error_abort);
134
- }
135
- }
136
- }
137
- error_report("'-hdachs' is deprecated, please use '-device"
138
- " ide-hd,cyls=c,heads=h,secs=s,...' instead");
139
- break;
140
case QEMU_OPTION_numa:
141
opts = qemu_opts_parse_noisily(qemu_find_opts("numa"),
142
optarg, true);
143
diff --git a/qemu-doc.texi b/qemu-doc.texi
144
index XXXXXXX..XXXXXXX 100644
145
--- a/qemu-doc.texi
146
+++ b/qemu-doc.texi
147
@@ -XXX,XX +XXX,XX @@ The ``--net dump'' argument is now replaced with the
148
``-object filter-dump'' argument which works in combination
149
with the modern ``-netdev`` backends instead.
150
151
-@subsection -hdachs (since 2.10.0)
152
-
153
-The ``-hdachs'' argument is now a synonym for setting
154
-the ``cyls'', ``heads'', ``secs'', and ``trans'' properties
155
-on the ``ide-hd'' device using the ``-device'' argument.
156
-The new syntax allows different settings to be provided
157
-per disk.
158
-
159
@subsection -usbdevice (since 2.10.0)
160
161
The ``-usbdevice DEV'' argument is now a synonym for setting
162
diff --git a/qemu-options.hx b/qemu-options.hx
163
index XXXXXXX..XXXXXXX 100644
164
--- a/qemu-options.hx
165
+++ b/qemu-options.hx
166
@@ -XXX,XX +XXX,XX @@ of available connectors of a given interface type.
167
@item media=@var{media}
168
This option defines the type of the media: disk or cdrom.
169
@item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}]
170
-These options have the same definition as they have in @option{-hdachs}.
171
-These parameters are deprecated, use the corresponding parameters
172
+Force disk physical geometry and the optional BIOS translation (trans=none or
173
+lba). These parameters are deprecated, use the corresponding parameters
174
of @code{-device} instead.
175
@item snapshot=@var{snapshot}
176
@var{snapshot} is "on" or "off" and controls snapshot mode for the given drive
177
@@ -XXX,XX +XXX,XX @@ the raw disk image you use is not written back. You can however force
178
the write back by pressing @key{C-a s} (@pxref{disk_images}).
179
ETEXI
180
181
-DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \
182
- "-hdachs c,h,s[,t]\n" \
183
- " force hard disk 0 physical geometry and the optional BIOS\n" \
184
- " translation (t=none or lba) (usually QEMU can guess them)\n",
185
- QEMU_ARCH_ALL)
186
-STEXI
187
-@item -hdachs @var{c},@var{h},@var{s},[,@var{t}]
188
-@findex -hdachs
189
-Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <=
190
-@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS
191
-translation mode (@var{t}=none, lba or auto). Usually QEMU can guess
192
-all those parameters. This option is deprecated, please use
193
-@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead.
194
-ETEXI
195
-
196
DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
197
"-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n"
198
" [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n"
199
--
200
2.13.6
201
202
diff view generated by jsdifflib
New patch
1
From: Thomas Huth <thuth@redhat.com>
1
2
3
Looks like we forgot to announce the deprecation of these options in
4
the corresponding chapter of the qemu-doc text, so let's do that now.
5
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
7
Reviewed-by: John Snow <jsnow@redhat.com>
8
Reviewed-by: Markus Armbruster <armbru@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
11
qemu-doc.texi | 15 +++++++++++++++
12
1 file changed, 15 insertions(+)
13
14
diff --git a/qemu-doc.texi b/qemu-doc.texi
15
index XXXXXXX..XXXXXXX 100644
16
--- a/qemu-doc.texi
17
+++ b/qemu-doc.texi
18
@@ -XXX,XX +XXX,XX @@ longer be directly supported in QEMU.
19
The ``-drive if=scsi'' argument is replaced by the the
20
``-device BUS-TYPE'' argument combined with ``-drive if=none''.
21
22
+@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0)
23
+
24
+The drive geometry arguments are replaced by the the geometry arguments
25
+that can be specified with the ``-device'' parameter.
26
+
27
+@subsection -drive serial=... (since 2.10.0)
28
+
29
+The drive serial argument is replaced by the the serial argument
30
+that can be specified with the ``-device'' parameter.
31
+
32
+@subsection -drive addr=... (since 2.10.0)
33
+
34
+The drive addr argument is replaced by the the addr argument
35
+that can be specified with the ``-device'' parameter.
36
+
37
@subsection -net dump (since 2.10.0)
38
39
The ``--net dump'' argument is now replaced with the
40
--
41
2.13.6
42
43
diff view generated by jsdifflib
1
The functionality offered by blk_pread_unthrottled() goes back to commit
1
From: Fam Zheng <famz@redhat.com>
2
498e386c584. Then, we couldn't perform I/O throttling with synchronous
3
requests because timers wouldn't be executed in polling loops. So the
4
commit automatically disabled I/O throttling as soon as a synchronous
5
request was issued.
6
2
7
However, for geometry detection during disk initialisation, we always
3
Signed-off-by: Fam Zheng <famz@redhat.com>
8
used (and still use) synchronous requests even if guest requests use AIO
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
later. Geometry detection was not wanted to disable I/O throttling, so
5
---
10
bdrv_pread_unthrottled() was introduced which disabled throttling only
6
include/block/block_int.h | 1 -
11
temporarily.
7
block/io.c | 18 ------------------
8
2 files changed, 19 deletions(-)
12
9
13
All of this isn't necessary any more because we do run timers in polling
10
diff --git a/include/block/block_int.h b/include/block/block_int.h
14
loop and even synchronous requests are now using coroutine
15
infrastructure internally. For this reason, commit 90c78624f already
16
removed the automatic disabling of I/O throttling.
17
18
It's time to get rid of the workaround for the removed code, and its
19
abuse of blk_root_drained_begin()/end(), as well.
20
21
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
22
Reviewed-by: Max Reitz <mreitz@redhat.com>
23
---
24
include/sysemu/block-backend.h | 2 --
25
block/block-backend.c | 16 ----------------
26
hw/block/hd-geometry.c | 7 +------
27
3 files changed, 1 insertion(+), 24 deletions(-)
28
29
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
30
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
31
--- a/include/sysemu/block-backend.h
12
--- a/include/block/block_int.h
32
+++ b/include/sysemu/block-backend.h
13
+++ b/include/block/block_int.h
33
@@ -XXX,XX +XXX,XX @@ char *blk_get_attached_dev_id(BlockBackend *blk);
14
@@ -XXX,XX +XXX,XX @@ bool blk_dev_is_tray_open(BlockBackend *blk);
34
BlockBackend *blk_by_dev(void *dev);
15
bool blk_dev_is_medium_locked(BlockBackend *blk);
35
BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
16
36
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
17
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
37
-int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
18
-bool bdrv_requests_pending(BlockDriverState *bs);
38
- int bytes);
19
39
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
20
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
40
unsigned int bytes, QEMUIOVector *qiov,
21
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
41
BdrvRequestFlags flags);
22
diff --git a/block/io.c b/block/io.c
42
diff --git a/block/block-backend.c b/block/block-backend.c
43
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
44
--- a/block/block-backend.c
24
--- a/block/io.c
45
+++ b/block/block-backend.c
25
+++ b/block/io.c
46
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
26
@@ -XXX,XX +XXX,XX @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
47
return rwco.ret;
27
assert(old >= 1);
48
}
28
}
49
29
50
-int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
30
-/* Check if any requests are in-flight (including throttled requests) */
51
- int count)
31
-bool bdrv_requests_pending(BlockDriverState *bs)
52
-{
32
-{
53
- int ret;
33
- BdrvChild *child;
54
-
34
-
55
- ret = blk_check_byte_request(blk, offset, count);
35
- if (atomic_read(&bs->in_flight)) {
56
- if (ret < 0) {
36
- return true;
57
- return ret;
58
- }
37
- }
59
-
38
-
60
- blk_root_drained_begin(blk->root);
39
- QLIST_FOREACH(child, &bs->children, next) {
61
- ret = blk_pread(blk, offset, buf, count);
40
- if (bdrv_requests_pending(child->bs)) {
62
- blk_root_drained_end(blk->root, NULL);
41
- return true;
63
- return ret;
42
- }
43
- }
44
-
45
- return false;
64
-}
46
-}
65
-
47
-
66
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
48
typedef struct {
67
int bytes, BdrvRequestFlags flags)
49
Coroutine *co;
68
{
50
BlockDriverState *bs;
69
diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/hw/block/hd-geometry.c
72
+++ b/hw/block/hd-geometry.c
73
@@ -XXX,XX +XXX,XX @@ static int guess_disk_lchs(BlockBackend *blk,
74
75
blk_get_geometry(blk, &nb_sectors);
76
77
- /**
78
- * The function will be invoked during startup not only in sync I/O mode,
79
- * but also in async I/O mode. So the I/O throttling function has to
80
- * be disabled temporarily here, not permanently.
81
- */
82
- if (blk_pread_unthrottled(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) {
83
+ if (blk_pread(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) {
84
return -1;
85
}
86
/* test msdos magic */
87
--
51
--
88
2.20.1
52
2.13.6
89
53
90
54
diff view generated by jsdifflib
New patch
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
Reviewed-by: Fam Zheng <famz@redhat.com>
3
---
4
block/io.c | 6 ++++++
5
1 file changed, 6 insertions(+)
1
6
7
diff --git a/block/io.c b/block/io.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/block/io.c
10
+++ b/block/io.c
11
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
12
BdrvNextIterator it;
13
GSList *aio_ctxs = NULL, *ctx;
14
15
+ /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
16
+ * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
17
+ * nodes in several different AioContexts, so make sure we're in the main
18
+ * context. */
19
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
20
+
21
block_job_pause_all();
22
23
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
24
--
25
2.13.6
26
27
diff view generated by jsdifflib
New patch
1
bdrv_drained_begin() doesn't increase bs->quiesce_counter recursively
2
and also doesn't notify other parent nodes of children, which both means
3
that the child nodes are not actually drained, and bdrv_drained_begin()
4
is providing useful functionality only on a single node.
1
5
6
To keep things consistent, we also shouldn't call the block driver
7
callbacks recursively.
8
9
A proper recursive drain version that provides an actually working
10
drained section for child nodes will be introduced later.
11
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
Reviewed-by: Fam Zheng <famz@redhat.com>
14
---
15
block/io.c | 16 +++++++++-------
16
1 file changed, 9 insertions(+), 7 deletions(-)
17
18
diff --git a/block/io.c b/block/io.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/io.c
21
+++ b/block/io.c
22
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
23
}
24
25
/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
26
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
27
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive)
28
{
29
BdrvChild *child, *tmp;
30
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
31
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
32
bdrv_coroutine_enter(bs, data.co);
33
BDRV_POLL_WHILE(bs, !data.done);
34
35
- QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
36
- bdrv_drain_invoke(child->bs, begin);
37
+ if (recursive) {
38
+ QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
39
+ bdrv_drain_invoke(child->bs, begin, true);
40
+ }
41
}
42
}
43
44
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
45
bdrv_parent_drained_begin(bs);
46
}
47
48
- bdrv_drain_invoke(bs, true);
49
+ bdrv_drain_invoke(bs, true, false);
50
bdrv_drain_recurse(bs);
51
}
52
53
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
54
}
55
56
/* Re-enable things in child-to-parent order */
57
- bdrv_drain_invoke(bs, false);
58
+ bdrv_drain_invoke(bs, false, false);
59
bdrv_parent_drained_end(bs);
60
aio_enable_external(bdrv_get_aio_context(bs));
61
}
62
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
63
aio_context_acquire(aio_context);
64
aio_disable_external(aio_context);
65
bdrv_parent_drained_begin(bs);
66
- bdrv_drain_invoke(bs, true);
67
+ bdrv_drain_invoke(bs, true, true);
68
aio_context_release(aio_context);
69
70
if (!g_slist_find(aio_ctxs, aio_context)) {
71
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
72
73
/* Re-enable things in child-to-parent order */
74
aio_context_acquire(aio_context);
75
- bdrv_drain_invoke(bs, false);
76
+ bdrv_drain_invoke(bs, false, true);
77
bdrv_parent_drained_end(bs);
78
aio_enable_external(aio_context);
79
aio_context_release(aio_context);
80
--
81
2.13.6
82
83
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
The existing test is for bdrv_drain_all_begin/end() only. Generalise the
2
test case so that it can be run for the other variants as well. At the
3
moment this is only bdrv_drain_begin/end(), but in a while, we'll add
4
another one.
2
5
3
Add a test for what happens when you call bdrv_replace_child_noperm()
6
Also, add a backing file to the test node to test whether the operations
4
for various drain situations ({old,new} child {drained,not drained}).
7
work recursively.
5
8
6
Most importantly, if both the old and the new child are drained, the
7
parent must not be undrained at any point.
8
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
10
---
12
tests/test-bdrv-drain.c | 308 ++++++++++++++++++++++++++++++++++++++++
11
tests/test-bdrv-drain.c | 69 ++++++++++++++++++++++++++++++++++++++++++++-----
13
1 file changed, 308 insertions(+)
12
1 file changed, 62 insertions(+), 7 deletions(-)
14
13
15
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
14
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/tests/test-bdrv-drain.c
16
--- a/tests/test-bdrv-drain.c
18
+++ b/tests/test-bdrv-drain.c
17
+++ b/tests/test-bdrv-drain.c
19
@@ -XXX,XX +XXX,XX @@ static void test_drop_intermediate_poll(void)
18
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = {
20
bdrv_unref(chain[2]);
19
20
.bdrv_co_drain_begin = bdrv_test_co_drain_begin,
21
.bdrv_co_drain_end = bdrv_test_co_drain_end,
22
+
23
+ .bdrv_child_perm = bdrv_format_default_perms,
24
};
25
26
static void aio_ret_cb(void *opaque, int ret)
27
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
28
*aio_ret = ret;
21
}
29
}
22
30
31
-static void test_drv_cb_drain_all(void)
32
+enum drain_type {
33
+ BDRV_DRAIN_ALL,
34
+ BDRV_DRAIN,
35
+};
23
+
36
+
24
+typedef struct BDRVReplaceTestState {
37
+static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
25
+ bool was_drained;
26
+ bool was_undrained;
27
+ bool has_read;
28
+
29
+ int drain_count;
30
+
31
+ bool yield_before_read;
32
+ Coroutine *io_co;
33
+ Coroutine *drain_co;
34
+} BDRVReplaceTestState;
35
+
36
+static void bdrv_replace_test_close(BlockDriverState *bs)
37
+{
38
+{
38
+}
39
+ switch (drain_type) {
39
+
40
+ case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
40
+/**
41
+ case BDRV_DRAIN: bdrv_drained_begin(bs); break;
41
+ * If @bs has a backing file:
42
+ default: g_assert_not_reached();
42
+ * Yield if .yield_before_read is true (and wait for drain_begin to
43
+ * wake us up).
44
+ * Forward the read to bs->backing. Set .has_read to true.
45
+ * If drain_begin has woken us, wake it in turn.
46
+ *
47
+ * Otherwise:
48
+ * Set .has_read to true and return success.
49
+ */
50
+static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
51
+ uint64_t offset,
52
+ uint64_t bytes,
53
+ QEMUIOVector *qiov,
54
+ int flags)
55
+{
56
+ BDRVReplaceTestState *s = bs->opaque;
57
+
58
+ if (bs->backing) {
59
+ int ret;
60
+
61
+ g_assert(!s->drain_count);
62
+
63
+ s->io_co = qemu_coroutine_self();
64
+ if (s->yield_before_read) {
65
+ s->yield_before_read = false;
66
+ qemu_coroutine_yield();
67
+ }
68
+ s->io_co = NULL;
69
+
70
+ ret = bdrv_preadv(bs->backing, offset, qiov);
71
+ s->has_read = true;
72
+
73
+ /* Wake up drain_co if it runs */
74
+ if (s->drain_co) {
75
+ aio_co_wake(s->drain_co);
76
+ }
77
+
78
+ return ret;
79
+ }
80
+
81
+ s->has_read = true;
82
+ return 0;
83
+}
84
+
85
+/**
86
+ * If .drain_count is 0, wake up .io_co if there is one; and set
87
+ * .was_drained.
88
+ * Increment .drain_count.
89
+ */
90
+static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
91
+{
92
+ BDRVReplaceTestState *s = bs->opaque;
93
+
94
+ if (!s->drain_count) {
95
+ /* Keep waking io_co up until it is done */
96
+ s->drain_co = qemu_coroutine_self();
97
+ while (s->io_co) {
98
+ aio_co_wake(s->io_co);
99
+ s->io_co = NULL;
100
+ qemu_coroutine_yield();
101
+ }
102
+ s->drain_co = NULL;
103
+
104
+ s->was_drained = true;
105
+ }
106
+ s->drain_count++;
107
+}
108
+
109
+/**
110
+ * Reduce .drain_count, set .was_undrained once it reaches 0.
111
+ * If .drain_count reaches 0 and the node has a backing file, issue a
112
+ * read request.
113
+ */
114
+static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
115
+{
116
+ BDRVReplaceTestState *s = bs->opaque;
117
+
118
+ g_assert(s->drain_count > 0);
119
+ if (!--s->drain_count) {
120
+ int ret;
121
+
122
+ s->was_undrained = true;
123
+
124
+ if (bs->backing) {
125
+ char data;
126
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
127
+
128
+ /* Queue a read request post-drain */
129
+ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
130
+ g_assert(ret >= 0);
131
+ }
132
+ }
43
+ }
133
+}
44
+}
134
+
45
+
135
+static BlockDriver bdrv_replace_test = {
46
+static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
136
+ .format_name = "replace_test",
137
+ .instance_size = sizeof(BDRVReplaceTestState),
138
+
139
+ .bdrv_close = bdrv_replace_test_close,
140
+ .bdrv_co_preadv = bdrv_replace_test_co_preadv,
141
+
142
+ .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin,
143
+ .bdrv_co_drain_end = bdrv_replace_test_co_drain_end,
144
+
145
+ .bdrv_child_perm = bdrv_format_default_perms,
146
+};
147
+
148
+static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque)
149
+{
47
+{
150
+ int ret;
48
+ switch (drain_type) {
151
+ char data;
49
+ case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
152
+
50
+ case BDRV_DRAIN: bdrv_drained_end(bs); break;
153
+ ret = blk_co_pread(opaque, 0, 1, &data, 0);
51
+ default: g_assert_not_reached();
154
+ g_assert(ret >= 0);
52
+ }
155
+}
53
+}
156
+
54
+
157
+/**
55
+static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
158
+ * We test two things:
56
{
159
+ * (1) bdrv_replace_child_noperm() must not undrain the parent if both
57
BlockBackend *blk;
160
+ * children are drained.
58
- BlockDriverState *bs;
161
+ * (2) bdrv_replace_child_noperm() must never flush I/O requests to a
59
- BDRVTestState *s;
162
+ * drained child. If the old child is drained, it must flush I/O
60
+ BlockDriverState *bs, *backing;
163
+ * requests after the new one has been attached. If the new child
61
+ BDRVTestState *s, *backing_s;
164
+ * is drained, it must flush I/O requests before the old one is
62
BlockAIOCB *acb;
165
+ * detached.
63
int aio_ret;
166
+ *
64
167
+ * To do so, we create one parent node and two child nodes; then
65
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void)
168
+ * attach one of the children (old_child_bs) to the parent, then
66
s = bs->opaque;
169
+ * drain both old_child_bs and new_child_bs according to
67
blk_insert_bs(blk, bs, &error_abort);
170
+ * old_drain_count and new_drain_count, respectively, and finally
68
171
+ * we invoke bdrv_replace_node() to replace old_child_bs by
69
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
172
+ * new_child_bs.
70
+ backing_s = backing->opaque;
173
+ *
71
+ bdrv_set_backing_hd(bs, backing, &error_abort);
174
+ * The test block driver we use here (bdrv_replace_test) has a read
72
+
175
+ * function that:
73
/* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
176
+ * - For the parent node, can optionally yield, and then forwards the
74
g_assert_cmpint(s->drain_count, ==, 0);
177
+ * read to bdrv_preadv(),
75
- bdrv_drain_all_begin();
178
+ * - For the child node, just returns immediately.
76
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
179
+ *
77
+
180
+ * If the read yields, the drain_begin function will wake it up.
78
+ do_drain_begin(drain_type, bs);
181
+ *
79
+
182
+ * The drain_end function issues a read on the parent once it is fully
80
g_assert_cmpint(s->drain_count, ==, 1);
183
+ * undrained (which simulates requests starting to come in again).
81
- bdrv_drain_all_end();
184
+ */
82
+ g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
185
+static void do_test_replace_child_mid_drain(int old_drain_count,
83
+
186
+ int new_drain_count)
84
+ do_drain_end(drain_type, bs);
85
+
86
g_assert_cmpint(s->drain_count, ==, 0);
87
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
88
89
/* Now do the same while a request is pending */
90
aio_ret = -EINPROGRESS;
91
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void)
92
g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
93
94
g_assert_cmpint(s->drain_count, ==, 0);
95
- bdrv_drain_all_begin();
96
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
97
+
98
+ do_drain_begin(drain_type, bs);
99
+
100
g_assert_cmpint(aio_ret, ==, 0);
101
g_assert_cmpint(s->drain_count, ==, 1);
102
- bdrv_drain_all_end();
103
+ g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
104
+
105
+ do_drain_end(drain_type, bs);
106
+
107
g_assert_cmpint(s->drain_count, ==, 0);
108
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
109
110
+ bdrv_unref(backing);
111
bdrv_unref(bs);
112
blk_unref(blk);
113
}
114
115
+static void test_drv_cb_drain_all(void)
187
+{
116
+{
188
+ BlockBackend *parent_blk;
117
+ test_drv_cb_common(BDRV_DRAIN_ALL, true);
189
+ BlockDriverState *parent_bs;
190
+ BlockDriverState *old_child_bs, *new_child_bs;
191
+ BDRVReplaceTestState *parent_s;
192
+ BDRVReplaceTestState *old_child_s, *new_child_s;
193
+ Coroutine *io_co;
194
+ int i;
195
+
196
+ parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0,
197
+ &error_abort);
198
+ parent_s = parent_bs->opaque;
199
+
200
+ parent_blk = blk_new(qemu_get_aio_context(),
201
+ BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
202
+ blk_insert_bs(parent_blk, parent_bs, &error_abort);
203
+
204
+ old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0,
205
+ &error_abort);
206
+ new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0,
207
+ &error_abort);
208
+ old_child_s = old_child_bs->opaque;
209
+ new_child_s = new_child_bs->opaque;
210
+
211
+ /* So that we can read something */
212
+ parent_bs->total_sectors = 1;
213
+ old_child_bs->total_sectors = 1;
214
+ new_child_bs->total_sectors = 1;
215
+
216
+ bdrv_ref(old_child_bs);
217
+ parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child",
218
+ &child_backing, &error_abort);
219
+
220
+ for (i = 0; i < old_drain_count; i++) {
221
+ bdrv_drained_begin(old_child_bs);
222
+ }
223
+ for (i = 0; i < new_drain_count; i++) {
224
+ bdrv_drained_begin(new_child_bs);
225
+ }
226
+
227
+ if (!old_drain_count) {
228
+ /*
229
+ * Start a read operation that will yield, so it will not
230
+ * complete before the node is drained.
231
+ */
232
+ parent_s->yield_before_read = true;
233
+ io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co,
234
+ parent_blk);
235
+ qemu_coroutine_enter(io_co);
236
+ }
237
+
238
+ /* If we have started a read operation, it should have yielded */
239
+ g_assert(!parent_s->has_read);
240
+
241
+ /* Reset drained status so we can see what bdrv_replace_node() does */
242
+ parent_s->was_drained = false;
243
+ parent_s->was_undrained = false;
244
+
245
+ g_assert(parent_bs->quiesce_counter == old_drain_count);
246
+ bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
247
+ g_assert(parent_bs->quiesce_counter == new_drain_count);
248
+
249
+ if (!old_drain_count && !new_drain_count) {
250
+ /*
251
+ * From undrained to undrained drains and undrains the parent,
252
+ * because bdrv_replace_node() contains a drained section for
253
+ * @old_child_bs.
254
+ */
255
+ g_assert(parent_s->was_drained && parent_s->was_undrained);
256
+ } else if (!old_drain_count && new_drain_count) {
257
+ /*
258
+ * From undrained to drained should drain the parent and keep
259
+ * it that way.
260
+ */
261
+ g_assert(parent_s->was_drained && !parent_s->was_undrained);
262
+ } else if (old_drain_count && !new_drain_count) {
263
+ /*
264
+ * From drained to undrained should undrain the parent and
265
+ * keep it that way.
266
+ */
267
+ g_assert(!parent_s->was_drained && parent_s->was_undrained);
268
+ } else /* if (old_drain_count && new_drain_count) */ {
269
+ /*
270
+ * From drained to drained must not undrain the parent at any
271
+ * point
272
+ */
273
+ g_assert(!parent_s->was_drained && !parent_s->was_undrained);
274
+ }
275
+
276
+ if (!old_drain_count || !new_drain_count) {
277
+ /*
278
+ * If !old_drain_count, we have started a read request before
279
+ * bdrv_replace_node(). If !new_drain_count, the parent must
280
+ * have been undrained at some point, and
281
+ * bdrv_replace_test_co_drain_end() starts a read request
282
+ * then.
283
+ */
284
+ g_assert(parent_s->has_read);
285
+ } else {
286
+ /*
287
+ * If the parent was never undrained, there is no way to start
288
+ * a read request.
289
+ */
290
+ g_assert(!parent_s->has_read);
291
+ }
292
+
293
+ /* A drained child must have not received any request */
294
+ g_assert(!(old_drain_count && old_child_s->has_read));
295
+ g_assert(!(new_drain_count && new_child_s->has_read));
296
+
297
+ for (i = 0; i < new_drain_count; i++) {
298
+ bdrv_drained_end(new_child_bs);
299
+ }
300
+ for (i = 0; i < old_drain_count; i++) {
301
+ bdrv_drained_end(old_child_bs);
302
+ }
303
+
304
+ /*
305
+ * By now, bdrv_replace_test_co_drain_end() must have been called
306
+ * at some point while the new child was attached to the parent.
307
+ */
308
+ g_assert(parent_s->has_read);
309
+ g_assert(new_child_s->has_read);
310
+
311
+ blk_unref(parent_blk);
312
+ bdrv_unref(parent_bs);
313
+ bdrv_unref(old_child_bs);
314
+ bdrv_unref(new_child_bs);
315
+}
118
+}
316
+
119
+
317
+static void test_replace_child_mid_drain(void)
120
+static void test_drv_cb_drain(void)
318
+{
121
+{
319
+ int old_drain_count, new_drain_count;
122
+ test_drv_cb_common(BDRV_DRAIN, false);
320
+
321
+ for (old_drain_count = 0; old_drain_count < 2; old_drain_count++) {
322
+ for (new_drain_count = 0; new_drain_count < 2; new_drain_count++) {
323
+ do_test_replace_child_mid_drain(old_drain_count, new_drain_count);
324
+ }
325
+ }
326
+}
123
+}
327
+
124
+
328
int main(int argc, char **argv)
125
int main(int argc, char **argv)
329
{
126
{
330
int ret;
127
bdrv_init();
331
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
128
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
332
g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll",
129
g_test_init(&argc, &argv, NULL);
333
test_drop_intermediate_poll);
130
334
131
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
335
+ g_test_add_func("/bdrv-drain/replace_child/mid-drain",
132
+ g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
336
+ test_replace_child_mid_drain);
133
337
+
134
return g_test_run();
338
ret = g_test_run();
135
}
339
qemu_event_destroy(&done_event);
340
return ret;
341
--
136
--
342
2.20.1
137
2.13.6
343
138
344
139
diff view generated by jsdifflib
New patch
1
This is currently only working correctly for bdrv_drain(), not for
2
bdrv_drain_all(). Leave a comment for the drain_all case, we'll address
3
it later.
1
4
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
---
7
tests/test-bdrv-drain.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
8
1 file changed, 45 insertions(+)
9
10
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tests/test-bdrv-drain.c
13
+++ b/tests/test-bdrv-drain.c
14
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void)
15
test_drv_cb_common(BDRV_DRAIN, false);
16
}
17
18
+static void test_quiesce_common(enum drain_type drain_type, bool recursive)
19
+{
20
+ BlockBackend *blk;
21
+ BlockDriverState *bs, *backing;
22
+
23
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
24
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
25
+ &error_abort);
26
+ blk_insert_bs(blk, bs, &error_abort);
27
+
28
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
29
+ bdrv_set_backing_hd(bs, backing, &error_abort);
30
+
31
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
32
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
33
+
34
+ do_drain_begin(drain_type, bs);
35
+
36
+ g_assert_cmpint(bs->quiesce_counter, ==, 1);
37
+ g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
38
+
39
+ do_drain_end(drain_type, bs);
40
+
41
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
42
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
43
+
44
+ bdrv_unref(backing);
45
+ bdrv_unref(bs);
46
+ blk_unref(blk);
47
+}
48
+
49
+static void test_quiesce_drain_all(void)
50
+{
51
+ // XXX drain_all doesn't quiesce
52
+ //test_quiesce_common(BDRV_DRAIN_ALL, true);
53
+}
54
+
55
+static void test_quiesce_drain(void)
56
+{
57
+ test_quiesce_common(BDRV_DRAIN, false);
58
+}
59
+
60
int main(int argc, char **argv)
61
{
62
bdrv_init();
63
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
64
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
65
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
66
67
+ g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
68
+ g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
69
+
70
return g_test_run();
71
}
72
--
73
2.13.6
74
75
diff view generated by jsdifflib
New patch
1
Block jobs already paused themselves when their main BlockBackend
2
entered a drained section. This is not good enough: We also want to
3
pause a block job and may not submit new requests if, for example, the
4
mirror target node should be drained.
1
5
6
This implements .drained_begin/end callbacks in child_job in order to
7
consider all block nodes related to the job, and removes the
8
BlockBackend callbacks which are unnecessary now because the root of the
9
job main BlockBackend is always referenced with a child_job, too.
10
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
blockjob.c | 22 +++++++++-------------
14
1 file changed, 9 insertions(+), 13 deletions(-)
15
16
diff --git a/blockjob.c b/blockjob.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/blockjob.c
19
+++ b/blockjob.c
20
@@ -XXX,XX +XXX,XX @@ static char *child_job_get_parent_desc(BdrvChild *c)
21
job->id);
22
}
23
24
-static const BdrvChildRole child_job = {
25
- .get_parent_desc = child_job_get_parent_desc,
26
- .stay_at_node = true,
27
-};
28
-
29
-static void block_job_drained_begin(void *opaque)
30
+static void child_job_drained_begin(BdrvChild *c)
31
{
32
- BlockJob *job = opaque;
33
+ BlockJob *job = c->opaque;
34
block_job_pause(job);
35
}
36
37
-static void block_job_drained_end(void *opaque)
38
+static void child_job_drained_end(BdrvChild *c)
39
{
40
- BlockJob *job = opaque;
41
+ BlockJob *job = c->opaque;
42
block_job_resume(job);
43
}
44
45
-static const BlockDevOps block_job_dev_ops = {
46
- .drained_begin = block_job_drained_begin,
47
- .drained_end = block_job_drained_end,
48
+static const BdrvChildRole child_job = {
49
+ .get_parent_desc = child_job_get_parent_desc,
50
+ .drained_begin = child_job_drained_begin,
51
+ .drained_end = child_job_drained_end,
52
+ .stay_at_node = true,
53
};
54
55
void block_job_remove_all_bdrv(BlockJob *job)
56
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
57
block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
58
bs->job = job;
59
60
- blk_set_dev_ops(blk, &block_job_dev_ops, job);
61
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
62
63
QLIST_INSERT_HEAD(&block_jobs, job, job_list);
64
--
65
2.13.6
66
67
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
Block jobs must be paused if any of the involved nodes are drained.
2
2
3
Signed-off-by: Max Reitz <mreitz@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
---
4
---
6
tests/test-bdrv-drain.c | 167 ++++++++++++++++++++++++++++++++++++++++
5
tests/test-bdrv-drain.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
7
1 file changed, 167 insertions(+)
6
1 file changed, 121 insertions(+)
8
7
9
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
8
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
10
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
11
--- a/tests/test-bdrv-drain.c
10
--- a/tests/test-bdrv-drain.c
12
+++ b/tests/test-bdrv-drain.c
11
+++ b/tests/test-bdrv-drain.c
13
@@ -XXX,XX +XXX,XX @@ static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c,
12
@@ -XXX,XX +XXX,XX @@
14
nperm, nshared);
13
14
#include "qemu/osdep.h"
15
#include "block/block.h"
16
+#include "block/blockjob_int.h"
17
#include "sysemu/block-backend.h"
18
#include "qapi/error.h"
19
20
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
21
test_quiesce_common(BDRV_DRAIN, false);
15
}
22
}
16
23
17
+static int bdrv_test_change_backing_file(BlockDriverState *bs,
24
+
18
+ const char *backing_file,
25
+typedef struct TestBlockJob {
19
+ const char *backing_fmt)
26
+ BlockJob common;
27
+ bool should_complete;
28
+} TestBlockJob;
29
+
30
+static void test_job_completed(BlockJob *job, void *opaque)
20
+{
31
+{
21
+ return 0;
32
+ block_job_completed(job, 0);
22
+}
33
+}
23
+
34
+
24
static BlockDriver bdrv_test = {
35
+static void coroutine_fn test_job_start(void *opaque)
25
.format_name = "test",
26
.instance_size = sizeof(BDRVTestState),
27
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = {
28
.bdrv_co_drain_end = bdrv_test_co_drain_end,
29
30
.bdrv_child_perm = bdrv_test_child_perm,
31
+
32
+ .bdrv_change_backing_file = bdrv_test_change_backing_file,
33
};
34
35
static void aio_ret_cb(void *opaque, int ret)
36
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_commit_by_drained_end(void)
37
bdrv_unref(bs_child);
38
}
39
40
+
41
+typedef struct TestSimpleBlockJob {
42
+ BlockJob common;
43
+ bool should_complete;
44
+ bool *did_complete;
45
+} TestSimpleBlockJob;
46
+
47
+static int coroutine_fn test_simple_job_run(Job *job, Error **errp)
48
+{
36
+{
49
+ TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
37
+ TestBlockJob *s = opaque;
50
+
38
+
51
+ while (!s->should_complete) {
39
+ while (!s->should_complete) {
52
+ job_sleep_ns(job, 0);
40
+ block_job_sleep_ns(&s->common, 100000);
53
+ }
41
+ }
54
+
42
+
55
+ return 0;
43
+ block_job_defer_to_main_loop(&s->common, test_job_completed, NULL);
56
+}
44
+}
57
+
45
+
58
+static void test_simple_job_clean(Job *job)
46
+static void test_job_complete(BlockJob *job, Error **errp)
59
+{
47
+{
60
+ TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
48
+ TestBlockJob *s = container_of(job, TestBlockJob, common);
61
+ *s->did_complete = true;
49
+ s->should_complete = true;
62
+}
50
+}
63
+
51
+
64
+static const BlockJobDriver test_simple_job_driver = {
52
+BlockJobDriver test_job_driver = {
65
+ .job_driver = {
53
+ .instance_size = sizeof(TestBlockJob),
66
+ .instance_size = sizeof(TestSimpleBlockJob),
54
+ .start = test_job_start,
67
+ .free = block_job_free,
55
+ .complete = test_job_complete,
68
+ .user_resume = block_job_user_resume,
69
+ .drain = block_job_drain,
70
+ .run = test_simple_job_run,
71
+ .clean = test_simple_job_clean,
72
+ },
73
+};
56
+};
74
+
57
+
75
+static int drop_intermediate_poll_update_filename(BdrvChild *child,
58
+static void test_blockjob_common(enum drain_type drain_type)
76
+ BlockDriverState *new_base,
77
+ const char *filename,
78
+ Error **errp)
79
+{
59
+{
80
+ /*
60
+ BlockBackend *blk_src, *blk_target;
81
+ * We are free to poll here, which may change the block graph, if
61
+ BlockDriverState *src, *target;
82
+ * it is not drained.
62
+ BlockJob *job;
83
+ */
63
+ int ret;
84
+
64
+
85
+ /* If the job is not drained: Complete it, schedule job_exit() */
65
+ src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
86
+ aio_poll(qemu_get_current_aio_context(), false);
66
+ &error_abort);
87
+ /* If the job is not drained: Run job_exit(), finish the job */
67
+ blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
88
+ aio_poll(qemu_get_current_aio_context(), false);
68
+ blk_insert_bs(blk_src, src, &error_abort);
89
+
69
+
90
+ return 0;
70
+ target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
71
+ &error_abort);
72
+ blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
73
+ blk_insert_bs(blk_target, target, &error_abort);
74
+
75
+ job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0,
76
+ 0, NULL, NULL, &error_abort);
77
+ block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
78
+ block_job_start(job);
79
+
80
+ g_assert_cmpint(job->pause_count, ==, 0);
81
+ g_assert_false(job->paused);
82
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
83
+
84
+ do_drain_begin(drain_type, src);
85
+
86
+ if (drain_type == BDRV_DRAIN_ALL) {
87
+ /* bdrv_drain_all() drains both src and target, and involves an
88
+ * additional block_job_pause_all() */
89
+ g_assert_cmpint(job->pause_count, ==, 3);
90
+ } else {
91
+ g_assert_cmpint(job->pause_count, ==, 1);
92
+ }
93
+ /* XXX We don't wait until the job is actually paused. Is this okay? */
94
+ /* g_assert_true(job->paused); */
95
+ g_assert_false(job->busy); /* The job is paused */
96
+
97
+ do_drain_end(drain_type, src);
98
+
99
+ g_assert_cmpint(job->pause_count, ==, 0);
100
+ g_assert_false(job->paused);
101
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
102
+
103
+ do_drain_begin(drain_type, target);
104
+
105
+ if (drain_type == BDRV_DRAIN_ALL) {
106
+ /* bdrv_drain_all() drains both src and target, and involves an
107
+ * additional block_job_pause_all() */
108
+ g_assert_cmpint(job->pause_count, ==, 3);
109
+ } else {
110
+ g_assert_cmpint(job->pause_count, ==, 1);
111
+ }
112
+ /* XXX We don't wait until the job is actually paused. Is this okay? */
113
+ /* g_assert_true(job->paused); */
114
+ g_assert_false(job->busy); /* The job is paused */
115
+
116
+ do_drain_end(drain_type, target);
117
+
118
+ g_assert_cmpint(job->pause_count, ==, 0);
119
+ g_assert_false(job->paused);
120
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
121
+
122
+ ret = block_job_complete_sync(job, &error_abort);
123
+ g_assert_cmpint(ret, ==, 0);
124
+
125
+ blk_unref(blk_src);
126
+ blk_unref(blk_target);
127
+ bdrv_unref(src);
128
+ bdrv_unref(target);
91
+}
129
+}
92
+
130
+
93
+/**
131
+static void test_blockjob_drain_all(void)
94
+ * Test a poll in the midst of bdrv_drop_intermediate().
95
+ *
96
+ * bdrv_drop_intermediate() calls BdrvChildRole.update_filename(),
97
+ * which can yield or poll. This may lead to graph changes, unless
98
+ * the whole subtree in question is drained.
99
+ *
100
+ * We test this on the following graph:
101
+ *
102
+ * Job
103
+ *
104
+ * |
105
+ * job-node
106
+ * |
107
+ * v
108
+ *
109
+ * job-node
110
+ *
111
+ * |
112
+ * backing
113
+ * |
114
+ * v
115
+ *
116
+ * node-2 --chain--> node-1 --chain--> node-0
117
+ *
118
+ * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0).
119
+ *
120
+ * This first updates node-2's backing filename by invoking
121
+ * drop_intermediate_poll_update_filename(), which polls twice. This
122
+ * causes the job to finish, which in turns causes the job-node to be
123
+ * deleted.
124
+ *
125
+ * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it
126
+ * already has a pointer to the BdrvChild edge between job-node and
127
+ * node-1. When it tries to handle that edge, we probably get a
128
+ * segmentation fault because the object no longer exists.
129
+ *
130
+ *
131
+ * The solution is for bdrv_drop_intermediate() to drain top's
132
+ * subtree. This prevents graph changes from happening just because
133
+ * BdrvChildRole.update_filename() yields or polls. Thus, the block
134
+ * job is paused during that drained section and must finish before or
135
+ * after.
136
+ *
137
+ * (In addition, bdrv_replace_child() must keep the job paused.)
138
+ */
139
+static void test_drop_intermediate_poll(void)
140
+{
132
+{
141
+ static BdrvChildRole chain_child_role;
133
+ test_blockjob_common(BDRV_DRAIN_ALL);
142
+ BlockDriverState *chain[3];
134
+}
143
+ TestSimpleBlockJob *job;
144
+ BlockDriverState *job_node;
145
+ bool job_has_completed = false;
146
+ int i;
147
+ int ret;
148
+
135
+
149
+ chain_child_role = child_backing;
136
+static void test_blockjob_drain(void)
150
+ chain_child_role.update_filename = drop_intermediate_poll_update_filename;
137
+{
151
+
138
+ test_blockjob_common(BDRV_DRAIN);
152
+ for (i = 0; i < 3; i++) {
153
+ char name[32];
154
+ snprintf(name, 32, "node-%i", i);
155
+
156
+ chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort);
157
+ }
158
+
159
+ job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR,
160
+ &error_abort);
161
+ bdrv_set_backing_hd(job_node, chain[1], &error_abort);
162
+
163
+ /*
164
+ * Establish the chain last, so the chain links are the first
165
+ * elements in the BDS.parents lists
166
+ */
167
+ for (i = 0; i < 3; i++) {
168
+ if (i) {
169
+ /* Takes the reference to chain[i - 1] */
170
+ chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1],
171
+ "chain", &chain_child_role,
172
+ &error_abort);
173
+ }
174
+ }
175
+
176
+ job = block_job_create("job", &test_simple_job_driver, NULL, job_node,
177
+ 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort);
178
+
179
+ /* The job has a reference now */
180
+ bdrv_unref(job_node);
181
+
182
+ job->did_complete = &job_has_completed;
183
+
184
+ job_start(&job->common.job);
185
+ job->should_complete = true;
186
+
187
+ g_assert(!job_has_completed);
188
+ ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
189
+ g_assert(ret == 0);
190
+ g_assert(job_has_completed);
191
+
192
+ bdrv_unref(chain[2]);
193
+}
139
+}
194
+
140
+
195
int main(int argc, char **argv)
141
int main(int argc, char **argv)
196
{
142
{
197
int ret;
143
bdrv_init();
198
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
144
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
199
g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end",
145
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
200
test_blockjob_commit_by_drained_end);
146
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
201
147
202
+ g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll",
148
+ g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
203
+ test_drop_intermediate_poll);
149
+ g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
204
+
150
+
205
ret = g_test_run();
151
return g_test_run();
206
qemu_event_destroy(&done_event);
152
}
207
return ret;
208
--
153
--
209
2.20.1
154
2.13.6
210
155
211
156
diff view generated by jsdifflib
New patch
1
Block jobs are already paused using the BdrvChildRole drain callbacks,
2
so we don't need an additional block_job_pause_all() call.
1
3
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
---
6
block/io.c | 4 ----
7
tests/test-bdrv-drain.c | 10 ++++------
8
2 files changed, 4 insertions(+), 10 deletions(-)
9
10
diff --git a/block/io.c b/block/io.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/block/io.c
13
+++ b/block/io.c
14
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
15
* context. */
16
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
17
18
- block_job_pause_all();
19
-
20
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
21
AioContext *aio_context = bdrv_get_aio_context(bs);
22
23
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
24
aio_enable_external(aio_context);
25
aio_context_release(aio_context);
26
}
27
-
28
- block_job_resume_all();
29
}
30
31
void bdrv_drain_all(void)
32
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tests/test-bdrv-drain.c
35
+++ b/tests/test-bdrv-drain.c
36
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
37
do_drain_begin(drain_type, src);
38
39
if (drain_type == BDRV_DRAIN_ALL) {
40
- /* bdrv_drain_all() drains both src and target, and involves an
41
- * additional block_job_pause_all() */
42
- g_assert_cmpint(job->pause_count, ==, 3);
43
+ /* bdrv_drain_all() drains both src and target */
44
+ g_assert_cmpint(job->pause_count, ==, 2);
45
} else {
46
g_assert_cmpint(job->pause_count, ==, 1);
47
}
48
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
49
do_drain_begin(drain_type, target);
50
51
if (drain_type == BDRV_DRAIN_ALL) {
52
- /* bdrv_drain_all() drains both src and target, and involves an
53
- * additional block_job_pause_all() */
54
- g_assert_cmpint(job->pause_count, ==, 3);
55
+ /* bdrv_drain_all() drains both src and target */
56
+ g_assert_cmpint(job->pause_count, ==, 2);
57
} else {
58
g_assert_cmpint(job->pause_count, ==, 1);
59
}
60
--
61
2.13.6
62
63
diff view generated by jsdifflib
New patch
1
bdrv_do_drained_begin() restricts the call of parent callbacks and
2
aio_disable_external() to the outermost drain section, but the block
3
driver callbacks are always called. bdrv_do_drained_end() must match
4
this behaviour, otherwise nodes stay drained even if begin/end calls
5
were balanced.
1
6
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
9
block/io.c | 12 +++++++-----
10
1 file changed, 7 insertions(+), 5 deletions(-)
11
12
diff --git a/block/io.c b/block/io.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/block/io.c
15
+++ b/block/io.c
16
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
17
18
void bdrv_drained_end(BlockDriverState *bs)
19
{
20
+ int old_quiesce_counter;
21
+
22
if (qemu_in_coroutine()) {
23
bdrv_co_yield_to_drain(bs, false);
24
return;
25
}
26
assert(bs->quiesce_counter > 0);
27
- if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
28
- return;
29
- }
30
+ old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
31
32
/* Re-enable things in child-to-parent order */
33
bdrv_drain_invoke(bs, false, false);
34
- bdrv_parent_drained_end(bs);
35
- aio_enable_external(bdrv_get_aio_context(bs));
36
+ if (old_quiesce_counter == 1) {
37
+ bdrv_parent_drained_end(bs);
38
+ aio_enable_external(bdrv_get_aio_context(bs));
39
+ }
40
}
41
42
/*
43
--
44
2.13.6
45
46
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
2
3
We already have 030 for that in general, but this tests very specific
4
cases of both jobs finishing concurrently.
5
6
Signed-off-by: Max Reitz <mreitz@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
2
---
9
tests/qemu-iotests/258 | 163 +++++++++++++++++++++++++++++++++++++
3
tests/test-bdrv-drain.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
10
tests/qemu-iotests/258.out | 33 ++++++++
4
1 file changed, 57 insertions(+)
11
tests/qemu-iotests/group | 1 +
12
3 files changed, 197 insertions(+)
13
create mode 100755 tests/qemu-iotests/258
14
create mode 100644 tests/qemu-iotests/258.out
15
5
16
diff --git a/tests/qemu-iotests/258 b/tests/qemu-iotests/258
6
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
17
new file mode 100755
7
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX
8
--- a/tests/test-bdrv-drain.c
19
--- /dev/null
9
+++ b/tests/test-bdrv-drain.c
20
+++ b/tests/qemu-iotests/258
10
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
21
@@ -XXX,XX +XXX,XX @@
11
enum drain_type {
22
+#!/usr/bin/env python
12
BDRV_DRAIN_ALL,
23
+#
13
BDRV_DRAIN,
24
+# Very specific tests for adjacent commit/stream block jobs
14
+ DRAIN_TYPE_MAX,
25
+#
15
};
26
+# Copyright (C) 2019 Red Hat, Inc.
16
27
+#
17
static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
28
+# This program is free software; you can redistribute it and/or modify
18
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
29
+# it under the terms of the GNU General Public License as published by
19
test_quiesce_common(BDRV_DRAIN, false);
30
+# the Free Software Foundation; either version 2 of the License, or
20
}
31
+# (at your option) any later version.
21
32
+#
22
+static void test_nested(void)
33
+# This program is distributed in the hope that it will be useful,
23
+{
34
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
24
+ BlockBackend *blk;
35
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25
+ BlockDriverState *bs, *backing;
36
+# GNU General Public License for more details.
26
+ BDRVTestState *s, *backing_s;
37
+#
27
+ enum drain_type outer, inner;
38
+# You should have received a copy of the GNU General Public License
39
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
40
+#
41
+# Creator/Owner: Max Reitz <mreitz@redhat.com>
42
+
28
+
43
+import iotests
29
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
44
+from iotests import log, qemu_img, qemu_io_silent, \
30
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
45
+ filter_qmp_testfiles, filter_qmp_imgfmt
31
+ &error_abort);
32
+ s = bs->opaque;
33
+ blk_insert_bs(blk, bs, &error_abort);
46
+
34
+
47
+# Need backing file and change-backing-file support
35
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
48
+iotests.verify_image_format(supported_fmts=['qcow2', 'qed'])
36
+ backing_s = backing->opaque;
49
+iotests.verify_platform(['linux'])
37
+ bdrv_set_backing_hd(bs, backing, &error_abort);
50
+
38
+
39
+ for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
40
+ for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
41
+ /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
42
+ int bs_quiesce = (outer != BDRV_DRAIN_ALL) +
43
+ (inner != BDRV_DRAIN_ALL);
44
+ int backing_quiesce = 0;
45
+ int backing_cb_cnt = (outer != BDRV_DRAIN) +
46
+ (inner != BDRV_DRAIN);
51
+
47
+
52
+# Returns a node for blockdev-add
48
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
53
+def node(node_name, path, backing=None, fmt=None, throttle=None):
49
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
54
+ if fmt is None:
50
+ g_assert_cmpint(s->drain_count, ==, 0);
55
+ fmt = iotests.imgfmt
51
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
56
+
52
+
57
+ res = {
53
+ do_drain_begin(outer, bs);
58
+ 'node-name': node_name,
54
+ do_drain_begin(inner, bs);
59
+ 'driver': fmt,
55
+
60
+ 'file': {
56
+ g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce);
61
+ 'driver': 'file',
57
+ g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
62
+ 'filename': path
58
+ g_assert_cmpint(s->drain_count, ==, 2);
59
+ g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt);
60
+
61
+ do_drain_end(inner, bs);
62
+ do_drain_end(outer, bs);
63
+
64
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
65
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
66
+ g_assert_cmpint(s->drain_count, ==, 0);
67
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
63
+ }
68
+ }
64
+ }
69
+ }
65
+
70
+
66
+ if backing is not None:
71
+ bdrv_unref(backing);
67
+ res['backing'] = backing
72
+ bdrv_unref(bs);
73
+ blk_unref(blk);
74
+}
68
+
75
+
69
+ if throttle:
76
70
+ res['file'] = {
77
typedef struct TestBlockJob {
71
+ 'driver': 'throttle',
78
BlockJob common;
72
+ 'throttle-group': throttle,
79
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
73
+ 'file': res['file']
80
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
74
+ }
81
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
82
83
+ g_test_add_func("/bdrv-drain/nested", test_nested);
75
+
84
+
76
+ return res
85
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
77
+
86
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
78
+# Finds a node in the debug block graph
87
79
+def find_graph_node(graph, node_id):
80
+ return next(node for node in graph['nodes'] if node['id'] == node_id)
81
+
82
+
83
+def test_concurrent_finish(write_to_stream_node):
84
+ log('')
85
+ log('=== Commit and stream finish concurrently (letting %s write) ===' % \
86
+ ('stream' if write_to_stream_node else 'commit'))
87
+ log('')
88
+
89
+ # All chosen in such a way that when the commit job wants to
90
+ # finish, it polls and thus makes stream finish concurrently --
91
+ # and the other way around, depending on whether the commit job
92
+ # is finalized before stream completes or not.
93
+
94
+ with iotests.FilePath('node4.img') as node4_path, \
95
+ iotests.FilePath('node3.img') as node3_path, \
96
+ iotests.FilePath('node2.img') as node2_path, \
97
+ iotests.FilePath('node1.img') as node1_path, \
98
+ iotests.FilePath('node0.img') as node0_path, \
99
+ iotests.VM() as vm:
100
+
101
+ # It is important to use raw for the base layer (so that
102
+ # permissions are just handed through to the protocol layer)
103
+ assert qemu_img('create', '-f', 'raw', node0_path, '64M') == 0
104
+
105
+ stream_throttle=None
106
+ commit_throttle=None
107
+
108
+ for path in [node1_path, node2_path, node3_path, node4_path]:
109
+ assert qemu_img('create', '-f', iotests.imgfmt, path, '64M') == 0
110
+
111
+ if write_to_stream_node:
112
+ # This is what (most of the time) makes commit finish
113
+ # earlier and then pull in stream
114
+ assert qemu_io_silent(node2_path,
115
+ '-c', 'write %iK 64K' % (65536 - 192),
116
+ '-c', 'write %iK 64K' % (65536 - 64)) == 0
117
+
118
+ stream_throttle='tg'
119
+ else:
120
+ # And this makes stream finish earlier
121
+ assert qemu_io_silent(node1_path,
122
+ '-c', 'write %iK 64K' % (65536 - 64)) == 0
123
+
124
+ commit_throttle='tg'
125
+
126
+ vm.launch()
127
+
128
+ vm.qmp_log('object-add',
129
+ qom_type='throttle-group',
130
+ id='tg',
131
+ props={
132
+ 'x-iops-write': 1,
133
+ 'x-iops-write-max': 1
134
+ })
135
+
136
+ vm.qmp_log('blockdev-add',
137
+ filters=[filter_qmp_testfiles, filter_qmp_imgfmt],
138
+ **node('node4', node4_path, throttle=stream_throttle,
139
+ backing=node('node3', node3_path,
140
+ backing=node('node2', node2_path,
141
+ backing=node('node1', node1_path,
142
+ backing=node('node0', node0_path, throttle=commit_throttle,
143
+ fmt='raw'))))))
144
+
145
+ vm.qmp_log('block-commit',
146
+ job_id='commit',
147
+ device='node4',
148
+ filter_node_name='commit-filter',
149
+ top_node='node1',
150
+ base_node='node0',
151
+ auto_finalize=False)
152
+
153
+ vm.qmp_log('block-stream',
154
+ job_id='stream',
155
+ device='node3',
156
+ base_node='commit-filter')
157
+
158
+ if write_to_stream_node:
159
+ vm.run_job('commit', auto_finalize=False, auto_dismiss=True)
160
+ vm.run_job('stream', auto_finalize=True, auto_dismiss=True)
161
+ else:
162
+ # No, the jobs do not really finish concurrently here,
163
+ # the stream job does complete strictly before commit.
164
+ # But still, this is close enough for what we want to
165
+ # test.
166
+ vm.run_job('stream', auto_finalize=True, auto_dismiss=True)
167
+ vm.run_job('commit', auto_finalize=False, auto_dismiss=True)
168
+
169
+ # Assert that the backing node of node3 is node 0 now
170
+ graph = vm.qmp('x-debug-query-block-graph')['return']
171
+ for edge in graph['edges']:
172
+ if edge['name'] == 'backing' and \
173
+ find_graph_node(graph, edge['parent'])['name'] == 'node3':
174
+ assert find_graph_node(graph, edge['child'])['name'] == 'node0'
175
+ break
176
+
177
+
178
+def main():
179
+ log('Running tests:')
180
+ test_concurrent_finish(True)
181
+ test_concurrent_finish(False)
182
+
183
+if __name__ == '__main__':
184
+ main()
185
diff --git a/tests/qemu-iotests/258.out b/tests/qemu-iotests/258.out
186
new file mode 100644
187
index XXXXXXX..XXXXXXX
188
--- /dev/null
189
+++ b/tests/qemu-iotests/258.out
190
@@ -XXX,XX +XXX,XX @@
191
+Running tests:
192
+
193
+=== Commit and stream finish concurrently (letting stream write) ===
194
+
195
+{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
196
+{"return": {}}
197
+{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "throttle-group": "tg"}, "node-name": "node4"}}
198
+{"return": {}}
199
+{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}}
200
+{"return": {}}
201
+{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}}
202
+{"return": {}}
203
+{"execute": "job-finalize", "arguments": {"id": "commit"}}
204
+{"return": {}}
205
+{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
206
+{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
207
+{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
208
+
209
+=== Commit and stream finish concurrently (letting commit write) ===
210
+
211
+{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
212
+{"return": {}}
213
+{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "throttle-group": "tg"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "node-name": "node4"}}
214
+{"return": {}}
215
+{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}}
216
+{"return": {}}
217
+{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}}
218
+{"return": {}}
219
+{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
220
+{"execute": "job-finalize", "arguments": {"id": "commit"}}
221
+{"return": {}}
222
+{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
223
+{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
224
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
225
index XXXXXXX..XXXXXXX 100644
226
--- a/tests/qemu-iotests/group
227
+++ b/tests/qemu-iotests/group
228
@@ -XXX,XX +XXX,XX @@
229
254 rw backing quick
230
255 rw quick
231
256 rw quick
232
+258 rw quick
233
262 rw quick migration
234
--
88
--
235
2.20.1
89
2.13.6
236
90
237
91
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
This is in preparation for subtree drains, i.e. drained sections that
2
2
affect not only a single node, but recursively all child nodes, too.
3
Currently, bdrv_replace_child_noperm() undrains the parent until it is
3
4
completely undrained, then re-drains it after attaching the new child
4
Calling the parent callbacks for drain is pointless when we just came
5
node.
5
from that parent node recursively and leads to multiple increases of
6
6
bs->quiesce_counter in a single drain call. Don't do it.
7
This is a problem with bdrv_drop_intermediate(): We want to keep the
7
8
whole subtree drained, including parents, while the operation is
8
In order for this to work correctly, the parent callback must be called
9
under way. bdrv_replace_child_noperm() breaks this by allowing every
9
for every bdrv_drain_begin/end() call, not only for the outermost one:
10
parent to become unquiesced briefly, and then redraining it.
10
11
11
If we have a node N with two parents A and B, recursive draining of A
12
In fact, there is no reason why the parent should become unquiesced and
12
should cause the quiesce_counter of B to increase because its child N is
13
be allowed to submit requests to the new child node if that new node is
13
drained independently of B. If now B is recursively drained, too, A must
14
supposed to be kept drained. So if anything, we have to drain the
14
increase its quiesce_counter because N is drained independently of A
15
parent before detaching the old child node. Conversely, we have to
15
only now, even if N is going from quiesce_counter 1 to 2.
16
undrain it only after attaching the new child node.
16
17
18
Thus, change the whole drain algorithm here: Calculate the number of
19
times we have to drain/undrain the parent before replacing the child
20
node then drain it (if necessary), replace the child node, and then
21
undrain it.
22
23
Signed-off-by: Max Reitz <mreitz@redhat.com>
24
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
25
---
18
---
26
block.c | 49 +++++++++++++++++++++++++++++++++----------------
19
include/block/block.h | 4 ++--
27
1 file changed, 33 insertions(+), 16 deletions(-)
20
block.c | 13 +++++++++----
28
21
block/io.c | 47 ++++++++++++++++++++++++++++++++++-------------
22
3 files changed, 45 insertions(+), 19 deletions(-)
23
24
diff --git a/include/block/block.h b/include/block/block.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/block/block.h
27
+++ b/include/block/block.h
28
@@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs);
29
* Begin a quiesced section of all users of @bs. This is part of
30
* bdrv_drained_begin.
31
*/
32
-void bdrv_parent_drained_begin(BlockDriverState *bs);
33
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
34
35
/**
36
* bdrv_parent_drained_end:
37
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs);
38
* End a quiesced section of all users of @bs. This is part of
39
* bdrv_drained_end.
40
*/
41
-void bdrv_parent_drained_end(BlockDriverState *bs);
42
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
43
44
/**
45
* bdrv_drained_begin:
29
diff --git a/block.c b/block.c
46
diff --git a/block.c b/block.c
30
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
31
--- a/block.c
48
--- a/block.c
32
+++ b/block.c
49
+++ b/block.c
33
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
50
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
34
BlockDriverState *new_bs)
51
BlockDriverState *new_bs)
35
{
52
{
36
BlockDriverState *old_bs = child->bs;
53
BlockDriverState *old_bs = child->bs;
37
- int i;
54
+ int i;
38
+ int new_bs_quiesce_counter;
39
+ int drain_saldo;
40
41
assert(!child->frozen);
42
55
43
if (old_bs && new_bs) {
56
if (old_bs && new_bs) {
44
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
57
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
45
}
58
}
46
+
47
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
48
+ drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
49
+
50
+ /*
51
+ * If the new child node is drained but the old one was not, flush
52
+ * all outstanding requests to the old child node.
53
+ */
54
+ while (drain_saldo > 0 && child->role->drained_begin) {
55
+ bdrv_parent_drained_begin_single(child, true);
56
+ drain_saldo--;
57
+ }
58
+
59
if (old_bs) {
59
if (old_bs) {
60
/* Detach first so that the recursive drain sections coming from @child
60
if (old_bs->quiesce_counter && child->role->drained_end) {
61
* are already gone and we only end the drain sections that came from
61
- child->role->drained_end(child);
62
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
62
+ for (i = 0; i < old_bs->quiesce_counter; i++) {
63
+ child->role->drained_end(child);
64
+ }
65
}
63
if (child->role->detach) {
66
if (child->role->detach) {
64
child->role->detach(child);
67
child->role->detach(child);
65
}
68
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
66
- while (child->parent_quiesce_counter) {
67
- bdrv_parent_drained_end_single(child);
68
- }
69
QLIST_REMOVE(child, next_parent);
70
- } else {
71
- assert(child->parent_quiesce_counter == 0);
72
}
73
74
child->bs = new_bs;
75
76
if (new_bs) {
69
if (new_bs) {
77
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
70
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
78
- if (new_bs->quiesce_counter) {
71
if (new_bs->quiesce_counter && child->role->drained_begin) {
79
- int num = new_bs->quiesce_counter;
72
- child->role->drained_begin(child);
80
- if (child->role->parent_is_bds) {
73
+ for (i = 0; i < new_bs->quiesce_counter; i++) {
81
- num -= bdrv_drain_all_count;
74
+ child->role->drained_begin(child);
82
- }
75
+ }
83
- assert(num >= 0);
76
}
84
- for (i = 0; i < num; i++) {
77
85
- bdrv_parent_drained_begin_single(child, true);
78
if (child->role->attach) {
86
- }
79
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
87
- }
80
AioContext *ctx = bdrv_get_aio_context(bs);
81
82
aio_disable_external(ctx);
83
- bdrv_parent_drained_begin(bs);
84
+ bdrv_parent_drained_begin(bs, NULL);
85
bdrv_drain(bs); /* ensure there are no in-flight requests */
86
87
while (aio_poll(ctx, false)) {
88
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
89
*/
90
aio_context_acquire(new_context);
91
bdrv_attach_aio_context(bs, new_context);
92
- bdrv_parent_drained_end(bs);
93
+ bdrv_parent_drained_end(bs, NULL);
94
aio_enable_external(ctx);
95
aio_context_release(new_context);
96
}
97
diff --git a/block/io.c b/block/io.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/block/io.c
100
+++ b/block/io.c
101
@@ -XXX,XX +XXX,XX @@
102
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
103
int64_t offset, int bytes, BdrvRequestFlags flags);
104
105
-void bdrv_parent_drained_begin(BlockDriverState *bs)
106
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
107
{
108
BdrvChild *c, *next;
109
110
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
111
+ if (c == ignore) {
112
+ continue;
113
+ }
114
if (c->role->drained_begin) {
115
c->role->drained_begin(c);
116
}
117
}
118
}
119
120
-void bdrv_parent_drained_end(BlockDriverState *bs)
121
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
122
{
123
BdrvChild *c, *next;
124
125
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
126
+ if (c == ignore) {
127
+ continue;
128
+ }
129
if (c->role->drained_end) {
130
c->role->drained_end(c);
131
}
132
@@ -XXX,XX +XXX,XX @@ typedef struct {
133
BlockDriverState *bs;
134
bool done;
135
bool begin;
136
+ BdrvChild *parent;
137
} BdrvCoDrainData;
138
139
static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
140
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
141
return waited;
142
}
143
144
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent);
145
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
88
+
146
+
89
+ /*
147
static void bdrv_co_drain_bh_cb(void *opaque)
90
+ * Detaching the old node may have led to the new node's
148
{
91
+ * quiesce_counter having been decreased. Not a problem, we
149
BdrvCoDrainData *data = opaque;
92
+ * just need to recognize this here and then invoke
150
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
93
+ * drained_end appropriately more often.
151
94
+ */
152
bdrv_dec_in_flight(bs);
95
+ assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
153
if (data->begin) {
96
+ drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
154
- bdrv_drained_begin(bs);
97
155
+ bdrv_do_drained_begin(bs, data->parent);
98
/* Attach only after starting new drained sections, so that recursive
156
} else {
99
* drain sections coming from @child don't get an extra .drained_begin
157
- bdrv_drained_end(bs);
100
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
158
+ bdrv_do_drained_end(bs, data->parent);
101
child->role->attach(child);
159
}
102
}
160
103
}
161
data->done = true;
162
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
163
}
164
165
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
166
- bool begin)
167
+ bool begin, BdrvChild *parent)
168
{
169
BdrvCoDrainData data;
170
171
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
172
.bs = bs,
173
.done = false,
174
.begin = begin,
175
+ .parent = parent,
176
};
177
bdrv_inc_in_flight(bs);
178
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
179
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
180
assert(data.done);
181
}
182
183
-void bdrv_drained_begin(BlockDriverState *bs)
184
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
185
{
186
if (qemu_in_coroutine()) {
187
- bdrv_co_yield_to_drain(bs, true);
188
+ bdrv_co_yield_to_drain(bs, true, parent);
189
return;
190
}
191
192
/* Stop things in parent-to-child order */
193
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
194
aio_disable_external(bdrv_get_aio_context(bs));
195
- bdrv_parent_drained_begin(bs);
196
}
197
198
+ bdrv_parent_drained_begin(bs, parent);
199
bdrv_drain_invoke(bs, true, false);
200
bdrv_drain_recurse(bs);
201
}
202
203
-void bdrv_drained_end(BlockDriverState *bs)
204
+void bdrv_drained_begin(BlockDriverState *bs)
205
+{
206
+ bdrv_do_drained_begin(bs, NULL);
207
+}
104
+
208
+
105
+ /*
209
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
106
+ * If the old child node was drained but the new one is not, allow
210
{
107
+ * requests to come in only after the new node has been attached.
211
int old_quiesce_counter;
108
+ */
212
109
+ while (drain_saldo < 0 && child->role->drained_end) {
213
if (qemu_in_coroutine()) {
110
+ bdrv_parent_drained_end_single(child);
214
- bdrv_co_yield_to_drain(bs, false);
111
+ drain_saldo++;
215
+ bdrv_co_yield_to_drain(bs, false, parent);
112
+ }
216
return;
113
}
217
}
114
218
assert(bs->quiesce_counter > 0);
219
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
220
221
/* Re-enable things in child-to-parent order */
222
bdrv_drain_invoke(bs, false, false);
223
+ bdrv_parent_drained_end(bs, parent);
224
if (old_quiesce_counter == 1) {
225
- bdrv_parent_drained_end(bs);
226
aio_enable_external(bdrv_get_aio_context(bs));
227
}
228
}
229
230
+void bdrv_drained_end(BlockDriverState *bs)
231
+{
232
+ bdrv_do_drained_end(bs, NULL);
233
+}
234
+
115
/*
235
/*
236
* Wait for pending requests to complete on a single BlockDriverState subtree,
237
* and suspend block driver's internal I/O until next request arrives.
238
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
239
/* Stop things in parent-to-child order */
240
aio_context_acquire(aio_context);
241
aio_disable_external(aio_context);
242
- bdrv_parent_drained_begin(bs);
243
+ bdrv_parent_drained_begin(bs, NULL);
244
bdrv_drain_invoke(bs, true, true);
245
aio_context_release(aio_context);
246
247
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
248
/* Re-enable things in child-to-parent order */
249
aio_context_acquire(aio_context);
250
bdrv_drain_invoke(bs, false, true);
251
- bdrv_parent_drained_end(bs);
252
+ bdrv_parent_drained_end(bs, NULL);
253
aio_enable_external(aio_context);
254
aio_context_release(aio_context);
255
}
116
--
256
--
117
2.20.1
257
2.13.6
118
258
119
259
diff view generated by jsdifflib
1
This fixes devices like IDE that can still start new requests from I/O
1
bdrv_drained_begin() waits for the completion of requests in the whole
2
handlers in the CPU thread while the block backend is drained.
2
subtree, but it only actually keeps its immediate bs parameter quiesced
3
until bdrv_drained_end().
3
4
4
The basic assumption is that in a drain section, no new requests should
5
Add a version that keeps the whole subtree drained. As of this commit,
5
be allowed through a BlockBackend (blk_drained_begin/end don't exist,
6
graph changes cannot be allowed during a subtree drained section, but
6
we get drain sections only on the node level). However, there are two
7
this will be fixed soon.
7
special cases where requests should not be queued:
8
9
1. Block jobs: We already make sure that block jobs are paused in a
10
drain section, so they won't start new requests. However, if the
11
drain_begin is called on the job's BlockBackend first, it can happen
12
that we deadlock because the job stays busy until it reaches a pause
13
point - which it can't if its requests aren't processed any more.
14
15
The proper solution here would be to make all requests through the
16
job's filter node instead of using a BlockBackend. For now, just
17
disabling request queuing on the job BlockBackend is simpler.
18
19
2. In test cases where making requests through bdrv_* would be
20
cumbersome because we'd need a BdrvChild. As we already got the
21
functionality to disable request queuing from 1., use it in tests,
22
too, for convenience.
23
8
24
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
25
Reviewed-by: Max Reitz <mreitz@redhat.com>
26
---
10
---
27
include/sysemu/block-backend.h | 1 +
11
include/block/block.h | 13 +++++++++++++
28
block/backup.c | 1 +
12
block/io.c | 54 ++++++++++++++++++++++++++++++++++++++++-----------
29
block/block-backend.c | 53 ++++++++++++++++++++++++++++++++--
13
2 files changed, 56 insertions(+), 11 deletions(-)
30
block/commit.c | 2 ++
31
block/mirror.c | 1 +
32
blockjob.c | 3 ++
33
tests/test-bdrv-drain.c | 1 +
34
7 files changed, 59 insertions(+), 3 deletions(-)
35
14
36
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
15
diff --git a/include/block/block.h b/include/block/block.h
37
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
38
--- a/include/sysemu/block-backend.h
17
--- a/include/block/block.h
39
+++ b/include/sysemu/block-backend.h
18
+++ b/include/block/block.h
40
@@ -XXX,XX +XXX,XX @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
19
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
41
20
void bdrv_drained_begin(BlockDriverState *bs);
42
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
21
43
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
22
/**
44
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
23
+ * Like bdrv_drained_begin, but recursively begins a quiesced section for
45
void blk_iostatus_enable(BlockBackend *blk);
24
+ * exclusive access to all child nodes as well.
46
bool blk_iostatus_is_enabled(const BlockBackend *blk);
25
+ *
47
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
26
+ * Graph changes are not allowed during a subtree drain section.
48
diff --git a/block/backup.c b/block/backup.c
27
+ */
28
+void bdrv_subtree_drained_begin(BlockDriverState *bs);
29
+
30
+/**
31
* bdrv_drained_end:
32
*
33
* End a quiescent section started by bdrv_drained_begin().
34
*/
35
void bdrv_drained_end(BlockDriverState *bs);
36
37
+/**
38
+ * End a quiescent section started by bdrv_subtree_drained_begin().
39
+ */
40
+void bdrv_subtree_drained_end(BlockDriverState *bs);
41
+
42
void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
43
Error **errp);
44
void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
45
diff --git a/block/io.c b/block/io.c
49
index XXXXXXX..XXXXXXX 100644
46
index XXXXXXX..XXXXXXX 100644
50
--- a/block/backup.c
47
--- a/block/io.c
51
+++ b/block/backup.c
48
+++ b/block/io.c
52
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
49
@@ -XXX,XX +XXX,XX @@ typedef struct {
53
if (ret < 0) {
50
BlockDriverState *bs;
54
goto error;
51
bool done;
52
bool begin;
53
+ bool recursive;
54
BdrvChild *parent;
55
} BdrvCoDrainData;
56
57
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
58
return waited;
59
}
60
61
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent);
62
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
63
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
64
+ BdrvChild *parent);
65
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
66
+ BdrvChild *parent);
67
68
static void bdrv_co_drain_bh_cb(void *opaque)
69
{
70
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
71
72
bdrv_dec_in_flight(bs);
73
if (data->begin) {
74
- bdrv_do_drained_begin(bs, data->parent);
75
+ bdrv_do_drained_begin(bs, data->recursive, data->parent);
76
} else {
77
- bdrv_do_drained_end(bs, data->parent);
78
+ bdrv_do_drained_end(bs, data->recursive, data->parent);
55
}
79
}
56
+ blk_set_disable_request_queuing(job->target, true);
80
57
81
data->done = true;
58
job->on_source_error = on_source_error;
82
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
59
job->on_target_error = on_target_error;
83
}
60
diff --git a/block/block-backend.c b/block/block-backend.c
84
61
index XXXXXXX..XXXXXXX 100644
85
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
62
--- a/block/block-backend.c
86
- bool begin, BdrvChild *parent)
63
+++ b/block/block-backend.c
87
+ bool begin, bool recursive,
64
@@ -XXX,XX +XXX,XX @@ struct BlockBackend {
88
+ BdrvChild *parent)
65
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
89
{
66
90
BdrvCoDrainData data;
67
int quiesce_counter;
91
68
+ CoQueue queued_requests;
92
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
69
+ bool disable_request_queuing;
93
.bs = bs,
94
.done = false,
95
.begin = begin,
96
+ .recursive = recursive,
97
.parent = parent,
98
};
99
bdrv_inc_in_flight(bs);
100
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
101
assert(data.done);
102
}
103
104
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
105
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
106
+ BdrvChild *parent)
107
{
108
+ BdrvChild *child, *next;
70
+
109
+
71
VMChangeStateEntry *vmsh;
110
if (qemu_in_coroutine()) {
72
bool force_allow_inactivate;
111
- bdrv_co_yield_to_drain(bs, true, parent);
73
112
+ bdrv_co_yield_to_drain(bs, true, recursive, parent);
74
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
113
return;
75
114
}
76
block_acct_init(&blk->stats);
115
77
116
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
78
+ qemu_co_queue_init(&blk->queued_requests);
117
bdrv_parent_drained_begin(bs, parent);
79
notifier_list_init(&blk->remove_bs_notifiers);
118
bdrv_drain_invoke(bs, true, false);
80
notifier_list_init(&blk->insert_bs_notifiers);
119
bdrv_drain_recurse(bs);
81
QLIST_INIT(&blk->aio_notifiers);
120
+
82
@@ -XXX,XX +XXX,XX @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
121
+ if (recursive) {
83
blk->allow_aio_context_change = allow;
122
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
123
+ bdrv_do_drained_begin(child->bs, true, child);
124
+ }
125
+ }
84
}
126
}
85
127
86
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
128
void bdrv_drained_begin(BlockDriverState *bs)
87
+{
129
{
88
+ blk->disable_request_queuing = disable;
130
- bdrv_do_drained_begin(bs, NULL);
131
+ bdrv_do_drained_begin(bs, false, NULL);
89
+}
132
+}
90
+
133
+
91
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
134
+void bdrv_subtree_drained_begin(BlockDriverState *bs)
92
size_t size)
135
+{
136
+ bdrv_do_drained_begin(bs, true, NULL);
137
}
138
139
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
140
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
141
+ BdrvChild *parent)
93
{
142
{
94
@@ -XXX,XX +XXX,XX @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
143
+ BdrvChild *child, *next;
95
return 0;
144
int old_quiesce_counter;
145
146
if (qemu_in_coroutine()) {
147
- bdrv_co_yield_to_drain(bs, false, parent);
148
+ bdrv_co_yield_to_drain(bs, false, recursive, parent);
149
return;
150
}
151
assert(bs->quiesce_counter > 0);
152
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
153
if (old_quiesce_counter == 1) {
154
aio_enable_external(bdrv_get_aio_context(bs));
155
}
156
+
157
+ if (recursive) {
158
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
159
+ bdrv_do_drained_end(child->bs, true, child);
160
+ }
161
+ }
96
}
162
}
97
163
98
+static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
164
void bdrv_drained_end(BlockDriverState *bs)
99
+{
165
{
100
+ if (blk->quiesce_counter && !blk->disable_request_queuing) {
166
- bdrv_do_drained_end(bs, NULL);
101
+ qemu_co_queue_wait(&blk->queued_requests, NULL);
167
+ bdrv_do_drained_end(bs, false, NULL);
102
+ }
103
+}
168
+}
104
+
169
+
105
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
170
+void bdrv_subtree_drained_end(BlockDriverState *bs)
106
unsigned int bytes, QEMUIOVector *qiov,
171
+{
107
BdrvRequestFlags flags)
172
+ bdrv_do_drained_end(bs, true, NULL);
108
{
109
int ret;
110
- BlockDriverState *bs = blk_bs(blk);
111
+ BlockDriverState *bs;
112
113
+ blk_wait_while_drained(blk);
114
+
115
+ /* Call blk_bs() only after waiting, the graph may have changed */
116
+ bs = blk_bs(blk);
117
trace_blk_co_preadv(blk, bs, offset, bytes, flags);
118
119
ret = blk_check_byte_request(blk, offset, bytes);
120
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
121
BdrvRequestFlags flags)
122
{
123
int ret;
124
- BlockDriverState *bs = blk_bs(blk);
125
+ BlockDriverState *bs;
126
127
+ blk_wait_while_drained(blk);
128
+
129
+ /* Call blk_bs() only after waiting, the graph may have changed */
130
+ bs = blk_bs(blk);
131
trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
132
133
ret = blk_check_byte_request(blk, offset, bytes);
134
@@ -XXX,XX +XXX,XX @@ static void blk_aio_read_entry(void *opaque)
135
BlkRwCo *rwco = &acb->rwco;
136
QEMUIOVector *qiov = rwco->iobuf;
137
138
+ if (rwco->blk->quiesce_counter) {
139
+ blk_dec_in_flight(rwco->blk);
140
+ blk_wait_while_drained(rwco->blk);
141
+ blk_inc_in_flight(rwco->blk);
142
+ }
143
+
144
assert(qiov->size == acb->bytes);
145
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
146
qiov, rwco->flags);
147
@@ -XXX,XX +XXX,XX @@ static void blk_aio_write_entry(void *opaque)
148
BlkRwCo *rwco = &acb->rwco;
149
QEMUIOVector *qiov = rwco->iobuf;
150
151
+ if (rwco->blk->quiesce_counter) {
152
+ blk_dec_in_flight(rwco->blk);
153
+ blk_wait_while_drained(rwco->blk);
154
+ blk_inc_in_flight(rwco->blk);
155
+ }
156
+
157
assert(!qiov || qiov->size == acb->bytes);
158
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
159
qiov, rwco->flags);
160
@@ -XXX,XX +XXX,XX @@ void blk_aio_cancel_async(BlockAIOCB *acb)
161
162
int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
163
{
164
+ blk_wait_while_drained(blk);
165
+
166
if (!blk_is_available(blk)) {
167
return -ENOMEDIUM;
168
}
169
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
170
171
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
172
{
173
- int ret = blk_check_byte_request(blk, offset, bytes);
174
+ int ret;
175
+
176
+ blk_wait_while_drained(blk);
177
+
178
+ ret = blk_check_byte_request(blk, offset, bytes);
179
if (ret < 0) {
180
return ret;
181
}
182
@@ -XXX,XX +XXX,XX @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
183
184
int blk_co_flush(BlockBackend *blk)
185
{
186
+ blk_wait_while_drained(blk);
187
+
188
if (!blk_is_available(blk)) {
189
return -ENOMEDIUM;
190
}
191
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
192
if (blk->dev_ops && blk->dev_ops->drained_end) {
193
blk->dev_ops->drained_end(blk->dev_opaque);
194
}
195
+ while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
196
+ /* Resume all queued requests */
197
+ }
198
}
199
}
173
}
200
174
201
diff --git a/block/commit.c b/block/commit.c
175
/*
202
index XXXXXXX..XXXXXXX 100644
203
--- a/block/commit.c
204
+++ b/block/commit.c
205
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
206
if (ret < 0) {
207
goto fail;
208
}
209
+ blk_set_disable_request_queuing(s->base, true);
210
s->base_bs = base;
211
212
/* Required permissions are already taken with block_job_add_bdrv() */
213
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
214
if (ret < 0) {
215
goto fail;
216
}
217
+ blk_set_disable_request_queuing(s->top, true);
218
219
s->backing_file_str = g_strdup(backing_file_str);
220
s->on_error = on_error;
221
diff --git a/block/mirror.c b/block/mirror.c
222
index XXXXXXX..XXXXXXX 100644
223
--- a/block/mirror.c
224
+++ b/block/mirror.c
225
@@ -XXX,XX +XXX,XX @@ static BlockJob *mirror_start_job(
226
blk_set_force_allow_inactivate(s->target);
227
}
228
blk_set_allow_aio_context_change(s->target, true);
229
+ blk_set_disable_request_queuing(s->target, true);
230
231
s->replaces = g_strdup(replaces);
232
s->on_source_error = on_source_error;
233
diff --git a/blockjob.c b/blockjob.c
234
index XXXXXXX..XXXXXXX 100644
235
--- a/blockjob.c
236
+++ b/blockjob.c
237
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
238
239
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
240
241
+ /* Disable request queuing in the BlockBackend to avoid deadlocks on drain:
242
+ * The job reports that it's busy until it reaches a pause point. */
243
+ blk_set_disable_request_queuing(blk, true);
244
blk_set_allow_aio_context_change(blk, true);
245
246
/* Only set speed when necessary to avoid NotSupported error */
247
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
248
index XXXXXXX..XXXXXXX 100644
249
--- a/tests/test-bdrv-drain.c
250
+++ b/tests/test-bdrv-drain.c
251
@@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
252
&error_abort);
253
s = bs->opaque;
254
blk_insert_bs(blk, bs, &error_abort);
255
+ blk_set_disable_request_queuing(blk, true);
256
257
blk_set_aio_context(blk, ctx_a, &error_abort);
258
aio_context_acquire(ctx_a);
259
--
176
--
260
2.20.1
177
2.13.6
261
178
262
179
diff view generated by jsdifflib
1
234 implements functions that are useful for doing migration between two
1
Add a subtree drain version to the existing test cases.
2
VMs. Move them to iotests.py so that other test cases can use them, too.
3
2
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Max Reitz <mreitz@redhat.com>
6
---
4
---
7
tests/qemu-iotests/234 | 30 +++++++-----------------------
5
tests/test-bdrv-drain.c | 27 ++++++++++++++++++++++++++-
8
tests/qemu-iotests/iotests.py | 16 ++++++++++++++++
6
1 file changed, 26 insertions(+), 1 deletion(-)
9
2 files changed, 23 insertions(+), 23 deletions(-)
10
7
11
diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234
8
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
12
index XXXXXXX..XXXXXXX 100755
13
--- a/tests/qemu-iotests/234
14
+++ b/tests/qemu-iotests/234
15
@@ -XXX,XX +XXX,XX @@ import os
16
iotests.verify_image_format(supported_fmts=['qcow2'])
17
iotests.verify_platform(['linux'])
18
19
-def enable_migration_events(vm, name):
20
- iotests.log('Enabling migration QMP events on %s...' % name)
21
- iotests.log(vm.qmp('migrate-set-capabilities', capabilities=[
22
- {
23
- 'capability': 'events',
24
- 'state': True
25
- }
26
- ]))
27
-
28
-def wait_migration(vm):
29
- while True:
30
- event = vm.event_wait('MIGRATION')
31
- iotests.log(event, filters=[iotests.filter_qmp_event])
32
- if event['data']['status'] == 'completed':
33
- break
34
-
35
with iotests.FilePath('img') as img_path, \
36
iotests.FilePath('backing') as backing_path, \
37
iotests.FilePath('mig_fifo_a') as fifo_a, \
38
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
39
.add_blockdev('%s,file=drive0-backing-file,node-name=drive0-backing' % (iotests.imgfmt))
40
.launch())
41
42
- enable_migration_events(vm_a, 'A')
43
+ vm_a.enable_migration_events('A')
44
45
iotests.log('Launching destination VM...')
46
(vm_b.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path))
47
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
48
.add_incoming("exec: cat '%s'" % (fifo_a))
49
.launch())
50
51
- enable_migration_events(vm_b, 'B')
52
+ vm_b.enable_migration_events('B')
53
54
# Add a child node that was created after the parent node. The reverse case
55
# is covered by the -blockdev options above.
56
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
57
iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a)))
58
with iotests.Timeout(3, 'Migration does not complete'):
59
# Wait for the source first (which includes setup=setup)
60
- wait_migration(vm_a)
61
+ vm_a.wait_migration()
62
# Wait for the destination second (which does not)
63
- wait_migration(vm_b)
64
+ vm_b.wait_migration()
65
66
iotests.log(vm_a.qmp('query-migrate')['return']['status'])
67
iotests.log(vm_b.qmp('query-migrate')['return']['status'])
68
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
69
.add_incoming("exec: cat '%s'" % (fifo_b))
70
.launch())
71
72
- enable_migration_events(vm_a, 'A')
73
+ vm_a.enable_migration_events('A')
74
75
iotests.log(vm_a.qmp('blockdev-snapshot', node='drive0-backing',
76
overlay='drive0'))
77
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \
78
iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b)))
79
with iotests.Timeout(3, 'Migration does not complete'):
80
# Wait for the source first (which includes setup=setup)
81
- wait_migration(vm_b)
82
+ vm_b.wait_migration()
83
# Wait for the destination second (which does not)
84
- wait_migration(vm_a)
85
+ vm_a.wait_migration()
86
87
iotests.log(vm_a.qmp('query-migrate')['return']['status'])
88
iotests.log(vm_b.qmp('query-migrate')['return']['status'])
89
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
90
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
91
--- a/tests/qemu-iotests/iotests.py
10
--- a/tests/test-bdrv-drain.c
92
+++ b/tests/qemu-iotests/iotests.py
11
+++ b/tests/test-bdrv-drain.c
93
@@ -XXX,XX +XXX,XX @@ class VM(qtest.QEMUQtestMachine):
12
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
94
elif status == 'null':
13
enum drain_type {
95
return error
14
BDRV_DRAIN_ALL,
96
15
BDRV_DRAIN,
97
+ def enable_migration_events(self, name):
16
+ BDRV_SUBTREE_DRAIN,
98
+ log('Enabling migration QMP events on %s...' % name)
17
DRAIN_TYPE_MAX,
99
+ log(self.qmp('migrate-set-capabilities', capabilities=[
18
};
100
+ {
19
101
+ 'capability': 'events',
20
@@ -XXX,XX +XXX,XX @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
102
+ 'state': True
21
switch (drain_type) {
103
+ }
22
case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
104
+ ]))
23
case BDRV_DRAIN: bdrv_drained_begin(bs); break;
24
+ case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break;
25
default: g_assert_not_reached();
26
}
27
}
28
@@ -XXX,XX +XXX,XX @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
29
switch (drain_type) {
30
case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
31
case BDRV_DRAIN: bdrv_drained_end(bs); break;
32
+ case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break;
33
default: g_assert_not_reached();
34
}
35
}
36
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void)
37
test_drv_cb_common(BDRV_DRAIN, false);
38
}
39
40
+static void test_drv_cb_drain_subtree(void)
41
+{
42
+ test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
43
+}
105
+
44
+
106
+ def wait_migration(self):
45
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
107
+ while True:
46
{
108
+ event = self.event_wait('MIGRATION')
47
BlockBackend *blk;
109
+ log(event, filters=[filter_qmp_event])
48
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
110
+ if event['data']['status'] == 'completed':
49
test_quiesce_common(BDRV_DRAIN, false);
111
+ break
50
}
51
52
+static void test_quiesce_drain_subtree(void)
53
+{
54
+ test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
55
+}
112
+
56
+
113
def node_info(self, node_name):
57
static void test_nested(void)
114
nodes = self.qmp('query-named-block-nodes')
58
{
115
for x in nodes['return']:
59
BlockBackend *blk;
60
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)
61
/* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
62
int bs_quiesce = (outer != BDRV_DRAIN_ALL) +
63
(inner != BDRV_DRAIN_ALL);
64
- int backing_quiesce = 0;
65
+ int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) +
66
+ (inner == BDRV_SUBTREE_DRAIN);
67
int backing_cb_cnt = (outer != BDRV_DRAIN) +
68
(inner != BDRV_DRAIN);
69
70
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain(void)
71
test_blockjob_common(BDRV_DRAIN);
72
}
73
74
+static void test_blockjob_drain_subtree(void)
75
+{
76
+ test_blockjob_common(BDRV_SUBTREE_DRAIN);
77
+}
78
+
79
int main(int argc, char **argv)
80
{
81
bdrv_init();
82
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
83
84
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
85
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
86
+ g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
87
+ test_drv_cb_drain_subtree);
88
89
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
90
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
91
+ g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
92
+ test_quiesce_drain_subtree);
93
94
g_test_add_func("/bdrv-drain/nested", test_nested);
95
96
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
97
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
98
+ g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
99
+ test_blockjob_drain_subtree);
100
101
return g_test_run();
102
}
116
--
103
--
117
2.20.1
104
2.13.6
118
105
119
106
diff view generated by jsdifflib
1
The code path for -device drive=<node-name> or without a drive=...
1
If bdrv_do_drained_begin/end() are called in coroutine context, they
2
option for empty drives, which is supposed to be used with -blockdev
2
first use a BH to get out of the coroutine context. Call some existing
3
differs enough from the -drive based path with a user-owned
3
tests again from a coroutine to cover this code path.
4
BlockBackend, so we want to test both paths at least for the basic tests
5
implemented by TestInitiallyFilled and TestInitiallyEmpty.
6
7
This would have caught the bug recently fixed for inserting read-only
8
nodes into a scsi-cd created without a drive=... option.
9
4
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
Reviewed-by: Max Reitz <mreitz@redhat.com>
12
---
6
---
13
tests/qemu-iotests/118 | 43 ++++++++++++++++++++++++++------------
7
tests/test-bdrv-drain.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
14
tests/qemu-iotests/118.out | 4 ++--
8
1 file changed, 59 insertions(+)
15
2 files changed, 32 insertions(+), 15 deletions(-)
16
9
17
diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118
10
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
18
index XXXXXXX..XXXXXXX 100755
11
index XXXXXXX..XXXXXXX 100644
19
--- a/tests/qemu-iotests/118
12
--- a/tests/test-bdrv-drain.c
20
+++ b/tests/qemu-iotests/118
13
+++ b/tests/test-bdrv-drain.c
21
@@ -XXX,XX +XXX,XX @@ class ChangeBaseClass(iotests.QMPTestCase):
14
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
22
has_opened = False
15
*aio_ret = ret;
23
has_closed = False
16
}
24
17
25
+ device_name = 'qdev0'
18
+typedef struct CallInCoroutineData {
26
+ use_drive = False
19
+ void (*entry)(void);
20
+ bool done;
21
+} CallInCoroutineData;
27
+
22
+
28
def process_events(self):
23
+static coroutine_fn void call_in_coroutine_entry(void *opaque)
29
for event in self.vm.get_qmp_events(wait=False):
24
+{
30
if (event['event'] == 'DEVICE_TRAY_MOVED' and
25
+ CallInCoroutineData *data = opaque;
31
- event['data']['device'] == 'drive0'):
32
+ (event['data']['device'] == 'drive0' or
33
+ event['data']['id'] == self.device_name)):
34
if event['data']['tray-open'] == False:
35
self.has_closed = True
36
else:
37
@@ -XXX,XX +XXX,XX @@ class ChangeBaseClass(iotests.QMPTestCase):
38
39
class GeneralChangeTestsBaseClass(ChangeBaseClass):
40
41
- device_name = 'qdev0'
42
-
43
def test_change(self):
44
+ # 'change' requires a drive name, so skip the test for blockdev
45
+ if not self.use_drive:
46
+ return
47
+
26
+
48
result = self.vm.qmp('change', device='drive0', target=new_img,
27
+ data->entry();
49
arg=iotests.imgfmt)
28
+ data->done = true;
50
self.assert_qmp(result, 'return', {})
29
+}
51
@@ -XXX,XX +XXX,XX @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass):
52
qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k')
53
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
54
self.vm = iotests.VM()
55
- self.vm.add_drive(old_img, 'media=%s' % self.media, 'none')
56
+ if self.use_drive:
57
+ self.vm.add_drive(old_img, 'media=%s' % self.media, 'none')
58
+ else:
59
+ self.vm.add_blockdev([ 'node-name=drive0',
60
+ 'driver=%s' % iotests.imgfmt,
61
+ 'file.driver=file',
62
+ 'file.filename=%s' % old_img ])
63
if self.interface == 'scsi':
64
self.vm.add_device('virtio-scsi-pci')
65
self.vm.add_device('%s,drive=drive0,id=%s' %
66
@@ -XXX,XX +XXX,XX @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
67
68
def setUp(self):
69
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
70
- self.vm = iotests.VM().add_drive(None, 'media=%s' % self.media, 'none')
71
+ self.vm = iotests.VM()
72
+ if self.use_drive:
73
+ self.vm.add_drive(None, 'media=%s' % self.media, 'none')
74
if self.interface == 'scsi':
75
self.vm.add_device('virtio-scsi-pci')
76
- self.vm.add_device('%s,drive=drive0,id=%s' %
77
+ self.vm.add_device('%s,%sid=%s' %
78
(interface_to_device_name(self.interface),
79
+ 'drive=drive0,' if self.use_drive else '',
80
self.device_name))
81
self.vm.launch()
82
83
@@ -XXX,XX +XXX,XX @@ def create_basic_test_classes():
84
('disk', 'floppy', False) ]:
85
86
for case in [ TestInitiallyFilled, TestInitiallyEmpty ]:
87
-
88
- attr = { 'media': media,
89
- 'interface': interface,
90
- 'has_real_tray': has_real_tray }
91
-
92
- name = '%s_%s_%s' % (case.__name__, media, interface)
93
- globals()[name] = type(name, (case, ), attr)
94
+ for use_drive in [ True, False ]:
95
+ attr = { 'media': media,
96
+ 'interface': interface,
97
+ 'has_real_tray': has_real_tray,
98
+ 'use_drive': use_drive }
99
+
30
+
100
+ name = '%s_%s_%s_%s' % (case.__name__, media, interface,
31
+static void call_in_coroutine(void (*entry)(void))
101
+ 'drive' if use_drive else 'blockdev')
32
+{
102
+ globals()[name] = type(name, (case, ), attr)
33
+ Coroutine *co;
103
34
+ CallInCoroutineData data = {
104
create_basic_test_classes()
35
+ .entry = entry,
105
36
+ .done = false,
106
diff --git a/tests/qemu-iotests/118.out b/tests/qemu-iotests/118.out
37
+ };
107
index XXXXXXX..XXXXXXX 100644
38
+
108
--- a/tests/qemu-iotests/118.out
39
+ co = qemu_coroutine_create(call_in_coroutine_entry, &data);
109
+++ b/tests/qemu-iotests/118.out
40
+ qemu_coroutine_enter(co);
110
@@ -XXX,XX +XXX,XX @@
41
+ while (!data.done) {
111
-.........................................................................................
42
+ aio_poll(qemu_get_aio_context(), true);
112
+.......................................................................................................................................................................
43
+ }
113
----------------------------------------------------------------------
44
+}
114
-Ran 89 tests
45
+
115
+Ran 167 tests
46
enum drain_type {
116
47
BDRV_DRAIN_ALL,
117
OK
48
BDRV_DRAIN,
49
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void)
50
test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
51
}
52
53
+static void test_drv_cb_co_drain(void)
54
+{
55
+ call_in_coroutine(test_drv_cb_drain);
56
+}
57
+
58
+static void test_drv_cb_co_drain_subtree(void)
59
+{
60
+ call_in_coroutine(test_drv_cb_drain_subtree);
61
+}
62
+
63
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
64
{
65
BlockBackend *blk;
66
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void)
67
test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
68
}
69
70
+static void test_quiesce_co_drain(void)
71
+{
72
+ call_in_coroutine(test_quiesce_drain);
73
+}
74
+
75
+static void test_quiesce_co_drain_subtree(void)
76
+{
77
+ call_in_coroutine(test_quiesce_drain_subtree);
78
+}
79
+
80
static void test_nested(void)
81
{
82
BlockBackend *blk;
83
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
84
g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
85
test_drv_cb_drain_subtree);
86
87
+ // XXX bdrv_drain_all() doesn't work in coroutine context
88
+ g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
89
+ g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
90
+ test_drv_cb_co_drain_subtree);
91
+
92
+
93
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
94
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
95
g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
96
test_quiesce_drain_subtree);
97
98
+ // XXX bdrv_drain_all() doesn't work in coroutine context
99
+ g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
100
+ g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
101
+ test_quiesce_co_drain_subtree);
102
+
103
g_test_add_func("/bdrv-drain/nested", test_nested);
104
105
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
118
--
106
--
119
2.20.1
107
2.13.6
120
108
121
109
diff view generated by jsdifflib
1
We're getting a ridiculous number of child classes of
1
Test that drain sections are correctly propagated through the graph.
2
TestInitiallyFilled and TestInitiallyEmpty that differ only in a few
3
attributes that we want to test in all combinations.
4
5
Instead of explicitly writing down every combination, let's use a loop
6
and create those classes dynamically.
7
2
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
---
4
---
11
tests/qemu-iotests/118 | 69 +++++++++++++-----------------------------
5
tests/test-bdrv-drain.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++
12
1 file changed, 21 insertions(+), 48 deletions(-)
6
1 file changed, 74 insertions(+)
13
7
14
diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118
8
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
15
index XXXXXXX..XXXXXXX 100755
9
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/qemu-iotests/118
10
--- a/tests/test-bdrv-drain.c
17
+++ b/tests/qemu-iotests/118
11
+++ b/tests/test-bdrv-drain.c
18
@@ -XXX,XX +XXX,XX @@ class GeneralChangeTestsBaseClass(ChangeBaseClass):
12
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)
19
class TestInitiallyFilled(GeneralChangeTestsBaseClass):
13
blk_unref(blk);
20
was_empty = False
14
}
21
15
22
- def setUp(self, media, interface):
16
+static void test_multiparent(void)
23
+ def setUp(self):
17
+{
24
qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k')
18
+ BlockBackend *blk_a, *blk_b;
25
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
19
+ BlockDriverState *bs_a, *bs_b, *backing;
26
self.vm = iotests.VM()
20
+ BDRVTestState *a_s, *b_s, *backing_s;
27
- self.vm.add_drive(old_img, 'media=%s' % media, 'none')
21
+
28
- if interface == 'scsi':
22
+ blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
29
+ self.vm.add_drive(old_img, 'media=%s' % self.media, 'none')
23
+ bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
30
+ if self.interface == 'scsi':
24
+ &error_abort);
31
self.vm.add_device('virtio-scsi-pci')
25
+ a_s = bs_a->opaque;
32
self.vm.add_device('%s,drive=drive0,id=%s' %
26
+ blk_insert_bs(blk_a, bs_a, &error_abort);
33
- (interface_to_device_name(interface),
27
+
34
+ (interface_to_device_name(self.interface),
28
+ blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
35
self.device_name))
29
+ bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
36
self.vm.launch()
30
+ &error_abort);
37
31
+ b_s = bs_b->opaque;
38
@@ -XXX,XX +XXX,XX @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass):
32
+ blk_insert_bs(blk_b, bs_b, &error_abort);
39
class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
33
+
40
was_empty = True
34
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
41
35
+ backing_s = backing->opaque;
42
- def setUp(self, media, interface):
36
+ bdrv_set_backing_hd(bs_a, backing, &error_abort);
43
+ def setUp(self):
37
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
44
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
38
+
45
- self.vm = iotests.VM().add_drive(None, 'media=%s' % media, 'none')
39
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
46
- if interface == 'scsi':
40
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
47
+ self.vm = iotests.VM().add_drive(None, 'media=%s' % self.media, 'none')
41
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
48
+ if self.interface == 'scsi':
42
+ g_assert_cmpint(a_s->drain_count, ==, 0);
49
self.vm.add_device('virtio-scsi-pci')
43
+ g_assert_cmpint(b_s->drain_count, ==, 0);
50
self.vm.add_device('%s,drive=drive0,id=%s' %
44
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
51
- (interface_to_device_name(interface),
45
+
52
+ (interface_to_device_name(self.interface),
46
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
53
self.device_name))
47
+
54
self.vm.launch()
48
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
55
49
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
56
@@ -XXX,XX +XXX,XX @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
50
+ g_assert_cmpint(backing->quiesce_counter, ==, 1);
57
# Should be a no-op
51
+ g_assert_cmpint(a_s->drain_count, ==, 1);
58
self.assert_qmp(result, 'return', {})
52
+ g_assert_cmpint(b_s->drain_count, ==, 1);
59
53
+ g_assert_cmpint(backing_s->drain_count, ==, 1);
60
-class TestCDInitiallyFilled(TestInitiallyFilled):
54
+
61
- TestInitiallyFilled = TestInitiallyFilled
55
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
62
- has_real_tray = True
56
+
63
-
57
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
64
- def setUp(self):
58
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
65
- self.TestInitiallyFilled.setUp(self, 'cdrom', 'ide')
59
+ g_assert_cmpint(backing->quiesce_counter, ==, 2);
66
-
60
+ g_assert_cmpint(a_s->drain_count, ==, 2);
67
-class TestCDInitiallyEmpty(TestInitiallyEmpty):
61
+ g_assert_cmpint(b_s->drain_count, ==, 2);
68
- TestInitiallyEmpty = TestInitiallyEmpty
62
+ g_assert_cmpint(backing_s->drain_count, ==, 2);
69
- has_real_tray = True
63
+
70
-
64
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
71
- def setUp(self):
65
+
72
- self.TestInitiallyEmpty.setUp(self, 'cdrom', 'ide')
66
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
73
+# Do this in a function to avoid leaking variables like case into the global
67
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
74
+# name space (otherwise tests would be run for the abstract base classes)
68
+ g_assert_cmpint(backing->quiesce_counter, ==, 1);
75
+def create_basic_test_classes():
69
+ g_assert_cmpint(a_s->drain_count, ==, 1);
76
+ for (media, interface, has_real_tray) in [ ('cdrom', 'ide', True),
70
+ g_assert_cmpint(b_s->drain_count, ==, 1);
77
+ ('cdrom', 'scsi', True),
71
+ g_assert_cmpint(backing_s->drain_count, ==, 1);
78
+ ('disk', 'floppy', False) ]:
72
+
79
73
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
80
-class TestSCSICDInitiallyFilled(TestInitiallyFilled):
74
+
81
- TestInitiallyFilled = TestInitiallyFilled
75
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
82
- has_real_tray = True
76
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
83
+ for case in [ TestInitiallyFilled, TestInitiallyEmpty ]:
77
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
84
78
+ g_assert_cmpint(a_s->drain_count, ==, 0);
85
- def setUp(self):
79
+ g_assert_cmpint(b_s->drain_count, ==, 0);
86
- self.TestInitiallyFilled.setUp(self, 'cdrom', 'scsi')
80
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
87
+ attr = { 'media': media,
81
+
88
+ 'interface': interface,
82
+ bdrv_unref(backing);
89
+ 'has_real_tray': has_real_tray }
83
+ bdrv_unref(bs_a);
90
84
+ bdrv_unref(bs_b);
91
-class TestSCSICDInitiallyEmpty(TestInitiallyEmpty):
85
+ blk_unref(blk_a);
92
- TestInitiallyEmpty = TestInitiallyEmpty
86
+ blk_unref(blk_b);
93
- has_real_tray = True
87
+}
94
+ name = '%s_%s_%s' % (case.__name__, media, interface)
88
+
95
+ globals()[name] = type(name, (case, ), attr)
89
96
90
typedef struct TestBlockJob {
97
- def setUp(self):
91
BlockJob common;
98
- self.TestInitiallyEmpty.setUp(self, 'cdrom', 'scsi')
92
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
99
-
93
test_quiesce_co_drain_subtree);
100
-class TestFloppyInitiallyFilled(TestInitiallyFilled):
94
101
- TestInitiallyFilled = TestInitiallyFilled
95
g_test_add_func("/bdrv-drain/nested", test_nested);
102
- has_real_tray = False
96
+ g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
103
-
97
104
- def setUp(self):
98
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
105
- self.TestInitiallyFilled.setUp(self, 'disk', 'floppy')
99
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
106
-
107
-class TestFloppyInitiallyEmpty(TestInitiallyEmpty):
108
- TestInitiallyEmpty = TestInitiallyEmpty
109
- has_real_tray = False
110
-
111
- def setUp(self):
112
- self.TestInitiallyEmpty.setUp(self, 'disk', 'floppy')
113
- # FDDs not having a real tray and there not being a medium inside the
114
- # tray at startup means the tray will be considered open
115
- self.has_opened = True
116
+create_basic_test_classes()
117
118
class TestChangeReadOnly(ChangeBaseClass):
119
device_name = 'qdev0'
120
--
100
--
121
2.20.1
101
2.13.6
122
102
123
103
diff view generated by jsdifflib
1
From: Nir Soffer <nirsof@gmail.com>
1
We need to remember how many of the drain sections in which a node is
2
2
were recursive (i.e. subtree drain rather than node drain), so that they
3
In some cases buf_align or request_alignment cannot be detected:
3
can be correctly applied when children are added or removed during the
4
4
drained section.
5
1. With Gluster, buf_align cannot be detected since the actual I/O is
5
6
done on Gluster server, and qemu buffer alignment does not matter.
6
With this change, it is safe to modify the graph even inside a
7
Since we don't have alignment requirement, buf_align=1 is the best
7
bdrv_subtree_drained_begin/end() section.
8
value.
8
9
10
2. With local XFS filesystem, buf_align cannot be detected if reading
11
from unallocated area. In this we must align the buffer, but we don't
12
know what is the correct size. Using the wrong alignment results in
13
I/O error.
14
15
3. With Gluster backed by XFS, request_alignment cannot be detected if
16
reading from unallocated area. In this case we need to use the
17
correct alignment, and failing to do so results in I/O errors.
18
19
4. With NFS, the server does not use direct I/O, so both buf_align cannot
20
be detected. In this case we don't need any alignment so we can use
21
buf_align=1 and request_alignment=1.
22
23
These cases seems to work when storage sector size is 512 bytes, because
24
the current code starts checking align=512. If the check succeeds
25
because alignment cannot be detected we use 512. But this does not work
26
for storage with 4k sector size.
27
28
To determine if we can detect the alignment, we probe first with
29
align=1. If probing succeeds, maybe there are no alignment requirement
30
(cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we
31
don't have any way to tell, we treat this as undetectable alignment. If
32
probing with align=1 fails with EINVAL, but probing with one of the
33
expected alignments succeeds, we know that we found a working alignment.
34
35
Practically the alignment requirements are the same for buffer
36
alignment, buffer length, and offset in file. So in case we cannot
37
detect buf_align, we can use request alignment. If we cannot detect
38
request alignment, we can fallback to a safe value. To use this logic,
39
we probe first request alignment instead of buf_align.
40
41
Here is a table showing the behaviour with current code (the value in
42
parenthesis is the optimal value).
43
44
Case Sector buf_align (opt) request_alignment (opt) result
45
======================================================================
46
1 512 512 (1) 512 (512) OK
47
1 4096 512 (1) 4096 (4096) FAIL
48
----------------------------------------------------------------------
49
2 512 512 (512) 512 (512) OK
50
2 4096 512 (4096) 4096 (4096) FAIL
51
----------------------------------------------------------------------
52
3 512 512 (1) 512 (512) OK
53
3 4096 512 (1) 512 (4096) FAIL
54
----------------------------------------------------------------------
55
4 512 512 (1) 512 (1) OK
56
4 4096 512 (1) 512 (1) OK
57
58
Same cases with this change:
59
60
Case Sector buf_align (opt) request_alignment (opt) result
61
======================================================================
62
1 512 512 (1) 512 (512) OK
63
1 4096 4096 (1) 4096 (4096) OK
64
----------------------------------------------------------------------
65
2 512 512 (512) 512 (512) OK
66
2 4096 4096 (4096) 4096 (4096) OK
67
----------------------------------------------------------------------
68
3 512 4096 (1) 4096 (512) OK
69
3 4096 4096 (1) 4096 (4096) OK
70
----------------------------------------------------------------------
71
4 512 4096 (1) 4096 (1) OK
72
4 4096 4096 (1) 4096 (1) OK
73
74
I tested that provisioning VMs and copying disks on local XFS and
75
Gluster with 4k bytes sector size work now, resolving bugs [1],[2].
76
I tested also on XFS, NFS, Gluster with 512 bytes sector size.
77
78
[1] https://bugzilla.redhat.com/1737256
79
[2] https://bugzilla.redhat.com/1738657
80
81
Signed-off-by: Nir Soffer <nsoffer@redhat.com>
82
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
83
---
10
---
84
block/file-posix.c | 36 +++++++++++++++++++++++++-----------
11
include/block/block.h | 2 --
85
1 file changed, 25 insertions(+), 11 deletions(-)
12
include/block/block_int.h | 5 +++++
86
13
block.c | 32 +++++++++++++++++++++++++++++---
87
diff --git a/block/file-posix.c b/block/file-posix.c
14
block/io.c | 28 ++++++++++++++++++++++++----
88
index XXXXXXX..XXXXXXX 100644
15
4 files changed, 58 insertions(+), 9 deletions(-)
89
--- a/block/file-posix.c
16
90
+++ b/block/file-posix.c
17
diff --git a/include/block/block.h b/include/block/block.h
91
@@ -XXX,XX +XXX,XX @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
18
index XXXXXXX..XXXXXXX 100644
92
BDRVRawState *s = bs->opaque;
19
--- a/include/block/block.h
93
char *buf;
20
+++ b/include/block/block.h
94
size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
21
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs);
95
+ size_t alignments[] = {1, 512, 1024, 2048, 4096};
22
/**
96
23
* Like bdrv_drained_begin, but recursively begins a quiesced section for
97
/* For SCSI generic devices the alignment is not really used.
24
* exclusive access to all child nodes as well.
98
With buffered I/O, we don't have any restrictions. */
25
- *
99
@@ -XXX,XX +XXX,XX @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
26
- * Graph changes are not allowed during a subtree drain section.
27
*/
28
void bdrv_subtree_drained_begin(BlockDriverState *bs);
29
30
diff --git a/include/block/block_int.h b/include/block/block_int.h
31
index XXXXXXX..XXXXXXX 100644
32
--- a/include/block/block_int.h
33
+++ b/include/block/block_int.h
34
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
35
36
/* Accessed with atomic ops. */
37
int quiesce_counter;
38
+ int recursive_quiesce_counter;
39
+
40
unsigned int write_gen; /* Current data generation */
41
42
/* Protected by reqs_lock. */
43
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
44
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
45
BdrvRequestFlags flags);
46
47
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
48
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
49
+
50
int get_tmp_filename(char *filename, int size);
51
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
52
const char *filename);
53
diff --git a/block.c b/block.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/block.c
56
+++ b/block.c
57
@@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_end(BdrvChild *child)
58
bdrv_drained_end(bs);
59
}
60
61
+static void bdrv_child_cb_attach(BdrvChild *child)
62
+{
63
+ BlockDriverState *bs = child->opaque;
64
+ bdrv_apply_subtree_drain(child, bs);
65
+}
66
+
67
+static void bdrv_child_cb_detach(BdrvChild *child)
68
+{
69
+ BlockDriverState *bs = child->opaque;
70
+ bdrv_unapply_subtree_drain(child, bs);
71
+}
72
+
73
static int bdrv_child_cb_inactivate(BdrvChild *child)
74
{
75
BlockDriverState *bs = child->opaque;
76
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = {
77
.inherit_options = bdrv_inherited_options,
78
.drained_begin = bdrv_child_cb_drained_begin,
79
.drained_end = bdrv_child_cb_drained_end,
80
+ .attach = bdrv_child_cb_attach,
81
+ .detach = bdrv_child_cb_detach,
82
.inactivate = bdrv_child_cb_inactivate,
83
};
84
85
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = {
86
.inherit_options = bdrv_inherited_fmt_options,
87
.drained_begin = bdrv_child_cb_drained_begin,
88
.drained_end = bdrv_child_cb_drained_end,
89
+ .attach = bdrv_child_cb_attach,
90
+ .detach = bdrv_child_cb_detach,
91
.inactivate = bdrv_child_cb_inactivate,
92
};
93
94
@@ -XXX,XX +XXX,XX @@ static void bdrv_backing_attach(BdrvChild *c)
95
parent->backing_blocker);
96
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
97
parent->backing_blocker);
98
+
99
+ bdrv_child_cb_attach(c);
100
}
101
102
static void bdrv_backing_detach(BdrvChild *c)
103
@@ -XXX,XX +XXX,XX @@ static void bdrv_backing_detach(BdrvChild *c)
104
bdrv_op_unblock_all(c->bs, parent->backing_blocker);
105
error_free(parent->backing_blocker);
106
parent->backing_blocker = NULL;
107
+
108
+ bdrv_child_cb_detach(c);
109
}
110
111
/*
112
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
113
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
100
}
114
}
101
#endif
115
if (old_bs) {
102
116
+ /* Detach first so that the recursive drain sections coming from @child
103
- /* If we could not get the sizes so far, we can only guess them */
117
+ * are already gone and we only end the drain sections that came from
104
- if (!s->buf_align) {
118
+ * elsewhere. */
105
+ /*
119
+ if (child->role->detach) {
106
+ * If we could not get the sizes so far, we can only guess them. First try
120
+ child->role->detach(child);
107
+ * to detect request alignment, since it is more likely to succeed. Then
121
+ }
108
+ * try to detect buf_align, which cannot be detected in some cases (e.g.
122
if (old_bs->quiesce_counter && child->role->drained_end) {
109
+ * Gluster). If buf_align cannot be detected, we fallback to the value of
123
for (i = 0; i < old_bs->quiesce_counter; i++) {
110
+ * request_alignment.
124
child->role->drained_end(child);
111
+ */
112
+
113
+ if (!bs->bl.request_alignment) {
114
+ int i;
115
size_t align;
116
- buf = qemu_memalign(max_align, 2 * max_align);
117
- for (align = 512; align <= max_align; align <<= 1) {
118
- if (raw_is_io_aligned(fd, buf + align, max_align)) {
119
- s->buf_align = align;
120
+ buf = qemu_memalign(max_align, max_align);
121
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
122
+ align = alignments[i];
123
+ if (raw_is_io_aligned(fd, buf, align)) {
124
+ /* Fallback to safe value. */
125
+ bs->bl.request_alignment = (align != 1) ? align : max_align;
126
break;
127
}
125
}
128
}
126
}
129
qemu_vfree(buf);
127
- if (child->role->detach) {
128
- child->role->detach(child);
129
- }
130
QLIST_REMOVE(child, next_parent);
130
}
131
}
131
132
132
- if (!bs->bl.request_alignment) {
133
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
133
+ if (!s->buf_align) {
134
+ int i;
135
size_t align;
136
- buf = qemu_memalign(s->buf_align, max_align);
137
- for (align = 512; align <= max_align; align <<= 1) {
138
- if (raw_is_io_aligned(fd, buf, align)) {
139
- bs->bl.request_alignment = align;
140
+ buf = qemu_memalign(max_align, 2 * max_align);
141
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
142
+ align = alignments[i];
143
+ if (raw_is_io_aligned(fd, buf + align, max_align)) {
144
+ /* Fallback to request_aligment. */
145
+ s->buf_align = (align != 1) ? align : bs->bl.request_alignment;
146
break;
147
}
134
}
148
}
135
}
136
137
+ /* Attach only after starting new drained sections, so that recursive
138
+ * drain sections coming from @child don't get an extra .drained_begin
139
+ * callback. */
140
if (child->role->attach) {
141
child->role->attach(child);
142
}
143
diff --git a/block/io.c b/block/io.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/block/io.c
146
+++ b/block/io.c
147
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
148
assert(data.done);
149
}
150
151
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
152
- BdrvChild *parent)
153
+void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
154
+ BdrvChild *parent)
155
{
156
BdrvChild *child, *next;
157
158
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
159
bdrv_drain_recurse(bs);
160
161
if (recursive) {
162
+ bs->recursive_quiesce_counter++;
163
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
164
bdrv_do_drained_begin(child->bs, true, child);
165
}
166
@@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_begin(BlockDriverState *bs)
167
bdrv_do_drained_begin(bs, true, NULL);
168
}
169
170
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
171
- BdrvChild *parent)
172
+void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
173
+ BdrvChild *parent)
174
{
175
BdrvChild *child, *next;
176
int old_quiesce_counter;
177
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
178
}
179
180
if (recursive) {
181
+ bs->recursive_quiesce_counter--;
182
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
183
bdrv_do_drained_end(child->bs, true, child);
184
}
185
@@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_end(BlockDriverState *bs)
186
bdrv_do_drained_end(bs, true, NULL);
187
}
188
189
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
190
+{
191
+ int i;
192
+
193
+ for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
194
+ bdrv_do_drained_begin(child->bs, true, child);
195
+ }
196
+}
197
+
198
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
199
+{
200
+ int i;
201
+
202
+ for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
203
+ bdrv_do_drained_end(child->bs, true, child);
204
+ }
205
+}
206
+
207
/*
208
* Wait for pending requests to complete on a single BlockDriverState subtree,
209
* and suspend block driver's internal I/O until next request arrives.
149
--
210
--
150
2.20.1
211
2.13.6
151
212
152
213
diff view generated by jsdifflib
New patch
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
---
3
tests/test-bdrv-drain.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
4
1 file changed, 80 insertions(+)
1
5
6
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
7
index XXXXXXX..XXXXXXX 100644
8
--- a/tests/test-bdrv-drain.c
9
+++ b/tests/test-bdrv-drain.c
10
@@ -XXX,XX +XXX,XX @@ static void test_multiparent(void)
11
blk_unref(blk_b);
12
}
13
14
+static void test_graph_change(void)
15
+{
16
+ BlockBackend *blk_a, *blk_b;
17
+ BlockDriverState *bs_a, *bs_b, *backing;
18
+ BDRVTestState *a_s, *b_s, *backing_s;
19
+
20
+ blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
21
+ bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
22
+ &error_abort);
23
+ a_s = bs_a->opaque;
24
+ blk_insert_bs(blk_a, bs_a, &error_abort);
25
+
26
+ blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
27
+ bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
28
+ &error_abort);
29
+ b_s = bs_b->opaque;
30
+ blk_insert_bs(blk_b, bs_b, &error_abort);
31
+
32
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
33
+ backing_s = backing->opaque;
34
+ bdrv_set_backing_hd(bs_a, backing, &error_abort);
35
+
36
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
37
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
38
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
39
+ g_assert_cmpint(a_s->drain_count, ==, 0);
40
+ g_assert_cmpint(b_s->drain_count, ==, 0);
41
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
42
+
43
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
44
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
45
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
46
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
47
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
48
+
49
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
50
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
51
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
52
+ g_assert_cmpint(backing->quiesce_counter, ==, 5);
53
+ g_assert_cmpint(a_s->drain_count, ==, 5);
54
+ g_assert_cmpint(b_s->drain_count, ==, 5);
55
+ g_assert_cmpint(backing_s->drain_count, ==, 5);
56
+
57
+ bdrv_set_backing_hd(bs_b, NULL, &error_abort);
58
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
59
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
60
+ g_assert_cmpint(backing->quiesce_counter, ==, 3);
61
+ g_assert_cmpint(a_s->drain_count, ==, 3);
62
+ g_assert_cmpint(b_s->drain_count, ==, 2);
63
+ g_assert_cmpint(backing_s->drain_count, ==, 3);
64
+
65
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
66
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
67
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
68
+ g_assert_cmpint(backing->quiesce_counter, ==, 5);
69
+ g_assert_cmpint(a_s->drain_count, ==, 5);
70
+ g_assert_cmpint(b_s->drain_count, ==, 5);
71
+ g_assert_cmpint(backing_s->drain_count, ==, 5);
72
+
73
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
74
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
75
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
76
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
77
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
78
+
79
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
80
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
81
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
82
+ g_assert_cmpint(a_s->drain_count, ==, 0);
83
+ g_assert_cmpint(b_s->drain_count, ==, 0);
84
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
85
+
86
+ bdrv_unref(backing);
87
+ bdrv_unref(bs_a);
88
+ bdrv_unref(bs_b);
89
+ blk_unref(blk_a);
90
+ blk_unref(blk_b);
91
+}
92
+
93
94
typedef struct TestBlockJob {
95
BlockJob common;
96
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
97
98
g_test_add_func("/bdrv-drain/nested", test_nested);
99
g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
100
+ g_test_add_func("/bdrv-drain/graph-change", test_graph_change);
101
102
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
103
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
104
--
105
2.13.6
106
107
diff view generated by jsdifflib
1
The test covered only floppy and ide-cd. Add scsi-cd as well.
1
Since commit bde70715, base is the only node that is reopened in
2
commit_start(). This means that the code, which still involves an
3
explicit BlockReopenQueue, can now be simplified by using bdrv_reopen().
2
4
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Reviewed-by: Max Reitz <mreitz@redhat.com>
6
Reviewed-by: Fam Zheng <famz@redhat.com>
5
---
7
---
6
tests/qemu-iotests/118 | 20 ++++++++++++++++++++
8
block/commit.c | 8 +-------
7
tests/qemu-iotests/118.out | 4 ++--
9
1 file changed, 1 insertion(+), 7 deletions(-)
8
2 files changed, 22 insertions(+), 2 deletions(-)
9
10
10
diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118
11
diff --git a/block/commit.c b/block/commit.c
11
index XXXXXXX..XXXXXXX 100755
12
--- a/tests/qemu-iotests/118
13
+++ b/tests/qemu-iotests/118
14
@@ -XXX,XX +XXX,XX @@ def interface_to_device_name(interface):
15
return 'ide-cd'
16
elif interface == 'floppy':
17
return 'floppy'
18
+ elif interface == 'scsi':
19
+ return 'scsi-cd'
20
else:
21
return None
22
23
@@ -XXX,XX +XXX,XX @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass):
24
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
25
self.vm = iotests.VM()
26
self.vm.add_drive(old_img, 'media=%s' % media, 'none')
27
+ if interface == 'scsi':
28
+ self.vm.add_device('virtio-scsi-pci')
29
self.vm.add_device('%s,drive=drive0,id=%s' %
30
(interface_to_device_name(interface),
31
self.device_name))
32
@@ -XXX,XX +XXX,XX @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
33
def setUp(self, media, interface):
34
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
35
self.vm = iotests.VM().add_drive(None, 'media=%s' % media, 'none')
36
+ if interface == 'scsi':
37
+ self.vm.add_device('virtio-scsi-pci')
38
self.vm.add_device('%s,drive=drive0,id=%s' %
39
(interface_to_device_name(interface),
40
self.device_name))
41
@@ -XXX,XX +XXX,XX @@ class TestCDInitiallyEmpty(TestInitiallyEmpty):
42
def setUp(self):
43
self.TestInitiallyEmpty.setUp(self, 'cdrom', 'ide')
44
45
+class TestSCSICDInitiallyFilled(TestInitiallyFilled):
46
+ TestInitiallyFilled = TestInitiallyFilled
47
+ has_real_tray = True
48
+
49
+ def setUp(self):
50
+ self.TestInitiallyFilled.setUp(self, 'cdrom', 'scsi')
51
+
52
+class TestSCSICDInitiallyEmpty(TestInitiallyEmpty):
53
+ TestInitiallyEmpty = TestInitiallyEmpty
54
+ has_real_tray = True
55
+
56
+ def setUp(self):
57
+ self.TestInitiallyEmpty.setUp(self, 'cdrom', 'scsi')
58
+
59
class TestFloppyInitiallyFilled(TestInitiallyFilled):
60
TestInitiallyFilled = TestInitiallyFilled
61
has_real_tray = False
62
diff --git a/tests/qemu-iotests/118.out b/tests/qemu-iotests/118.out
63
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
64
--- a/tests/qemu-iotests/118.out
13
--- a/block/commit.c
65
+++ b/tests/qemu-iotests/118.out
14
+++ b/block/commit.c
66
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
67
-...............................................................
16
const char *filter_node_name, Error **errp)
68
+.........................................................................................
17
{
69
----------------------------------------------------------------------
18
CommitBlockJob *s;
70
-Ran 63 tests
19
- BlockReopenQueue *reopen_queue = NULL;
71
+Ran 89 tests
20
int orig_base_flags;
72
21
BlockDriverState *iter;
73
OK
22
BlockDriverState *commit_top_bs = NULL;
23
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
24
/* convert base to r/w, if necessary */
25
orig_base_flags = bdrv_get_flags(base);
26
if (!(orig_base_flags & BDRV_O_RDWR)) {
27
- reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
28
- orig_base_flags | BDRV_O_RDWR);
29
- }
30
-
31
- if (reopen_queue) {
32
- bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
33
+ bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err);
34
if (local_err != NULL) {
35
error_propagate(errp, local_err);
36
goto fail;
74
--
37
--
75
2.20.1
38
2.13.6
76
39
77
40
diff view generated by jsdifflib
1
The same change as commit 2b23f28639 ('block/copy-on-read: Fix
1
The bdrv_reopen*() implementation doesn't like it if the graph is
2
permissions for inactive node') made for the copy-on-read driver can be
2
changed between queuing nodes for reopen and actually reopening them
3
made for bdrv_filter_default_perms(): Retaining the old permissions from
3
(one of the reasons is that queuing can be recursive).
4
the BdrvChild if it is given complicates things unnecessarily when in
4
5
the end this only means that the options set in the c == NULL case (i.e.
5
So instead of draining the device only in bdrv_reopen_multiple(),
6
during child creation) are retained.
6
require that callers already drained all affected nodes, and assert this
7
in bdrv_reopen_queue().
7
8
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
---
11
---
12
block.c | 12 ++----------
12
block.c | 23 ++++++++++++++++-------
13
1 file changed, 2 insertions(+), 10 deletions(-)
13
block/replication.c | 6 ++++++
14
qemu-io-cmds.c | 3 +++
15
3 files changed, 25 insertions(+), 7 deletions(-)
14
16
15
diff --git a/block.c b/block.c
17
diff --git a/block.c b/block.c
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/block.c
19
--- a/block.c
18
+++ b/block.c
20
+++ b/block.c
19
@@ -XXX,XX +XXX,XX @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
21
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
20
uint64_t perm, uint64_t shared,
22
* returns a pointer to bs_queue, which is either the newly allocated
21
uint64_t *nperm, uint64_t *nshared)
23
* bs_queue, or the existing bs_queue being used.
24
*
25
+ * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
26
*/
27
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
28
BlockDriverState *bs,
29
@@ -XXX,XX +XXX,XX @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
30
BdrvChild *child;
31
QDict *old_options, *explicit_options;
32
33
+ /* Make sure that the caller remembered to use a drained section. This is
34
+ * important to avoid graph changes between the recursive queuing here and
35
+ * bdrv_reopen_multiple(). */
36
+ assert(bs->quiesce_counter > 0);
37
+
38
if (bs_queue == NULL) {
39
bs_queue = g_new0(BlockReopenQueue, 1);
40
QSIMPLEQ_INIT(bs_queue);
41
@@ -XXX,XX +XXX,XX @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
42
* If all devices prepare successfully, then the changes are committed
43
* to all devices.
44
*
45
+ * All affected nodes must be drained between bdrv_reopen_queue() and
46
+ * bdrv_reopen_multiple().
47
*/
48
int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
22
{
49
{
23
- if (c == NULL) {
50
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er
24
- *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
51
25
- *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
52
assert(bs_queue != NULL);
26
- return;
53
27
- }
54
- aio_context_release(ctx);
55
- bdrv_drain_all_begin();
56
- aio_context_acquire(ctx);
28
-
57
-
29
- *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
58
QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
30
- (c->perm & DEFAULT_PERM_UNCHANGED);
59
+ assert(bs_entry->state.bs->quiesce_counter > 0);
31
- *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
60
if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
32
- (c->shared_perm & DEFAULT_PERM_UNCHANGED);
61
error_propagate(errp, local_err);
33
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
62
goto cleanup;
34
+ *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
63
@@ -XXX,XX +XXX,XX @@ cleanup:
64
}
65
g_free(bs_queue);
66
67
- bdrv_drain_all_end();
68
-
69
return ret;
35
}
70
}
36
71
37
void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
72
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
73
{
74
int ret = -1;
75
Error *local_err = NULL;
76
- BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
77
+ BlockReopenQueue *queue;
78
79
+ bdrv_subtree_drained_begin(bs);
80
+
81
+ queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
82
ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
83
if (local_err != NULL) {
84
error_propagate(errp, local_err);
85
}
86
+
87
+ bdrv_subtree_drained_end(bs);
88
+
89
return ret;
90
}
91
92
diff --git a/block/replication.c b/block/replication.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/block/replication.c
95
+++ b/block/replication.c
96
@@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
97
new_secondary_flags = s->orig_secondary_flags;
98
}
99
100
+ bdrv_subtree_drained_begin(s->hidden_disk->bs);
101
+ bdrv_subtree_drained_begin(s->secondary_disk->bs);
102
+
103
if (orig_hidden_flags != new_hidden_flags) {
104
reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL,
105
new_hidden_flags);
106
@@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
107
reopen_queue, &local_err);
108
error_propagate(errp, local_err);
109
}
110
+
111
+ bdrv_subtree_drained_end(s->hidden_disk->bs);
112
+ bdrv_subtree_drained_end(s->secondary_disk->bs);
113
}
114
115
static void backup_job_cleanup(BlockDriverState *bs)
116
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/qemu-io-cmds.c
119
+++ b/qemu-io-cmds.c
120
@@ -XXX,XX +XXX,XX @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
121
opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL;
122
qemu_opts_reset(&reopen_opts);
123
124
+ bdrv_subtree_drained_begin(bs);
125
brq = bdrv_reopen_queue(NULL, bs, opts, flags);
126
bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err);
127
+ bdrv_subtree_drained_end(bs);
128
+
129
if (local_err) {
130
error_report_err(local_err);
131
} else {
38
--
132
--
39
2.20.1
133
2.13.6
40
134
41
135
diff view generated by jsdifflib