1
The following changes since commit 67b6526cf042f22521feff5ea521a05d3dd2bf8f:
1
The following changes since commit 281f327487c9c9b1599f93c589a408bbf4a651b8:
2
2
3
Merge remote-tracking branch 'remotes/bonzini-gitlab/tags/for-upstream' into staging (2022-01-13 13:59:56 +0000)
3
Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.12-pull-request' into staging (2017-12-22 00:11:36 +0000)
4
4
5
are available in the Git repository at:
5
are available in the git repository at:
6
6
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
8
8
9
for you to fetch changes up to e5e748739562268ef4063ee77bf53ad7040b25c7:
9
for you to fetch changes up to 1a63a907507fbbcfaee3f622907ec244b7eabda8:
10
10
11
iotests/testrunner.py: refactor test_field_width (2022-01-14 12:03:16 +0100)
11
block: Keep nodes drained between reopen_queue/multiple (2017-12-22 15:05:32 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches
14
Block layer patches
15
15
16
- qemu-storage-daemon: Add vhost-user-blk help
16
----------------------------------------------------------------
17
- block-backend: Fix use-after-free for BDS pointers after aio_poll()
17
Doug Gale (1):
18
- qemu-img: Fix sparseness of output image with unaligned ranges
18
nvme: Add tracing
19
- vvfat: Fix crashes in read-write mode
20
- Fix device deletion events with -device JSON syntax
21
- Code cleanups
22
19
23
----------------------------------------------------------------
20
Edgar Kaziakhmedov (1):
24
Daniel P. Berrangé (1):
21
qcow2: get rid of qcow2_backing_read1 routine
25
softmmu: fix device deletion events with -device JSON syntax
26
22
27
Emanuele Giuseppe Esposito (3):
23
Fam Zheng (2):
28
block_int: make bdrv_backing_overridden static
24
block: Open backing image in force share mode for size probe
29
include/sysemu/blockdev.h: remove drive_mark_claimed_by_board and inline drive_def
25
block: Remove unused bdrv_requests_pending
30
include/sysemu/blockdev.h: remove drive_get_max_devs
31
26
32
Hanna Reitz (2):
27
John Snow (1):
33
iotests/stream-error-on-reset: New test
28
iotests: fix 197 for vpc
34
iotests/308: Fix for CAP_DAC_OVERRIDE
35
29
36
Kevin Wolf (3):
30
Kevin Wolf (27):
37
vvfat: Fix size of temporary qcow file
31
block: Formats don't need CONSISTENT_READ with NO_IO
38
vvfat: Fix vvfat_write() for writes before the root directory
32
block: Make bdrv_drain_invoke() recursive
39
iotests: Test qemu-img convert of zeroed data cluster
33
block: Call .drain_begin only once in bdrv_drain_all_begin()
34
test-bdrv-drain: Test BlockDriver callbacks for drain
35
block: bdrv_drain_recurse(): Remove unused begin parameter
36
block: Don't wait for requests in bdrv_drain*_end()
37
block: Unify order in drain functions
38
block: Don't acquire AioContext in hmp_qemu_io()
39
block: Document that x-blockdev-change breaks quorum children list
40
block: Assert drain_all is only called from main AioContext
41
block: Make bdrv_drain() driver callbacks non-recursive
42
test-bdrv-drain: Test callback for bdrv_drain
43
test-bdrv-drain: Test bs->quiesce_counter
44
blockjob: Pause job on draining any job BDS
45
test-bdrv-drain: Test drain vs. block jobs
46
block: Don't block_job_pause_all() in bdrv_drain_all()
47
block: Nested drain_end must still call callbacks
48
test-bdrv-drain: Test nested drain sections
49
block: Don't notify parents in drain call chain
50
block: Add bdrv_subtree_drained_begin/end()
51
test-bdrv-drain: Tests for bdrv_subtree_drain
52
test-bdrv-drain: Test behaviour in coroutine context
53
test-bdrv-drain: Recursive draining with multiple parents
54
block: Allow graph changes in subtree drained section
55
test-bdrv-drain: Test graph changes in drained section
56
commit: Simplify reopen of base
57
block: Keep nodes drained between reopen_queue/multiple
40
58
41
Philippe Mathieu-Daudé (3):
59
Thomas Huth (3):
42
docs: Correct 'vhost-user-blk' spelling
60
block: Remove the obsolete -drive boot=on|off parameter
43
qemu-storage-daemon: Add vhost-user-blk help
61
block: Remove the deprecated -hdachs option
44
qapi/block: Restrict vhost-user-blk to CONFIG_VHOST_USER_BLK_SERVER
62
block: Mention -drive cyls/heads/secs/trans/serial/addr in deprecation chapter
45
63
46
Stefan Hajnoczi (1):
64
qapi/block-core.json | 4 +
47
block-backend: prevent dangling BDS pointers across aio_poll()
65
block/qcow2.h | 3 -
66
include/block/block.h | 15 +-
67
include/block/block_int.h | 6 +-
68
block.c | 75 ++++-
69
block/commit.c | 8 +-
70
block/io.c | 164 +++++++---
71
block/qcow2.c | 51 +--
72
block/replication.c | 6 +
73
blockdev.c | 11 -
74
blockjob.c | 22 +-
75
hmp.c | 6 -
76
hw/block/nvme.c | 349 +++++++++++++++++----
77
qemu-io-cmds.c | 3 +
78
tests/test-bdrv-drain.c | 651 +++++++++++++++++++++++++++++++++++++++
79
vl.c | 86 +-----
80
hw/block/trace-events | 93 ++++++
81
qemu-doc.texi | 29 +-
82
qemu-options.hx | 19 +-
83
tests/Makefile.include | 2 +
84
tests/qemu-iotests/197 | 4 +
85
tests/qemu-iotests/common.filter | 3 +-
86
22 files changed, 1294 insertions(+), 316 deletions(-)
87
create mode 100644 tests/test-bdrv-drain.c
48
88
49
Vladimir Sementsov-Ogievskiy (3):
50
qemu-img: make is_allocated_sectors() more efficient
51
block: drop BLK_PERM_GRAPH_MOD
52
iotests/testrunner.py: refactor test_field_width
53
54
qapi/block-core.json | 7 +-
55
qapi/block-export.json | 6 +-
56
qapi/qdev.json | 5 +-
57
docs/tools/qemu-storage-daemon.rst | 2 +-
58
include/block/block.h | 9 +-
59
include/block/block_int.h | 3 -
60
include/sysemu/blockdev.h | 3 -
61
block.c | 11 +-
62
block/block-backend.c | 19 ++-
63
block/commit.c | 1 -
64
block/mirror.c | 15 +--
65
block/monitor/block-hmp-cmds.c | 2 +-
66
block/vvfat.c | 37 ++++--
67
blockdev.c | 24 +---
68
hw/block/block.c | 3 +-
69
qemu-img.c | 23 +++-
70
softmmu/vl.c | 8 +-
71
storage-daemon/qemu-storage-daemon.c | 13 ++
72
tests/qtest/device-plug-test.c | 19 +++
73
scripts/render_block_graph.py | 1 -
74
tests/qemu-iotests/testrunner.py | 21 ++--
75
tests/qemu-iotests/122 | 1 +
76
tests/qemu-iotests/122.out | 2 +
77
tests/qemu-iotests/273.out | 4 -
78
tests/qemu-iotests/308 | 25 +++-
79
tests/qemu-iotests/308.out | 2 +-
80
tests/qemu-iotests/tests/stream-error-on-reset | 140 +++++++++++++++++++++
81
tests/qemu-iotests/tests/stream-error-on-reset.out | 5 +
82
28 files changed, 307 insertions(+), 104 deletions(-)
83
create mode 100755 tests/qemu-iotests/tests/stream-error-on-reset
84
create mode 100644 tests/qemu-iotests/tests/stream-error-on-reset.out
85
86
diff view generated by jsdifflib
New patch
1
Commit 1f4ad7d fixed 'qemu-img info' for raw images that are currently
2
in use as a mirror target. It is not enough for image formats, though,
3
as these still unconditionally request BLK_PERM_CONSISTENT_READ.
1
4
5
As this permission is geared towards whether the guest-visible data is
6
consistent, and has no impact on whether the metadata is sane, and
7
'qemu-img info' does not read guest-visible data (except for the raw
8
format), it makes sense to not require BLK_PERM_CONSISTENT_READ if there
9
is not going to be any guest I/O performed, regardless of image format.
10
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
block.c | 6 +++++-
14
1 file changed, 5 insertions(+), 1 deletion(-)
15
16
diff --git a/block.c b/block.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block.c
19
+++ b/block.c
20
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
21
assert(role == &child_backing || role == &child_file);
22
23
if (!backing) {
24
+ int flags = bdrv_reopen_get_flags(reopen_queue, bs);
25
+
26
/* Apart from the modifications below, the same permissions are
27
* forwarded and left alone as for filters */
28
bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
29
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
30
31
/* bs->file always needs to be consistent because of the metadata. We
32
* can never allow other users to resize or write to it. */
33
- perm |= BLK_PERM_CONSISTENT_READ;
34
+ if (!(flags & BDRV_O_NO_IO)) {
35
+ perm |= BLK_PERM_CONSISTENT_READ;
36
+ }
37
shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
38
} else {
39
/* We want consistent read from backing files if the parent needs it.
40
--
41
2.13.6
42
43
diff view generated by jsdifflib
New patch
1
From: John Snow <jsnow@redhat.com>
1
2
3
VPC has some difficulty creating geometries of particular size.
4
However, we can indeed force it to use a literal one, so let's
5
do that for the sake of test 197, which is testing some specific
6
offsets.
7
8
Signed-off-by: John Snow <jsnow@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Reviewed-by: Lukáš Doktor <ldoktor@redhat.com>
13
---
14
tests/qemu-iotests/197 | 4 ++++
15
tests/qemu-iotests/common.filter | 3 ++-
16
2 files changed, 6 insertions(+), 1 deletion(-)
17
18
diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
19
index XXXXXXX..XXXXXXX 100755
20
--- a/tests/qemu-iotests/197
21
+++ b/tests/qemu-iotests/197
22
@@ -XXX,XX +XXX,XX @@ echo '=== Copy-on-read ==='
23
echo
24
25
# Prep the images
26
+# VPC rounds image sizes to a specific geometry, force a specific size.
27
+if [ "$IMGFMT" = "vpc" ]; then
28
+ IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
29
+fi
30
_make_test_img 4G
31
$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
32
IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
33
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tests/qemu-iotests/common.filter
36
+++ b/tests/qemu-iotests/common.filter
37
@@ -XXX,XX +XXX,XX @@ _filter_img_create()
38
-e "s# log_size=[0-9]\\+##g" \
39
-e "s# refcount_bits=[0-9]\\+##g" \
40
-e "s# key-secret=[a-zA-Z0-9]\\+##g" \
41
- -e "s# iter-time=[0-9]\\+##g"
42
+ -e "s# iter-time=[0-9]\\+##g" \
43
+ -e "s# force_size=\\(on\\|off\\)##g"
44
}
45
46
_filter_img_info()
47
--
48
2.13.6
49
50
diff view generated by jsdifflib
New patch
1
This change separates bdrv_drain_invoke(), which calls the BlockDriver
2
drain callbacks, from bdrv_drain_recurse(). Instead, the function
3
performs its own recursion now.
1
4
5
One reason for this is that bdrv_drain_recurse() can be called multiple
6
times by bdrv_drain_all_begin(), but the callbacks may only be called
7
once. The separation is necessary to fix this bug.
8
9
The other reason is that we intend to go to a model where we call all
10
driver callbacks first, and only then start polling. This is not fully
11
achieved yet with this patch, as bdrv_drain_invoke() contains a
12
BDRV_POLL_WHILE() loop for the block driver callbacks, which can still
13
call callbacks for any unrelated event. It's a step in this direction
14
anyway.
15
16
Cc: qemu-stable@nongnu.org
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
19
---
20
block/io.c | 14 +++++++++++---
21
1 file changed, 11 insertions(+), 3 deletions(-)
22
23
diff --git a/block/io.c b/block/io.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/block/io.c
26
+++ b/block/io.c
27
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
28
bdrv_wakeup(bs);
29
}
30
31
+/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
32
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
33
{
34
+ BdrvChild *child, *tmp;
35
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
36
37
if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
38
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
39
data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
40
bdrv_coroutine_enter(bs, data.co);
41
BDRV_POLL_WHILE(bs, !data.done);
42
+
43
+ QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
44
+ bdrv_drain_invoke(child->bs, begin);
45
+ }
46
}
47
48
static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
49
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
50
BdrvChild *child, *tmp;
51
bool waited;
52
53
- /* Ensure any pending metadata writes are submitted to bs->file. */
54
- bdrv_drain_invoke(bs, begin);
55
-
56
/* Wait for drained requests to finish */
57
waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
58
59
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
60
bdrv_parent_drained_begin(bs);
61
}
62
63
+ bdrv_drain_invoke(bs, true);
64
bdrv_drain_recurse(bs, true);
65
}
66
67
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
68
}
69
70
bdrv_parent_drained_end(bs);
71
+ bdrv_drain_invoke(bs, false);
72
bdrv_drain_recurse(bs, false);
73
aio_enable_external(bdrv_get_aio_context(bs));
74
}
75
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
76
aio_context_acquire(aio_context);
77
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
78
if (aio_context == bdrv_get_aio_context(bs)) {
79
+ /* FIXME Calling this multiple times is wrong */
80
+ bdrv_drain_invoke(bs, true);
81
waited |= bdrv_drain_recurse(bs, true);
82
}
83
}
84
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
85
aio_context_acquire(aio_context);
86
aio_enable_external(aio_context);
87
bdrv_parent_drained_end(bs);
88
+ bdrv_drain_invoke(bs, false);
89
bdrv_drain_recurse(bs, false);
90
aio_context_release(aio_context);
91
}
92
--
93
2.13.6
94
95
diff view generated by jsdifflib
New patch
1
bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver
2
callback inside its polling loop. This means that how many times it got
3
called for each node depended on how long it had to poll the event loop.
1
4
5
This is obviously not right and results in nodes that stay drained even
6
after bdrv_drain_all_end(), which calls .bdrv_co_drain_end() once per
7
node.
8
9
Fix bdrv_drain_all_begin() to call the callback only once, too.
10
11
Cc: qemu-stable@nongnu.org
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
---
15
block/io.c | 3 +--
16
1 file changed, 1 insertion(+), 2 deletions(-)
17
18
diff --git a/block/io.c b/block/io.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/io.c
21
+++ b/block/io.c
22
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
23
aio_context_acquire(aio_context);
24
bdrv_parent_drained_begin(bs);
25
aio_disable_external(aio_context);
26
+ bdrv_drain_invoke(bs, true);
27
aio_context_release(aio_context);
28
29
if (!g_slist_find(aio_ctxs, aio_context)) {
30
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
31
aio_context_acquire(aio_context);
32
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
33
if (aio_context == bdrv_get_aio_context(bs)) {
34
- /* FIXME Calling this multiple times is wrong */
35
- bdrv_drain_invoke(bs, true);
36
waited |= bdrv_drain_recurse(bs, true);
37
}
38
}
39
--
40
2.13.6
41
42
diff view generated by jsdifflib
New patch
1
This adds a test case that the BlockDriver callbacks for drain are
2
called in bdrv_drain_all_begin/end(), and that both of them are called
3
exactly once.
1
4
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
---
9
tests/test-bdrv-drain.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++
10
tests/Makefile.include | 2 +
11
2 files changed, 139 insertions(+)
12
create mode 100644 tests/test-bdrv-drain.c
13
14
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
15
new file mode 100644
16
index XXXXXXX..XXXXXXX
17
--- /dev/null
18
+++ b/tests/test-bdrv-drain.c
19
@@ -XXX,XX +XXX,XX @@
20
+/*
21
+ * Block node draining tests
22
+ *
23
+ * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
24
+ *
25
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
26
+ * of this software and associated documentation files (the "Software"), to deal
27
+ * in the Software without restriction, including without limitation the rights
28
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
29
+ * copies of the Software, and to permit persons to whom the Software is
30
+ * furnished to do so, subject to the following conditions:
31
+ *
32
+ * The above copyright notice and this permission notice shall be included in
33
+ * all copies or substantial portions of the Software.
34
+ *
35
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
36
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
37
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
38
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
39
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
40
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
41
+ * THE SOFTWARE.
42
+ */
43
+
44
+#include "qemu/osdep.h"
45
+#include "block/block.h"
46
+#include "sysemu/block-backend.h"
47
+#include "qapi/error.h"
48
+
49
+typedef struct BDRVTestState {
50
+ int drain_count;
51
+} BDRVTestState;
52
+
53
+static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
54
+{
55
+ BDRVTestState *s = bs->opaque;
56
+ s->drain_count++;
57
+}
58
+
59
+static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
60
+{
61
+ BDRVTestState *s = bs->opaque;
62
+ s->drain_count--;
63
+}
64
+
65
+static void bdrv_test_close(BlockDriverState *bs)
66
+{
67
+ BDRVTestState *s = bs->opaque;
68
+ g_assert_cmpint(s->drain_count, >, 0);
69
+}
70
+
71
+static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
72
+ uint64_t offset, uint64_t bytes,
73
+ QEMUIOVector *qiov, int flags)
74
+{
75
+ /* We want this request to stay until the polling loop in drain waits for
76
+ * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
77
+ * first and polls its result, too, but it shouldn't accidentally complete
78
+ * this request yet. */
79
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
80
+
81
+ return 0;
82
+}
83
+
84
+static BlockDriver bdrv_test = {
85
+ .format_name = "test",
86
+ .instance_size = sizeof(BDRVTestState),
87
+
88
+ .bdrv_close = bdrv_test_close,
89
+ .bdrv_co_preadv = bdrv_test_co_preadv,
90
+
91
+ .bdrv_co_drain_begin = bdrv_test_co_drain_begin,
92
+ .bdrv_co_drain_end = bdrv_test_co_drain_end,
93
+};
94
+
95
+static void aio_ret_cb(void *opaque, int ret)
96
+{
97
+ int *aio_ret = opaque;
98
+ *aio_ret = ret;
99
+}
100
+
101
+static void test_drv_cb_drain_all(void)
102
+{
103
+ BlockBackend *blk;
104
+ BlockDriverState *bs;
105
+ BDRVTestState *s;
106
+ BlockAIOCB *acb;
107
+ int aio_ret;
108
+
109
+ QEMUIOVector qiov;
110
+ struct iovec iov = {
111
+ .iov_base = NULL,
112
+ .iov_len = 0,
113
+ };
114
+ qemu_iovec_init_external(&qiov, &iov, 1);
115
+
116
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
117
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
118
+ &error_abort);
119
+ s = bs->opaque;
120
+ blk_insert_bs(blk, bs, &error_abort);
121
+
122
+ /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
123
+ g_assert_cmpint(s->drain_count, ==, 0);
124
+ bdrv_drain_all_begin();
125
+ g_assert_cmpint(s->drain_count, ==, 1);
126
+ bdrv_drain_all_end();
127
+ g_assert_cmpint(s->drain_count, ==, 0);
128
+
129
+ /* Now do the same while a request is pending */
130
+ aio_ret = -EINPROGRESS;
131
+ acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
132
+ g_assert(acb != NULL);
133
+ g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
134
+
135
+ g_assert_cmpint(s->drain_count, ==, 0);
136
+ bdrv_drain_all_begin();
137
+ g_assert_cmpint(aio_ret, ==, 0);
138
+ g_assert_cmpint(s->drain_count, ==, 1);
139
+ bdrv_drain_all_end();
140
+ g_assert_cmpint(s->drain_count, ==, 0);
141
+
142
+ bdrv_unref(bs);
143
+ blk_unref(blk);
144
+}
145
+
146
+int main(int argc, char **argv)
147
+{
148
+ bdrv_init();
149
+ qemu_init_main_loop(&error_abort);
150
+
151
+ g_test_init(&argc, &argv, NULL);
152
+
153
+ g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
154
+
155
+ return g_test_run();
156
+}
157
diff --git a/tests/Makefile.include b/tests/Makefile.include
158
index XXXXXXX..XXXXXXX 100644
159
--- a/tests/Makefile.include
160
+++ b/tests/Makefile.include
161
@@ -XXX,XX +XXX,XX @@ gcov-files-test-thread-pool-y = thread-pool.c
162
gcov-files-test-hbitmap-y = util/hbitmap.c
163
check-unit-y += tests/test-hbitmap$(EXESUF)
164
gcov-files-test-hbitmap-y = blockjob.c
165
+check-unit-y += tests/test-bdrv-drain$(EXESUF)
166
check-unit-y += tests/test-blockjob$(EXESUF)
167
check-unit-y += tests/test-blockjob-txn$(EXESUF)
168
check-unit-y += tests/test-x86-cpuid$(EXESUF)
169
@@ -XXX,XX +XXX,XX @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
170
tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
171
tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
172
tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
173
+tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y)
174
tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
175
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
176
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
177
--
178
2.13.6
179
180
diff view generated by jsdifflib
New patch
1
Now that the bdrv_drain_invoke() calls are pulled up to the callers of
2
bdrv_drain_recurse(), the 'begin' parameter isn't needed any more.
1
3
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
block/io.c | 12 ++++++------
8
1 file changed, 6 insertions(+), 6 deletions(-)
9
10
diff --git a/block/io.c b/block/io.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/block/io.c
13
+++ b/block/io.c
14
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
15
}
16
}
17
18
-static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
19
+static bool bdrv_drain_recurse(BlockDriverState *bs)
20
{
21
BdrvChild *child, *tmp;
22
bool waited;
23
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
24
*/
25
bdrv_ref(bs);
26
}
27
- waited |= bdrv_drain_recurse(bs, begin);
28
+ waited |= bdrv_drain_recurse(bs);
29
if (in_main_loop) {
30
bdrv_unref(bs);
31
}
32
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
33
}
34
35
bdrv_drain_invoke(bs, true);
36
- bdrv_drain_recurse(bs, true);
37
+ bdrv_drain_recurse(bs);
38
}
39
40
void bdrv_drained_end(BlockDriverState *bs)
41
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
42
43
bdrv_parent_drained_end(bs);
44
bdrv_drain_invoke(bs, false);
45
- bdrv_drain_recurse(bs, false);
46
+ bdrv_drain_recurse(bs);
47
aio_enable_external(bdrv_get_aio_context(bs));
48
}
49
50
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
51
aio_context_acquire(aio_context);
52
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
53
if (aio_context == bdrv_get_aio_context(bs)) {
54
- waited |= bdrv_drain_recurse(bs, true);
55
+ waited |= bdrv_drain_recurse(bs);
56
}
57
}
58
aio_context_release(aio_context);
59
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
60
aio_enable_external(aio_context);
61
bdrv_parent_drained_end(bs);
62
bdrv_drain_invoke(bs, false);
63
- bdrv_drain_recurse(bs, false);
64
+ bdrv_drain_recurse(bs);
65
aio_context_release(aio_context);
66
}
67
68
--
69
2.13.6
70
71
diff view generated by jsdifflib
New patch
1
The device is drained, so there is no point in waiting for requests at
2
the end of the drained section. Remove the bdrv_drain_recurse() calls
3
there.
1
4
5
The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e
6
in order to call the .bdrv_co_drain_end() driver callback. This is now
7
done by a separate bdrv_drain_invoke() call.
8
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
block/io.c | 2 --
14
1 file changed, 2 deletions(-)
15
16
diff --git a/block/io.c b/block/io.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/io.c
19
+++ b/block/io.c
20
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
21
22
bdrv_parent_drained_end(bs);
23
bdrv_drain_invoke(bs, false);
24
- bdrv_drain_recurse(bs);
25
aio_enable_external(bdrv_get_aio_context(bs));
26
}
27
28
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
29
aio_enable_external(aio_context);
30
bdrv_parent_drained_end(bs);
31
bdrv_drain_invoke(bs, false);
32
- bdrv_drain_recurse(bs);
33
aio_context_release(aio_context);
34
}
35
36
--
37
2.13.6
38
39
diff view generated by jsdifflib
New patch
1
Drain requests are propagated to child nodes, parent nodes and directly
2
to the AioContext. The order in which this happened was different
3
between all combinations of drain/drain_all and begin/end.
1
4
5
The correct order is to keep children only drained when their parents
6
are also drained. This means that at the start of a drained section, the
7
AioContext needs to be drained first, the parents second and only then
8
the children. The correct order for the end of a drained section is the
9
opposite.
10
11
This patch changes the three other functions to follow the example of
12
bdrv_drained_begin(), which is the only one that got it right.
13
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
17
block/io.c | 12 ++++++++----
18
1 file changed, 8 insertions(+), 4 deletions(-)
19
20
diff --git a/block/io.c b/block/io.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/block/io.c
23
+++ b/block/io.c
24
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
25
return;
26
}
27
28
+ /* Stop things in parent-to-child order */
29
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
30
aio_disable_external(bdrv_get_aio_context(bs));
31
bdrv_parent_drained_begin(bs);
32
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
33
return;
34
}
35
36
- bdrv_parent_drained_end(bs);
37
+ /* Re-enable things in child-to-parent order */
38
bdrv_drain_invoke(bs, false);
39
+ bdrv_parent_drained_end(bs);
40
aio_enable_external(bdrv_get_aio_context(bs));
41
}
42
43
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
44
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
45
AioContext *aio_context = bdrv_get_aio_context(bs);
46
47
+ /* Stop things in parent-to-child order */
48
aio_context_acquire(aio_context);
49
- bdrv_parent_drained_begin(bs);
50
aio_disable_external(aio_context);
51
+ bdrv_parent_drained_begin(bs);
52
bdrv_drain_invoke(bs, true);
53
aio_context_release(aio_context);
54
55
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
56
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
57
AioContext *aio_context = bdrv_get_aio_context(bs);
58
59
+ /* Re-enable things in child-to-parent order */
60
aio_context_acquire(aio_context);
61
- aio_enable_external(aio_context);
62
- bdrv_parent_drained_end(bs);
63
bdrv_drain_invoke(bs, false);
64
+ bdrv_parent_drained_end(bs);
65
+ aio_enable_external(aio_context);
66
aio_context_release(aio_context);
67
}
68
69
--
70
2.13.6
71
72
diff view generated by jsdifflib
1
The size of the qcow file was calculated so that only the FAT partition
1
Commit 15afd94a047 added code to acquire and release the AioContext in
2
would fit on it, but not the whole disk. However, offsets relative to
2
qemuio_command(). This means that the lock is taken twice now in the
3
the whole disk are used to access it, so increase its size to be large
3
call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for
4
enough for that.
4
any requests issued to nodes in a non-mainloop AioContext.
5
6
Dropping the first locking from hmp_qemu_io() fixes the problem.
5
7
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Message-Id: <20211209151815.23495-1-kwolf@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
---
10
---
10
block/vvfat.c | 7 +++----
11
hmp.c | 6 ------
11
1 file changed, 3 insertions(+), 4 deletions(-)
12
1 file changed, 6 deletions(-)
12
13
13
diff --git a/block/vvfat.c b/block/vvfat.c
14
diff --git a/hmp.c b/hmp.c
14
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
15
--- a/block/vvfat.c
16
--- a/hmp.c
16
+++ b/block/vvfat.c
17
+++ b/hmp.c
17
@@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
18
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
18
dirname, cyls, heads, secs));
19
{
19
20
BlockBackend *blk;
20
s->sector_count = cyls * heads * secs - s->offset_to_bootsector;
21
BlockBackend *local_blk = NULL;
21
+ bs->total_sectors = cyls * heads * secs;
22
- AioContext *aio_context;
22
23
const char* device = qdict_get_str(qdict, "device");
23
if (qemu_opt_get_bool(opts, "rw", false)) {
24
const char* command = qdict_get_str(qdict, "command");
24
if (!bdrv_is_read_only(bs)) {
25
Error *err = NULL;
25
@@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
26
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
26
}
27
}
27
}
28
}
28
29
29
- bs->total_sectors = cyls * heads * secs;
30
- aio_context = blk_get_aio_context(blk);
31
- aio_context_acquire(aio_context);
30
-
32
-
31
if (init_directories(s, dirname, heads, secs, errp)) {
33
/*
32
ret = -EIO;
34
* Notably absent: Proper permission management. This is sad, but it seems
33
goto fail;
35
* almost impossible to achieve without changing the semantics and thereby
34
@@ -XXX,XX +XXX,XX @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
36
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
35
}
37
*/
36
38
qemuio_command(blk, command);
37
opts = qemu_opts_create(bdrv_qcow->create_opts, NULL, 0, &error_abort);
39
38
- qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512,
40
- aio_context_release(aio_context);
39
- &error_abort);
41
-
40
+ qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
42
fail:
41
+ bs->total_sectors * BDRV_SECTOR_SIZE, &error_abort);
43
blk_unref(local_blk);
42
qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:", &error_abort);
44
hmp_handle_error(mon, &err);
43
44
ret = bdrv_create(bdrv_qcow, s->qcow_filename, opts, errp);
45
--
45
--
46
2.31.1
46
2.13.6
47
47
48
48
diff view generated by jsdifflib
New patch
1
From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
1
2
3
Since bdrv_co_preadv does all necessary checks including
4
reading after the end of the backing file, avoid duplication
5
of verification before bdrv_co_preadv call.
6
7
Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
12
block/qcow2.h | 3 ---
13
block/qcow2.c | 51 ++++++++-------------------------------------------
14
2 files changed, 8 insertions(+), 46 deletions(-)
15
16
diff --git a/block/qcow2.h b/block/qcow2.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/qcow2.h
19
+++ b/block/qcow2.h
20
@@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset)
21
}
22
23
/* qcow2.c functions */
24
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
25
- int64_t sector_num, int nb_sectors);
26
-
27
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
28
int refcount_order, bool generous_increase,
29
uint64_t *refblock_count);
30
diff --git a/block/qcow2.c b/block/qcow2.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/block/qcow2.c
33
+++ b/block/qcow2.c
34
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
35
return status;
36
}
37
38
-/* handle reading after the end of the backing file */
39
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
40
- int64_t offset, int bytes)
41
-{
42
- uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
43
- int n1;
44
-
45
- if ((offset + bytes) <= bs_size) {
46
- return bytes;
47
- }
48
-
49
- if (offset >= bs_size) {
50
- n1 = 0;
51
- } else {
52
- n1 = bs_size - offset;
53
- }
54
-
55
- qemu_iovec_memset(qiov, n1, 0, bytes - n1);
56
-
57
- return n1;
58
-}
59
-
60
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
61
uint64_t bytes, QEMUIOVector *qiov,
62
int flags)
63
{
64
BDRVQcow2State *s = bs->opaque;
65
- int offset_in_cluster, n1;
66
+ int offset_in_cluster;
67
int ret;
68
unsigned int cur_bytes; /* number of bytes in current iteration */
69
uint64_t cluster_offset = 0;
70
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
71
case QCOW2_CLUSTER_UNALLOCATED:
72
73
if (bs->backing) {
74
- /* read from the base image */
75
- n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
76
- offset, cur_bytes);
77
- if (n1 > 0) {
78
- QEMUIOVector local_qiov;
79
-
80
- qemu_iovec_init(&local_qiov, hd_qiov.niov);
81
- qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
82
-
83
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
84
- qemu_co_mutex_unlock(&s->lock);
85
- ret = bdrv_co_preadv(bs->backing, offset, n1,
86
- &local_qiov, 0);
87
- qemu_co_mutex_lock(&s->lock);
88
-
89
- qemu_iovec_destroy(&local_qiov);
90
-
91
- if (ret < 0) {
92
- goto fail;
93
- }
94
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
95
+ qemu_co_mutex_unlock(&s->lock);
96
+ ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
97
+ &hd_qiov, 0);
98
+ qemu_co_mutex_lock(&s->lock);
99
+ if (ret < 0) {
100
+ goto fail;
101
}
102
} else {
103
/* Note: in this case, no need to wait */
104
--
105
2.13.6
106
107
diff view generated by jsdifflib
New patch
1
Removing a quorum child node with x-blockdev-change results in a quorum
2
driver state that cannot be recreated with create options because it
3
would require a list with gaps. This causes trouble in at least
4
.bdrv_refresh_filename().
1
5
6
Document this problem so that we won't accidentally mark the command
7
stable without having addressed it.
8
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Alberto Garcia <berto@igalia.com>
11
---
12
qapi/block-core.json | 4 ++++
13
1 file changed, 4 insertions(+)
14
15
diff --git a/qapi/block-core.json b/qapi/block-core.json
16
index XXXXXXX..XXXXXXX 100644
17
--- a/qapi/block-core.json
18
+++ b/qapi/block-core.json
19
@@ -XXX,XX +XXX,XX @@
20
# does not support all kinds of operations, all kinds of children, nor
21
# all block drivers.
22
#
23
+# FIXME Removing children from a quorum node means introducing gaps in the
24
+# child indices. This cannot be represented in the 'children' list of
25
+# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename().
26
+#
27
# Warning: The data in a new quorum child MUST be consistent with that of
28
# the rest of the array.
29
#
30
--
31
2.13.6
32
33
diff view generated by jsdifflib
New patch
1
From: Doug Gale <doug16k@gmail.com>
1
2
3
Add trace output for commands, errors, and undefined behavior.
4
Add guest error log output for undefined behavior.
5
Report invalid undefined accesses to MMIO.
6
Annotate unlikely error checks with unlikely.
7
8
Signed-off-by: Doug Gale <doug16k@gmail.com>
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
hw/block/nvme.c | 349 ++++++++++++++++++++++++++++++++++++++++++--------
14
hw/block/trace-events | 93 ++++++++++++++
15
2 files changed, 390 insertions(+), 52 deletions(-)
16
17
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/block/nvme.c
20
+++ b/hw/block/nvme.c
21
@@ -XXX,XX +XXX,XX @@
22
#include "qapi/visitor.h"
23
#include "sysemu/block-backend.h"
24
25
+#include "qemu/log.h"
26
+#include "trace.h"
27
#include "nvme.h"
28
29
+#define NVME_GUEST_ERR(trace, fmt, ...) \
30
+ do { \
31
+ (trace_##trace)(__VA_ARGS__); \
32
+ qemu_log_mask(LOG_GUEST_ERROR, #trace \
33
+ " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
34
+ } while (0)
35
+
36
static void nvme_process_sq(void *opaque);
37
38
static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
39
@@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
40
{
41
if (cq->irq_enabled) {
42
if (msix_enabled(&(n->parent_obj))) {
43
+ trace_nvme_irq_msix(cq->vector);
44
msix_notify(&(n->parent_obj), cq->vector);
45
} else {
46
+ trace_nvme_irq_pin();
47
pci_irq_pulse(&n->parent_obj);
48
}
49
+ } else {
50
+ trace_nvme_irq_masked();
51
}
52
}
53
54
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
55
trans_len = MIN(len, trans_len);
56
int num_prps = (len >> n->page_bits) + 1;
57
58
- if (!prp1) {
59
+ if (unlikely(!prp1)) {
60
+ trace_nvme_err_invalid_prp();
61
return NVME_INVALID_FIELD | NVME_DNR;
62
} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
63
prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
64
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
65
}
66
len -= trans_len;
67
if (len) {
68
- if (!prp2) {
69
+ if (unlikely(!prp2)) {
70
+ trace_nvme_err_invalid_prp2_missing();
71
goto unmap;
72
}
73
if (len > n->page_size) {
74
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
75
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
76
77
if (i == n->max_prp_ents - 1 && len > n->page_size) {
78
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
79
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
80
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
81
goto unmap;
82
}
83
84
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
85
prp_ent = le64_to_cpu(prp_list[i]);
86
}
87
88
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
89
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
90
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
91
goto unmap;
92
}
93
94
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
95
i++;
96
}
97
} else {
98
- if (prp2 & (n->page_size - 1)) {
99
+ if (unlikely(prp2 & (n->page_size - 1))) {
100
+ trace_nvme_err_invalid_prp2_align(prp2);
101
goto unmap;
102
}
103
if (qsg->nsg) {
104
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
105
QEMUIOVector iov;
106
uint16_t status = NVME_SUCCESS;
107
108
+ trace_nvme_dma_read(prp1, prp2);
109
+
110
if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
111
return NVME_INVALID_FIELD | NVME_DNR;
112
}
113
if (qsg.nsg > 0) {
114
- if (dma_buf_read(ptr, len, &qsg)) {
115
+ if (unlikely(dma_buf_read(ptr, len, &qsg))) {
116
+ trace_nvme_err_invalid_dma();
117
status = NVME_INVALID_FIELD | NVME_DNR;
118
}
119
qemu_sglist_destroy(&qsg);
120
} else {
121
- if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
122
+ if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
123
+ trace_nvme_err_invalid_dma();
124
status = NVME_INVALID_FIELD | NVME_DNR;
125
}
126
qemu_iovec_destroy(&iov);
127
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
128
uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
129
uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);
130
131
- if (slba + nlb > ns->id_ns.nsze) {
132
+ if (unlikely(slba + nlb > ns->id_ns.nsze)) {
133
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
134
return NVME_LBA_RANGE | NVME_DNR;
135
}
136
137
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
138
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
139
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
140
141
- if ((slba + nlb) > ns->id_ns.nsze) {
142
+ trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
143
+
144
+ if (unlikely((slba + nlb) > ns->id_ns.nsze)) {
145
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
146
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
147
return NVME_LBA_RANGE | NVME_DNR;
148
}
149
150
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
151
NvmeNamespace *ns;
152
uint32_t nsid = le32_to_cpu(cmd->nsid);
153
154
- if (nsid == 0 || nsid > n->num_namespaces) {
155
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
156
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
157
return NVME_INVALID_NSID | NVME_DNR;
158
}
159
160
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
161
case NVME_CMD_READ:
162
return nvme_rw(n, ns, cmd, req);
163
default:
164
+ trace_nvme_err_invalid_opc(cmd->opcode);
165
return NVME_INVALID_OPCODE | NVME_DNR;
166
}
167
}
168
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
169
NvmeCQueue *cq;
170
uint16_t qid = le16_to_cpu(c->qid);
171
172
- if (!qid || nvme_check_sqid(n, qid)) {
173
+ if (unlikely(!qid || nvme_check_sqid(n, qid))) {
174
+ trace_nvme_err_invalid_del_sq(qid);
175
return NVME_INVALID_QID | NVME_DNR;
176
}
177
178
+ trace_nvme_del_sq(qid);
179
+
180
sq = n->sq[qid];
181
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
182
req = QTAILQ_FIRST(&sq->out_req_list);
183
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
184
uint16_t qflags = le16_to_cpu(c->sq_flags);
185
uint64_t prp1 = le64_to_cpu(c->prp1);
186
187
- if (!cqid || nvme_check_cqid(n, cqid)) {
188
+ trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);
189
+
190
+ if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
191
+ trace_nvme_err_invalid_create_sq_cqid(cqid);
192
return NVME_INVALID_CQID | NVME_DNR;
193
}
194
- if (!sqid || !nvme_check_sqid(n, sqid)) {
195
+ if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) {
196
+ trace_nvme_err_invalid_create_sq_sqid(sqid);
197
return NVME_INVALID_QID | NVME_DNR;
198
}
199
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
200
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
201
+ trace_nvme_err_invalid_create_sq_size(qsize);
202
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
203
}
204
- if (!prp1 || prp1 & (n->page_size - 1)) {
205
+ if (unlikely(!prp1 || prp1 & (n->page_size - 1))) {
206
+ trace_nvme_err_invalid_create_sq_addr(prp1);
207
return NVME_INVALID_FIELD | NVME_DNR;
208
}
209
- if (!(NVME_SQ_FLAGS_PC(qflags))) {
210
+ if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
211
+ trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
212
return NVME_INVALID_FIELD | NVME_DNR;
213
}
214
sq = g_malloc0(sizeof(*sq));
215
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
216
NvmeCQueue *cq;
217
uint16_t qid = le16_to_cpu(c->qid);
218
219
- if (!qid || nvme_check_cqid(n, qid)) {
220
+ if (unlikely(!qid || nvme_check_cqid(n, qid))) {
221
+ trace_nvme_err_invalid_del_cq_cqid(qid);
222
return NVME_INVALID_CQID | NVME_DNR;
223
}
224
225
cq = n->cq[qid];
226
- if (!QTAILQ_EMPTY(&cq->sq_list)) {
227
+ if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
228
+ trace_nvme_err_invalid_del_cq_notempty(qid);
229
return NVME_INVALID_QUEUE_DEL;
230
}
231
+ trace_nvme_del_cq(qid);
232
nvme_free_cq(cq, n);
233
return NVME_SUCCESS;
234
}
235
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
236
uint16_t qflags = le16_to_cpu(c->cq_flags);
237
uint64_t prp1 = le64_to_cpu(c->prp1);
238
239
- if (!cqid || !nvme_check_cqid(n, cqid)) {
240
+ trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
241
+ NVME_CQ_FLAGS_IEN(qflags) != 0);
242
+
243
+ if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) {
244
+ trace_nvme_err_invalid_create_cq_cqid(cqid);
245
return NVME_INVALID_CQID | NVME_DNR;
246
}
247
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
248
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
249
+ trace_nvme_err_invalid_create_cq_size(qsize);
250
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
251
}
252
- if (!prp1) {
253
+ if (unlikely(!prp1)) {
254
+ trace_nvme_err_invalid_create_cq_addr(prp1);
255
return NVME_INVALID_FIELD | NVME_DNR;
256
}
257
- if (vector > n->num_queues) {
258
+ if (unlikely(vector > n->num_queues)) {
259
+ trace_nvme_err_invalid_create_cq_vector(vector);
260
return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
261
}
262
- if (!(NVME_CQ_FLAGS_PC(qflags))) {
263
+ if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
264
+ trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
265
return NVME_INVALID_FIELD | NVME_DNR;
266
}
267
268
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
269
uint64_t prp1 = le64_to_cpu(c->prp1);
270
uint64_t prp2 = le64_to_cpu(c->prp2);
271
272
+ trace_nvme_identify_ctrl();
273
+
274
return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
275
prp1, prp2);
276
}
277
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
278
uint64_t prp1 = le64_to_cpu(c->prp1);
279
uint64_t prp2 = le64_to_cpu(c->prp2);
280
281
- if (nsid == 0 || nsid > n->num_namespaces) {
282
+ trace_nvme_identify_ns(nsid);
283
+
284
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
285
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
286
return NVME_INVALID_NSID | NVME_DNR;
287
}
288
289
ns = &n->namespaces[nsid - 1];
290
+
291
return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
292
prp1, prp2);
293
}
294
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
295
uint16_t ret;
296
int i, j = 0;
297
298
+ trace_nvme_identify_nslist(min_nsid);
299
+
300
list = g_malloc0(data_len);
301
for (i = 0; i < n->num_namespaces; i++) {
302
if (i < min_nsid) {
303
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
304
case 0x02:
305
return nvme_identify_nslist(n, c);
306
default:
307
+ trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
308
return NVME_INVALID_FIELD | NVME_DNR;
309
}
310
}
311
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
312
switch (dw10) {
313
case NVME_VOLATILE_WRITE_CACHE:
314
result = blk_enable_write_cache(n->conf.blk);
315
+ trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
316
break;
317
case NVME_NUMBER_OF_QUEUES:
318
result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
319
+ trace_nvme_getfeat_numq(result);
320
break;
321
default:
322
+ trace_nvme_err_invalid_getfeat(dw10);
323
return NVME_INVALID_FIELD | NVME_DNR;
324
}
325
326
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
327
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
328
break;
329
case NVME_NUMBER_OF_QUEUES:
330
+ trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
331
+ ((dw11 >> 16) & 0xFFFF) + 1,
332
+ n->num_queues - 1, n->num_queues - 1);
333
req->cqe.result =
334
cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
335
break;
336
default:
337
+ trace_nvme_err_invalid_setfeat(dw10);
338
return NVME_INVALID_FIELD | NVME_DNR;
339
}
340
return NVME_SUCCESS;
341
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
342
case NVME_ADM_CMD_GET_FEATURES:
343
return nvme_get_feature(n, cmd, req);
344
default:
345
+ trace_nvme_err_invalid_admin_opc(cmd->opcode);
346
return NVME_INVALID_OPCODE | NVME_DNR;
347
}
348
}
349
@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
350
uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
351
uint32_t page_size = 1 << page_bits;
352
353
- if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq ||
354
- n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) ||
355
- NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
356
- NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) ||
357
- NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
358
- NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
359
- NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
360
- NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
361
- !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) {
362
+ if (unlikely(n->cq[0])) {
363
+ trace_nvme_err_startfail_cq();
364
+ return -1;
365
+ }
366
+ if (unlikely(n->sq[0])) {
367
+ trace_nvme_err_startfail_sq();
368
+ return -1;
369
+ }
370
+ if (unlikely(!n->bar.asq)) {
371
+ trace_nvme_err_startfail_nbarasq();
372
+ return -1;
373
+ }
374
+ if (unlikely(!n->bar.acq)) {
375
+ trace_nvme_err_startfail_nbaracq();
376
+ return -1;
377
+ }
378
+ if (unlikely(n->bar.asq & (page_size - 1))) {
379
+ trace_nvme_err_startfail_asq_misaligned(n->bar.asq);
380
+ return -1;
381
+ }
382
+ if (unlikely(n->bar.acq & (page_size - 1))) {
383
+ trace_nvme_err_startfail_acq_misaligned(n->bar.acq);
384
+ return -1;
385
+ }
386
+ if (unlikely(NVME_CC_MPS(n->bar.cc) <
387
+ NVME_CAP_MPSMIN(n->bar.cap))) {
388
+ trace_nvme_err_startfail_page_too_small(
389
+ NVME_CC_MPS(n->bar.cc),
390
+ NVME_CAP_MPSMIN(n->bar.cap));
391
+ return -1;
392
+ }
393
+ if (unlikely(NVME_CC_MPS(n->bar.cc) >
394
+ NVME_CAP_MPSMAX(n->bar.cap))) {
395
+ trace_nvme_err_startfail_page_too_large(
396
+ NVME_CC_MPS(n->bar.cc),
397
+ NVME_CAP_MPSMAX(n->bar.cap));
398
+ return -1;
399
+ }
400
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) <
401
+ NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
402
+ trace_nvme_err_startfail_cqent_too_small(
403
+ NVME_CC_IOCQES(n->bar.cc),
404
+ NVME_CTRL_CQES_MIN(n->bar.cap));
405
+ return -1;
406
+ }
407
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) >
408
+ NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
409
+ trace_nvme_err_startfail_cqent_too_large(
410
+ NVME_CC_IOCQES(n->bar.cc),
411
+ NVME_CTRL_CQES_MAX(n->bar.cap));
412
+ return -1;
413
+ }
414
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) <
415
+ NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
416
+ trace_nvme_err_startfail_sqent_too_small(
417
+ NVME_CC_IOSQES(n->bar.cc),
418
+ NVME_CTRL_SQES_MIN(n->bar.cap));
419
+ return -1;
420
+ }
421
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) >
422
+ NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
423
+ trace_nvme_err_startfail_sqent_too_large(
424
+ NVME_CC_IOSQES(n->bar.cc),
425
+ NVME_CTRL_SQES_MAX(n->bar.cap));
426
+ return -1;
427
+ }
428
+ if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) {
429
+ trace_nvme_err_startfail_asqent_sz_zero();
430
+ return -1;
431
+ }
432
+ if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) {
433
+ trace_nvme_err_startfail_acqent_sz_zero();
434
return -1;
435
}
436
437
@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
438
static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
439
unsigned size)
440
{
441
+ if (unlikely(offset & (sizeof(uint32_t) - 1))) {
442
+ NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32,
443
+ "MMIO write not 32-bit aligned,"
444
+ " offset=0x%"PRIx64"", offset);
445
+ /* should be ignored, fall through for now */
446
+ }
447
+
448
+ if (unlikely(size < sizeof(uint32_t))) {
449
+ NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall,
450
+ "MMIO write smaller than 32-bits,"
451
+ " offset=0x%"PRIx64", size=%u",
452
+ offset, size);
453
+ /* should be ignored, fall through for now */
454
+ }
455
+
456
switch (offset) {
457
- case 0xc:
458
+ case 0xc: /* INTMS */
459
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
460
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
461
+ "undefined access to interrupt mask set"
462
+ " when MSI-X is enabled");
463
+ /* should be ignored, fall through for now */
464
+ }
465
n->bar.intms |= data & 0xffffffff;
466
n->bar.intmc = n->bar.intms;
467
+ trace_nvme_mmio_intm_set(data & 0xffffffff,
468
+ n->bar.intmc);
469
break;
470
- case 0x10:
471
+ case 0x10: /* INTMC */
472
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
473
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
474
+ "undefined access to interrupt mask clr"
475
+ " when MSI-X is enabled");
476
+ /* should be ignored, fall through for now */
477
+ }
478
n->bar.intms &= ~(data & 0xffffffff);
479
n->bar.intmc = n->bar.intms;
480
+ trace_nvme_mmio_intm_clr(data & 0xffffffff,
481
+ n->bar.intmc);
482
break;
483
- case 0x14:
484
+ case 0x14: /* CC */
485
+ trace_nvme_mmio_cfg(data & 0xffffffff);
486
/* Windows first sends data, then sends enable bit */
487
if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
488
!NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc))
489
@@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
490
491
if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
492
n->bar.cc = data;
493
- if (nvme_start_ctrl(n)) {
494
+ if (unlikely(nvme_start_ctrl(n))) {
495
+ trace_nvme_err_startfail();
496
n->bar.csts = NVME_CSTS_FAILED;
497
} else {
498
+ trace_nvme_mmio_start_success();
499
n->bar.csts = NVME_CSTS_READY;
500
}
501
} else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
502
+ trace_nvme_mmio_stopped();
503
nvme_clear_ctrl(n);
504
n->bar.csts &= ~NVME_CSTS_READY;
505
}
506
if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
507
- nvme_clear_ctrl(n);
508
- n->bar.cc = data;
509
- n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
510
+ trace_nvme_mmio_shutdown_set();
511
+ nvme_clear_ctrl(n);
512
+ n->bar.cc = data;
513
+ n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
514
} else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
515
- n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
516
- n->bar.cc = data;
517
+ trace_nvme_mmio_shutdown_cleared();
518
+ n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
519
+ n->bar.cc = data;
520
+ }
521
+ break;
522
+ case 0x1C: /* CSTS */
523
+ if (data & (1 << 4)) {
524
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported,
525
+ "attempted to W1C CSTS.NSSRO"
526
+ " but CAP.NSSRS is zero (not supported)");
527
+ } else if (data != 0) {
528
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts,
529
+ "attempted to set a read only bit"
530
+ " of controller status");
531
+ }
532
+ break;
533
+ case 0x20: /* NSSR */
534
+ if (data == 0x4E564D65) {
535
+ trace_nvme_ub_mmiowr_ssreset_unsupported();
536
+ } else {
537
+ /* The spec says that writes of other values have no effect */
538
+ return;
539
}
540
break;
541
- case 0x24:
542
+ case 0x24: /* AQA */
543
n->bar.aqa = data & 0xffffffff;
544
+ trace_nvme_mmio_aqattr(data & 0xffffffff);
545
break;
546
- case 0x28:
547
+ case 0x28: /* ASQ */
548
n->bar.asq = data;
549
+ trace_nvme_mmio_asqaddr(data);
550
break;
551
- case 0x2c:
552
+ case 0x2c: /* ASQ hi */
553
n->bar.asq |= data << 32;
554
+ trace_nvme_mmio_asqaddr_hi(data, n->bar.asq);
555
break;
556
- case 0x30:
557
+ case 0x30: /* ACQ */
558
+ trace_nvme_mmio_acqaddr(data);
559
n->bar.acq = data;
560
break;
561
- case 0x34:
562
+ case 0x34: /* ACQ hi */
563
n->bar.acq |= data << 32;
564
+ trace_nvme_mmio_acqaddr_hi(data, n->bar.acq);
565
break;
566
+ case 0x38: /* CMBLOC */
567
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved,
568
+ "invalid write to reserved CMBLOC"
569
+ " when CMBSZ is zero, ignored");
570
+ return;
571
+ case 0x3C: /* CMBSZ */
572
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
573
+ "invalid write to read only CMBSZ, ignored");
574
+ return;
575
default:
576
+ NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
577
+ "invalid MMIO write,"
578
+ " offset=0x%"PRIx64", data=%"PRIx64"",
579
+ offset, data);
580
break;
581
}
582
}
583
@@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
584
uint8_t *ptr = (uint8_t *)&n->bar;
585
uint64_t val = 0;
586
587
+ if (unlikely(addr & (sizeof(uint32_t) - 1))) {
588
+ NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32,
589
+ "MMIO read not 32-bit aligned,"
590
+ " offset=0x%"PRIx64"", addr);
591
+ /* should RAZ, fall through for now */
592
+ } else if (unlikely(size < sizeof(uint32_t))) {
593
+ NVME_GUEST_ERR(nvme_ub_mmiord_toosmall,
594
+ "MMIO read smaller than 32-bits,"
595
+ " offset=0x%"PRIx64"", addr);
596
+ /* should RAZ, fall through for now */
597
+ }
598
+
599
if (addr < sizeof(n->bar)) {
600
memcpy(&val, ptr + addr, size);
601
+ } else {
602
+ NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
603
+ "MMIO read beyond last register,"
604
+ " offset=0x%"PRIx64", returning 0", addr);
605
}
606
+
607
return val;
608
}
609
610
@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
611
{
612
uint32_t qid;
613
614
- if (addr & ((1 << 2) - 1)) {
615
+ if (unlikely(addr & ((1 << 2) - 1))) {
616
+ NVME_GUEST_ERR(nvme_ub_db_wr_misaligned,
617
+ "doorbell write not 32-bit aligned,"
618
+ " offset=0x%"PRIx64", ignoring", addr);
619
return;
620
}
621
622
if (((addr - 0x1000) >> 2) & 1) {
623
+ /* Completion queue doorbell write */
624
+
625
uint16_t new_head = val & 0xffff;
626
int start_sqs;
627
NvmeCQueue *cq;
628
629
qid = (addr - (0x1000 + (1 << 2))) >> 3;
630
- if (nvme_check_cqid(n, qid)) {
631
+ if (unlikely(nvme_check_cqid(n, qid))) {
632
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq,
633
+ "completion queue doorbell write"
634
+ " for nonexistent queue,"
635
+ " sqid=%"PRIu32", ignoring", qid);
636
return;
637
}
638
639
cq = n->cq[qid];
640
- if (new_head >= cq->size) {
641
+ if (unlikely(new_head >= cq->size)) {
642
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead,
643
+ "completion queue doorbell write value"
644
+ " beyond queue size, sqid=%"PRIu32","
645
+ " new_head=%"PRIu16", ignoring",
646
+ qid, new_head);
647
return;
648
}
649
650
@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
651
nvme_isr_notify(n, cq);
652
}
653
} else {
654
+ /* Submission queue doorbell write */
655
+
656
uint16_t new_tail = val & 0xffff;
657
NvmeSQueue *sq;
658
659
qid = (addr - 0x1000) >> 3;
660
- if (nvme_check_sqid(n, qid)) {
661
+ if (unlikely(nvme_check_sqid(n, qid))) {
662
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq,
663
+ "submission queue doorbell write"
664
+ " for nonexistent queue,"
665
+ " sqid=%"PRIu32", ignoring", qid);
666
return;
667
}
668
669
sq = n->sq[qid];
670
- if (new_tail >= sq->size) {
671
+ if (unlikely(new_tail >= sq->size)) {
672
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail,
673
+ "submission queue doorbell write value"
674
+ " beyond queue size, sqid=%"PRIu32","
675
+ " new_tail=%"PRIu16", ignoring",
676
+ qid, new_tail);
677
return;
678
}
679
680
diff --git a/hw/block/trace-events b/hw/block/trace-events
681
index XXXXXXX..XXXXXXX 100644
682
--- a/hw/block/trace-events
683
+++ b/hw/block/trace-events
684
@@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6
685
hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d"
686
hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d"
687
688
+# hw/block/nvme.c
689
+# nvme traces for successful events
690
+nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
691
+nvme_irq_pin(void) "pulsing IRQ pin"
692
+nvme_irq_masked(void) "IRQ is masked"
693
+nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
694
+nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
695
+nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
696
+nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
697
+nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
698
+nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
699
+nvme_identify_ctrl(void) "identify controller"
700
+nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
701
+nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
702
+nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s"
703
+nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
704
+nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
705
+nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
706
+nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
707
+nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
708
+nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64""
709
+nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64""
710
+nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64""
711
+nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
712
+nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
713
+nvme_mmio_start_success(void) "setting controller enable bit succeeded"
714
+nvme_mmio_stopped(void) "cleared controller enable bit"
715
+nvme_mmio_shutdown_set(void) "shutdown bit set"
716
+nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
717
+
718
+# nvme traces for error conditions
719
+nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
720
+nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
721
+nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
722
+nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred"
723
+nvme_err_invalid_field(void) "invalid field"
724
+nvme_err_invalid_prp(void) "invalid PRP"
725
+nvme_err_invalid_sgl(void) "invalid SGL"
726
+nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u"
727
+nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
728
+nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
729
+nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
730
+nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
731
+nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
732
+nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16""
733
+nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16""
734
+nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64""
735
+nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16""
736
+nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16""
737
+nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16""
738
+nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16""
739
+nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16""
740
+nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64""
741
+nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16""
742
+nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16""
743
+nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
744
+nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
745
+nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
746
+nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
747
+nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
748
+nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
749
+nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null"
750
+nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64""
751
+nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64""
752
+nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u"
753
+nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u"
754
+nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u"
755
+nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u"
756
+nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u"
757
+nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u"
758
+nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
759
+nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
760
+nvme_err_startfail(void) "setting controller enable bit failed"
761
+
762
+# Traces for undefined behavior
763
+nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
764
+nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u"
765
+nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled"
766
+nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status"
767
+nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)"
768
+nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
769
+nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
770
+nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
771
+nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
772
+nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
773
+nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
774
+nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0"
775
+nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring"
776
+nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring"
777
+nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring"
778
+nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring"
779
+nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring"
780
+
781
# hw/block/xen_disk.c
782
xen_disk_alloc(char *name) "%s"
783
xen_disk_init(char *name) "%s"
784
--
785
2.13.6
786
787
diff view generated by jsdifflib
New patch
1
From: Fam Zheng <famz@redhat.com>
1
2
3
Management tools create overlays of running guests with qemu-img:
4
5
$ qemu-img create -b /image/in/use.qcow2 -f qcow2 /overlay/image.qcow2
6
7
but this doesn't work anymore due to image locking:
8
9
qemu-img: /overlay/image.qcow2: Failed to get shared "write" lock
10
Is another process using the image?
11
Could not open backing image to determine size.
12
Use the force share option to allow this use case again.
13
14
Cc: qemu-stable@nongnu.org
15
Signed-off-by: Fam Zheng <famz@redhat.com>
16
Reviewed-by: Eric Blake <eblake@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
---
19
block.c | 3 ++-
20
1 file changed, 2 insertions(+), 1 deletion(-)
21
22
diff --git a/block.c b/block.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/block.c
25
+++ b/block.c
26
@@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt,
27
back_flags = flags;
28
back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
29
30
+ backing_options = qdict_new();
31
if (backing_fmt) {
32
- backing_options = qdict_new();
33
qdict_put_str(backing_options, "driver", backing_fmt);
34
}
35
+ qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
36
37
bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
38
&local_err);
39
--
40
2.13.6
41
42
diff view generated by jsdifflib
1
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
Remove drive_get_max_devs, as it is not used by anyone.
3
It's not working anymore since QEMU v1.3.0 - time to remove it now.
4
4
5
Last use was removed in commit 8f2d75e81d5
5
Signed-off-by: Thomas Huth <thuth@redhat.com>
6
("hw: Drop superfluous special checks for orphaned -drive").
6
Reviewed-by: John Snow <jsnow@redhat.com>
7
7
Reviewed-by: Markus Armbruster <armbru@redhat.com>
8
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Message-Id: <20211215121140.456939-4-eesposit@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
9
---
14
include/sysemu/blockdev.h | 1 -
10
blockdev.c | 11 -----------
15
blockdev.c | 17 -----------------
11
qemu-doc.texi | 6 ------
16
2 files changed, 18 deletions(-)
12
2 files changed, 17 deletions(-)
17
13
18
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/sysemu/blockdev.h
21
+++ b/include/sysemu/blockdev.h
22
@@ -XXX,XX +XXX,XX @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit);
23
void drive_check_orphaned(void);
24
DriveInfo *drive_get_by_index(BlockInterfaceType type, int index);
25
int drive_get_max_bus(BlockInterfaceType type);
26
-int drive_get_max_devs(BlockInterfaceType type);
27
28
QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
29
const char *optstr);
30
diff --git a/blockdev.c b/blockdev.c
14
diff --git a/blockdev.c b/blockdev.c
31
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
32
--- a/blockdev.c
16
--- a/blockdev.c
33
+++ b/blockdev.c
17
+++ b/blockdev.c
34
@@ -XXX,XX +XXX,XX @@ void blockdev_auto_del(BlockBackend *blk)
18
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_legacy_drive_opts = {
19
.type = QEMU_OPT_STRING,
20
.help = "chs translation (auto, lba, none)",
21
},{
22
- .name = "boot",
23
- .type = QEMU_OPT_BOOL,
24
- .help = "(deprecated, ignored)",
25
- },{
26
.name = "addr",
27
.type = QEMU_OPT_STRING,
28
.help = "pci address (virtio only)",
29
@@ -XXX,XX +XXX,XX @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
30
goto fail;
35
}
31
}
36
}
32
37
33
- /* Deprecated option boot=[on|off] */
38
-/**
34
- if (qemu_opt_get(legacy_opts, "boot") != NULL) {
39
- * Returns the current mapping of how many units per bus
35
- fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be "
40
- * a particular interface can support.
36
- "ignored. Future versions will reject this parameter. Please "
41
- *
37
- "update your scripts.\n");
42
- * A positive integer indicates n units per bus.
43
- * 0 implies the mapping has not been established.
44
- * -1 indicates an invalid BlockInterfaceType was given.
45
- */
46
-int drive_get_max_devs(BlockInterfaceType type)
47
-{
48
- if (type >= IF_IDE && type < IF_COUNT) {
49
- return if_max_devs[type];
50
- }
38
- }
51
-
39
-
52
- return -1;
40
/* Other deprecated options */
53
-}
41
if (!qtest_enabled()) {
42
for (i = 0; i < ARRAY_SIZE(deprecated); i++) {
43
diff --git a/qemu-doc.texi b/qemu-doc.texi
44
index XXXXXXX..XXXXXXX 100644
45
--- a/qemu-doc.texi
46
+++ b/qemu-doc.texi
47
@@ -XXX,XX +XXX,XX @@ deprecated.
48
49
@section System emulator command line arguments
50
51
-@subsection -drive boot=on|off (since 1.3.0)
54
-
52
-
55
static int drive_index_to_bus_id(BlockInterfaceType type, int index)
53
-The ``boot=on|off'' option to the ``-drive'' argument is
56
{
54
-ignored. Applications should use the ``bootindex=N'' parameter
57
int max_devs = if_max_devs[type];
55
-to set an absolute ordering between devices instead.
56
-
57
@subsection -tdf (since 1.3.0)
58
59
The ``-tdf'' argument is ignored. The behaviour implemented
58
--
60
--
59
2.31.1
61
2.13.6
60
62
61
63
diff view generated by jsdifflib
New patch
1
1
From: Thomas Huth <thuth@redhat.com>
2
3
It's been marked as deprecated since QEMU v2.10.0, and so far nobody
4
complained that we should keep it, so let's remove this legacy option
5
now to simplify the code quite a bit.
6
7
Signed-off-by: Thomas Huth <thuth@redhat.com>
8
Reviewed-by: John Snow <jsnow@redhat.com>
9
Reviewed-by: Markus Armbruster <armbru@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
12
vl.c | 86 ++-------------------------------------------------------
13
qemu-doc.texi | 8 ------
14
qemu-options.hx | 19 ++-----------
15
3 files changed, 4 insertions(+), 109 deletions(-)
16
17
diff --git a/vl.c b/vl.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/vl.c
20
+++ b/vl.c
21
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
22
const char *boot_order = NULL;
23
const char *boot_once = NULL;
24
DisplayState *ds;
25
- int cyls, heads, secs, translation;
26
QemuOpts *opts, *machine_opts;
27
- QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL;
28
+ QemuOpts *icount_opts = NULL, *accel_opts = NULL;
29
QemuOptsList *olist;
30
int optind;
31
const char *optarg;
32
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
33
34
cpu_model = NULL;
35
snapshot = 0;
36
- cyls = heads = secs = 0;
37
- translation = BIOS_ATA_TRANSLATION_AUTO;
38
39
nb_nics = 0;
40
41
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
42
if (optind >= argc)
43
break;
44
if (argv[optind][0] != '-') {
45
- hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
46
+ drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
47
} else {
48
const QEMUOption *popt;
49
50
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
51
cpu_model = optarg;
52
break;
53
case QEMU_OPTION_hda:
54
- {
55
- char buf[256];
56
- if (cyls == 0)
57
- snprintf(buf, sizeof(buf), "%s", HD_OPTS);
58
- else
59
- snprintf(buf, sizeof(buf),
60
- "%s,cyls=%d,heads=%d,secs=%d%s",
61
- HD_OPTS , cyls, heads, secs,
62
- translation == BIOS_ATA_TRANSLATION_LBA ?
63
- ",trans=lba" :
64
- translation == BIOS_ATA_TRANSLATION_NONE ?
65
- ",trans=none" : "");
66
- drive_add(IF_DEFAULT, 0, optarg, buf);
67
- break;
68
- }
69
case QEMU_OPTION_hdb:
70
case QEMU_OPTION_hdc:
71
case QEMU_OPTION_hdd:
72
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
73
case QEMU_OPTION_snapshot:
74
snapshot = 1;
75
break;
76
- case QEMU_OPTION_hdachs:
77
- {
78
- const char *p;
79
- p = optarg;
80
- cyls = strtol(p, (char **)&p, 0);
81
- if (cyls < 1 || cyls > 16383)
82
- goto chs_fail;
83
- if (*p != ',')
84
- goto chs_fail;
85
- p++;
86
- heads = strtol(p, (char **)&p, 0);
87
- if (heads < 1 || heads > 16)
88
- goto chs_fail;
89
- if (*p != ',')
90
- goto chs_fail;
91
- p++;
92
- secs = strtol(p, (char **)&p, 0);
93
- if (secs < 1 || secs > 63)
94
- goto chs_fail;
95
- if (*p == ',') {
96
- p++;
97
- if (!strcmp(p, "large")) {
98
- translation = BIOS_ATA_TRANSLATION_LARGE;
99
- } else if (!strcmp(p, "rechs")) {
100
- translation = BIOS_ATA_TRANSLATION_RECHS;
101
- } else if (!strcmp(p, "none")) {
102
- translation = BIOS_ATA_TRANSLATION_NONE;
103
- } else if (!strcmp(p, "lba")) {
104
- translation = BIOS_ATA_TRANSLATION_LBA;
105
- } else if (!strcmp(p, "auto")) {
106
- translation = BIOS_ATA_TRANSLATION_AUTO;
107
- } else {
108
- goto chs_fail;
109
- }
110
- } else if (*p != '\0') {
111
- chs_fail:
112
- error_report("invalid physical CHS format");
113
- exit(1);
114
- }
115
- if (hda_opts != NULL) {
116
- qemu_opt_set_number(hda_opts, "cyls", cyls,
117
- &error_abort);
118
- qemu_opt_set_number(hda_opts, "heads", heads,
119
- &error_abort);
120
- qemu_opt_set_number(hda_opts, "secs", secs,
121
- &error_abort);
122
- if (translation == BIOS_ATA_TRANSLATION_LARGE) {
123
- qemu_opt_set(hda_opts, "trans", "large",
124
- &error_abort);
125
- } else if (translation == BIOS_ATA_TRANSLATION_RECHS) {
126
- qemu_opt_set(hda_opts, "trans", "rechs",
127
- &error_abort);
128
- } else if (translation == BIOS_ATA_TRANSLATION_LBA) {
129
- qemu_opt_set(hda_opts, "trans", "lba",
130
- &error_abort);
131
- } else if (translation == BIOS_ATA_TRANSLATION_NONE) {
132
- qemu_opt_set(hda_opts, "trans", "none",
133
- &error_abort);
134
- }
135
- }
136
- }
137
- error_report("'-hdachs' is deprecated, please use '-device"
138
- " ide-hd,cyls=c,heads=h,secs=s,...' instead");
139
- break;
140
case QEMU_OPTION_numa:
141
opts = qemu_opts_parse_noisily(qemu_find_opts("numa"),
142
optarg, true);
143
diff --git a/qemu-doc.texi b/qemu-doc.texi
144
index XXXXXXX..XXXXXXX 100644
145
--- a/qemu-doc.texi
146
+++ b/qemu-doc.texi
147
@@ -XXX,XX +XXX,XX @@ The ``--net dump'' argument is now replaced with the
148
``-object filter-dump'' argument which works in combination
149
with the modern ``-netdev`` backends instead.
150
151
-@subsection -hdachs (since 2.10.0)
152
-
153
-The ``-hdachs'' argument is now a synonym for setting
154
-the ``cyls'', ``heads'', ``secs'', and ``trans'' properties
155
-on the ``ide-hd'' device using the ``-device'' argument.
156
-The new syntax allows different settings to be provided
157
-per disk.
158
-
159
@subsection -usbdevice (since 2.10.0)
160
161
The ``-usbdevice DEV'' argument is now a synonym for setting
162
diff --git a/qemu-options.hx b/qemu-options.hx
163
index XXXXXXX..XXXXXXX 100644
164
--- a/qemu-options.hx
165
+++ b/qemu-options.hx
166
@@ -XXX,XX +XXX,XX @@ of available connectors of a given interface type.
167
@item media=@var{media}
168
This option defines the type of the media: disk or cdrom.
169
@item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}]
170
-These options have the same definition as they have in @option{-hdachs}.
171
-These parameters are deprecated, use the corresponding parameters
172
+Force disk physical geometry and the optional BIOS translation (trans=none or
173
+lba). These parameters are deprecated, use the corresponding parameters
174
of @code{-device} instead.
175
@item snapshot=@var{snapshot}
176
@var{snapshot} is "on" or "off" and controls snapshot mode for the given drive
177
@@ -XXX,XX +XXX,XX @@ the raw disk image you use is not written back. You can however force
178
the write back by pressing @key{C-a s} (@pxref{disk_images}).
179
ETEXI
180
181
-DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \
182
- "-hdachs c,h,s[,t]\n" \
183
- " force hard disk 0 physical geometry and the optional BIOS\n" \
184
- " translation (t=none or lba) (usually QEMU can guess them)\n",
185
- QEMU_ARCH_ALL)
186
-STEXI
187
-@item -hdachs @var{c},@var{h},@var{s},[,@var{t}]
188
-@findex -hdachs
189
-Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <=
190
-@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS
191
-translation mode (@var{t}=none, lba or auto). Usually QEMU can guess
192
-all those parameters. This option is deprecated, please use
193
-@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead.
194
-ETEXI
195
-
196
DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
197
"-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n"
198
" [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n"
199
--
200
2.13.6
201
202
diff view generated by jsdifflib
New patch
1
From: Thomas Huth <thuth@redhat.com>
1
2
3
Looks like we forgot to announce the deprecation of these options in
4
the corresponding chapter of the qemu-doc text, so let's do that now.
5
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
7
Reviewed-by: John Snow <jsnow@redhat.com>
8
Reviewed-by: Markus Armbruster <armbru@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
11
qemu-doc.texi | 15 +++++++++++++++
12
1 file changed, 15 insertions(+)
13
14
diff --git a/qemu-doc.texi b/qemu-doc.texi
15
index XXXXXXX..XXXXXXX 100644
16
--- a/qemu-doc.texi
17
+++ b/qemu-doc.texi
18
@@ -XXX,XX +XXX,XX @@ longer be directly supported in QEMU.
19
The ``-drive if=scsi'' argument is replaced by the the
20
``-device BUS-TYPE'' argument combined with ``-drive if=none''.
21
22
+@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0)
23
+
24
+The drive geometry arguments are replaced by the the geometry arguments
25
+that can be specified with the ``-device'' parameter.
26
+
27
+@subsection -drive serial=... (since 2.10.0)
28
+
29
+The drive serial argument is replaced by the the serial argument
30
+that can be specified with the ``-device'' parameter.
31
+
32
+@subsection -drive addr=... (since 2.10.0)
33
+
34
+The drive addr argument is replaced by the the addr argument
35
+that can be specified with the ``-device'' parameter.
36
+
37
@subsection -net dump (since 2.10.0)
38
39
The ``--net dump'' argument is now replaced with the
40
--
41
2.13.6
42
43
diff view generated by jsdifflib
1
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
1
From: Fam Zheng <famz@redhat.com>
2
2
3
bdrv_backing_overridden is only used in block.c, so there is
3
Signed-off-by: Fam Zheng <famz@redhat.com>
4
no need to leave it in block_int.h
5
6
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-Id: <20211215121140.456939-2-eesposit@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
5
---
11
include/block/block_int.h | 3 ---
6
include/block/block_int.h | 1 -
12
block.c | 4 +++-
7
block/io.c | 18 ------------------
13
2 files changed, 3 insertions(+), 4 deletions(-)
8
2 files changed, 19 deletions(-)
14
9
15
diff --git a/include/block/block_int.h b/include/block/block_int.h
10
diff --git a/include/block/block_int.h b/include/block/block_int.h
16
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
17
--- a/include/block/block_int.h
12
--- a/include/block/block_int.h
18
+++ b/include/block/block_int.h
13
+++ b/include/block/block_int.h
19
@@ -XXX,XX +XXX,XX @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
14
@@ -XXX,XX +XXX,XX @@ bool blk_dev_is_tray_open(BlockBackend *blk);
20
void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
15
bool blk_dev_is_medium_locked(BlockBackend *blk);
21
QDict *options);
16
22
17
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
23
-bool bdrv_backing_overridden(BlockDriverState *bs);
18
-bool bdrv_requests_pending(BlockDriverState *bs);
19
20
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
21
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
22
diff --git a/block/io.c b/block/io.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/block/io.c
25
+++ b/block/io.c
26
@@ -XXX,XX +XXX,XX @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
27
assert(old >= 1);
28
}
29
30
-/* Check if any requests are in-flight (including throttled requests) */
31
-bool bdrv_requests_pending(BlockDriverState *bs)
32
-{
33
- BdrvChild *child;
24
-
34
-
35
- if (atomic_read(&bs->in_flight)) {
36
- return true;
37
- }
25
-
38
-
26
/**
39
- QLIST_FOREACH(child, &bs->children, next) {
27
* bdrv_add_aio_context_notifier:
40
- if (bdrv_requests_pending(child->bs)) {
28
*
41
- return true;
29
diff --git a/block.c b/block.c
42
- }
30
index XXXXXXX..XXXXXXX 100644
43
- }
31
--- a/block.c
44
-
32
+++ b/block.c
45
- return false;
33
@@ -XXX,XX +XXX,XX @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
46
-}
34
static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
47
-
35
static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
48
typedef struct {
36
49
Coroutine *co;
37
+static bool bdrv_backing_overridden(BlockDriverState *bs);
50
BlockDriverState *bs;
38
+
39
/* If non-zero, use only whitelisted block drivers */
40
static int use_bdrv_whitelist;
41
42
@@ -XXX,XX +XXX,XX @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
43
/* Note: This function may return false positives; it may return true
44
* even if opening the backing file specified by bs's image header
45
* would result in exactly bs->backing. */
46
-bool bdrv_backing_overridden(BlockDriverState *bs)
47
+static bool bdrv_backing_overridden(BlockDriverState *bs)
48
{
49
if (bs->backing) {
50
return strcmp(bs->auto_backing_file,
51
--
51
--
52
2.31.1
52
2.13.6
53
53
54
54
diff view generated by jsdifflib
New patch
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
Reviewed-by: Fam Zheng <famz@redhat.com>
3
---
4
block/io.c | 6 ++++++
5
1 file changed, 6 insertions(+)
1
6
7
diff --git a/block/io.c b/block/io.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/block/io.c
10
+++ b/block/io.c
11
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
12
BdrvNextIterator it;
13
GSList *aio_ctxs = NULL, *ctx;
14
15
+ /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
16
+ * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
17
+ * nodes in several different AioContexts, so make sure we're in the main
18
+ * context. */
19
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
20
+
21
block_job_pause_all();
22
23
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
24
--
25
2.13.6
26
27
diff view generated by jsdifflib
1
The calculation in sector2cluster() is done relative to the offset of
1
bdrv_drained_begin() doesn't increase bs->quiesce_counter recursively
2
the root directory. Any writes to blocks before the start of the root
2
and also doesn't notify other parent nodes of children, which both means
3
directory (in particular, writes to the FAT) result in negative values,
3
that the child nodes are not actually drained, and bdrv_drained_begin()
4
which are not handled correctly in vvfat_write().
4
is providing useful functionality only on a single node.
5
5
6
This changes sector2cluster() to return a signed value, and makes sure
6
To keep things consistent, we also shouldn't call the block driver
7
that vvfat_write() doesn't try to find mappings for negative cluster
7
callbacks recursively.
8
number. It clarifies the code in vvfat_write() to make it more obvious
8
9
that the cluster numbers can be negative.
9
A proper recursive drain version that provides an actually working
10
drained section for child nodes will be introduced later.
10
11
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Message-Id: <20211209152231.23756-1-kwolf@redhat.com>
13
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
14
---
15
block/vvfat.c | 30 ++++++++++++++++++++++--------
15
block/io.c | 16 +++++++++-------
16
1 file changed, 22 insertions(+), 8 deletions(-)
16
1 file changed, 9 insertions(+), 7 deletions(-)
17
17
18
diff --git a/block/vvfat.c b/block/vvfat.c
18
diff --git a/block/io.c b/block/io.c
19
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/vvfat.c
20
--- a/block/io.c
21
+++ b/block/vvfat.c
21
+++ b/block/io.c
22
@@ -XXX,XX +XXX,XX @@ static int read_directory(BDRVVVFATState* s, int mapping_index)
22
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
23
return 0;
24
}
23
}
25
24
26
-static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num)
25
/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
27
+static inline int32_t sector2cluster(BDRVVVFATState* s,off_t sector_num)
26
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
27
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive)
28
{
28
{
29
return (sector_num - s->offset_to_root_dir) / s->sectors_per_cluster;
29
BdrvChild *child, *tmp;
30
}
30
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
31
@@ -XXX,XX +XXX,XX @@ static int vvfat_write(BlockDriverState *bs, int64_t sector_num,
31
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
32
{
32
bdrv_coroutine_enter(bs, data.co);
33
BDRVVVFATState *s = bs->opaque;
33
BDRV_POLL_WHILE(bs, !data.done);
34
int i, ret;
34
35
+ int first_cluster, last_cluster;
35
- QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
36
36
- bdrv_drain_invoke(child->bs, begin);
37
DLOG(checkpoint());
37
+ if (recursive) {
38
38
+ QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
39
@@ -XXX,XX +XXX,XX @@ DLOG(checkpoint());
39
+ bdrv_drain_invoke(child->bs, begin, true);
40
if (sector_num < s->offset_to_fat)
41
return -1;
42
43
- for (i = sector2cluster(s, sector_num);
44
- i <= sector2cluster(s, sector_num + nb_sectors - 1);) {
45
- mapping_t* mapping = find_mapping_for_cluster(s, i);
46
+ /*
47
+ * Values will be negative for writes to the FAT, which is located before
48
+ * the root directory.
49
+ */
50
+ first_cluster = sector2cluster(s, sector_num);
51
+ last_cluster = sector2cluster(s, sector_num + nb_sectors - 1);
52
+
53
+ for (i = first_cluster; i <= last_cluster;) {
54
+ mapping_t *mapping = NULL;
55
+
56
+ if (i >= 0) {
57
+ mapping = find_mapping_for_cluster(s, i);
58
+ }
59
+
60
if (mapping) {
61
if (mapping->read_only) {
62
fprintf(stderr, "Tried to write to write-protected file %s\n",
63
@@ -XXX,XX +XXX,XX @@ DLOG(checkpoint());
64
}
65
}
66
i = mapping->end;
67
- } else
68
+ } else {
69
i++;
70
+ }
40
+ }
71
}
41
}
72
42
}
73
/*
43
74
@@ -XXX,XX +XXX,XX @@ DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sec
44
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
75
return ret;
45
bdrv_parent_drained_begin(bs);
76
}
46
}
77
47
78
- for (i = sector2cluster(s, sector_num);
48
- bdrv_drain_invoke(bs, true);
79
- i <= sector2cluster(s, sector_num + nb_sectors - 1); i++)
49
+ bdrv_drain_invoke(bs, true, false);
80
- if (i >= 0)
50
bdrv_drain_recurse(bs);
81
+ for (i = first_cluster; i <= last_cluster; i++) {
51
}
82
+ if (i >= 0) {
52
83
s->used_clusters[i] |= USED_ALLOCATED;
53
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
84
+ }
54
}
85
+ }
55
86
56
/* Re-enable things in child-to-parent order */
87
DLOG(checkpoint());
57
- bdrv_drain_invoke(bs, false);
88
/* TODO: add timeout */
58
+ bdrv_drain_invoke(bs, false, false);
59
bdrv_parent_drained_end(bs);
60
aio_enable_external(bdrv_get_aio_context(bs));
61
}
62
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
63
aio_context_acquire(aio_context);
64
aio_disable_external(aio_context);
65
bdrv_parent_drained_begin(bs);
66
- bdrv_drain_invoke(bs, true);
67
+ bdrv_drain_invoke(bs, true, true);
68
aio_context_release(aio_context);
69
70
if (!g_slist_find(aio_ctxs, aio_context)) {
71
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
72
73
/* Re-enable things in child-to-parent order */
74
aio_context_acquire(aio_context);
75
- bdrv_drain_invoke(bs, false);
76
+ bdrv_drain_invoke(bs, false, true);
77
bdrv_parent_drained_end(bs);
78
aio_enable_external(aio_context);
79
aio_context_release(aio_context);
89
--
80
--
90
2.31.1
81
2.13.6
91
82
92
83
diff view generated by jsdifflib
New patch
1
The existing test is for bdrv_drain_all_begin/end() only. Generalise the
2
test case so that it can be run for the other variants as well. At the
3
moment this is only bdrv_drained_begin/end(), but in a while, we'll add
4
another one.
1
5
6
Also, add a backing file to the test node to test whether the operations
7
work recursively.
8
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
11
tests/test-bdrv-drain.c | 69 ++++++++++++++++++++++++++++++++++++++++++++-----
12
1 file changed, 62 insertions(+), 7 deletions(-)
13
14
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/test-bdrv-drain.c
17
+++ b/tests/test-bdrv-drain.c
18
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = {
19
20
.bdrv_co_drain_begin = bdrv_test_co_drain_begin,
21
.bdrv_co_drain_end = bdrv_test_co_drain_end,
22
+
23
+ .bdrv_child_perm = bdrv_format_default_perms,
24
};
25
26
static void aio_ret_cb(void *opaque, int ret)
27
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
28
*aio_ret = ret;
29
}
30
31
-static void test_drv_cb_drain_all(void)
32
+enum drain_type {
33
+ BDRV_DRAIN_ALL,
34
+ BDRV_DRAIN,
35
+};
36
+
37
+static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
38
+{
39
+ switch (drain_type) {
40
+ case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
41
+ case BDRV_DRAIN: bdrv_drained_begin(bs); break;
42
+ default: g_assert_not_reached();
43
+ }
44
+}
45
+
46
+static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
47
+{
48
+ switch (drain_type) {
49
+ case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
50
+ case BDRV_DRAIN: bdrv_drained_end(bs); break;
51
+ default: g_assert_not_reached();
52
+ }
53
+}
54
+
55
+static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
56
{
57
BlockBackend *blk;
58
- BlockDriverState *bs;
59
- BDRVTestState *s;
60
+ BlockDriverState *bs, *backing;
61
+ BDRVTestState *s, *backing_s;
62
BlockAIOCB *acb;
63
int aio_ret;
64
65
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void)
66
s = bs->opaque;
67
blk_insert_bs(blk, bs, &error_abort);
68
69
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
70
+ backing_s = backing->opaque;
71
+ bdrv_set_backing_hd(bs, backing, &error_abort);
72
+
73
/* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
74
g_assert_cmpint(s->drain_count, ==, 0);
75
- bdrv_drain_all_begin();
76
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
77
+
78
+ do_drain_begin(drain_type, bs);
79
+
80
g_assert_cmpint(s->drain_count, ==, 1);
81
- bdrv_drain_all_end();
82
+ g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
83
+
84
+ do_drain_end(drain_type, bs);
85
+
86
g_assert_cmpint(s->drain_count, ==, 0);
87
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
88
89
/* Now do the same while a request is pending */
90
aio_ret = -EINPROGRESS;
91
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void)
92
g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
93
94
g_assert_cmpint(s->drain_count, ==, 0);
95
- bdrv_drain_all_begin();
96
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
97
+
98
+ do_drain_begin(drain_type, bs);
99
+
100
g_assert_cmpint(aio_ret, ==, 0);
101
g_assert_cmpint(s->drain_count, ==, 1);
102
- bdrv_drain_all_end();
103
+ g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
104
+
105
+ do_drain_end(drain_type, bs);
106
+
107
g_assert_cmpint(s->drain_count, ==, 0);
108
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
109
110
+ bdrv_unref(backing);
111
bdrv_unref(bs);
112
blk_unref(blk);
113
}
114
115
+static void test_drv_cb_drain_all(void)
116
+{
117
+ test_drv_cb_common(BDRV_DRAIN_ALL, true);
118
+}
119
+
120
+static void test_drv_cb_drain(void)
121
+{
122
+ test_drv_cb_common(BDRV_DRAIN, false);
123
+}
124
+
125
int main(int argc, char **argv)
126
{
127
bdrv_init();
128
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
129
g_test_init(&argc, &argv, NULL);
130
131
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
132
+ g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
133
134
return g_test_run();
135
}
136
--
137
2.13.6
138
139
diff view generated by jsdifflib
New patch
1
This is currently only working correctly for bdrv_drain(), not for
2
bdrv_drain_all(). Leave a comment for the drain_all case, we'll address
3
it later.
1
4
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
---
7
tests/test-bdrv-drain.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
8
1 file changed, 45 insertions(+)
9
10
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tests/test-bdrv-drain.c
13
+++ b/tests/test-bdrv-drain.c
14
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void)
15
test_drv_cb_common(BDRV_DRAIN, false);
16
}
17
18
+static void test_quiesce_common(enum drain_type drain_type, bool recursive)
19
+{
20
+ BlockBackend *blk;
21
+ BlockDriverState *bs, *backing;
22
+
23
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
24
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
25
+ &error_abort);
26
+ blk_insert_bs(blk, bs, &error_abort);
27
+
28
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
29
+ bdrv_set_backing_hd(bs, backing, &error_abort);
30
+
31
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
32
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
33
+
34
+ do_drain_begin(drain_type, bs);
35
+
36
+ g_assert_cmpint(bs->quiesce_counter, ==, 1);
37
+ g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
38
+
39
+ do_drain_end(drain_type, bs);
40
+
41
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
42
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
43
+
44
+ bdrv_unref(backing);
45
+ bdrv_unref(bs);
46
+ blk_unref(blk);
47
+}
48
+
49
+static void test_quiesce_drain_all(void)
50
+{
51
+ // XXX drain_all doesn't quiesce
52
+ //test_quiesce_common(BDRV_DRAIN_ALL, true);
53
+}
54
+
55
+static void test_quiesce_drain(void)
56
+{
57
+ test_quiesce_common(BDRV_DRAIN, false);
58
+}
59
+
60
int main(int argc, char **argv)
61
{
62
bdrv_init();
63
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
64
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
65
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
66
67
+ g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
68
+ g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
69
+
70
return g_test_run();
71
}
72
--
73
2.13.6
74
75
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
Block jobs already paused themselves when their main BlockBackend
2
entered a drained section. This is not good enough: We also want to
3
pause a block job and may not submit new requests if, for example, the
4
mirror target node should be drained.
2
5
3
A lot of Optional[] types don't make code beautiful.
6
This implements .drained_begin/end callbacks in child_job in order to
4
test_field_width defaults to 8, but that is never used in the code.
7
consider all block nodes related to the job, and removes the
8
BlockBackend callbacks which are unnecessary now because the root of the
9
job main BlockBackend is always referenced with a child_job, too.
5
10
6
Moreover, if we want some default behavior for a single call of
7
test_run(), it should just print the whole test name, not limiting or
8
expanding its width, so 8 is a bad default.
9
10
So, just drop the default as unused for now.
11
12
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
13
Message-Id: <20211210201450.101576-1-vsementsov@virtuozzo.com>
14
Reviewed-by: John Snow <jsnow@redhat.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
16
---
12
---
17
tests/qemu-iotests/testrunner.py | 21 ++++++++++-----------
13
blockjob.c | 22 +++++++++-------------
18
1 file changed, 10 insertions(+), 11 deletions(-)
14
1 file changed, 9 insertions(+), 13 deletions(-)
19
15
20
diff --git a/tests/qemu-iotests/testrunner.py b/tests/qemu-iotests/testrunner.py
16
diff --git a/blockjob.c b/blockjob.c
21
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
22
--- a/tests/qemu-iotests/testrunner.py
18
--- a/blockjob.c
23
+++ b/tests/qemu-iotests/testrunner.py
19
+++ b/blockjob.c
24
@@ -XXX,XX +XXX,XX @@ def __enter__(self) -> 'TestRunner':
20
@@ -XXX,XX +XXX,XX @@ static char *child_job_get_parent_desc(BdrvChild *c)
25
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
21
job->id);
26
self._stack.close()
22
}
27
23
28
- def test_print_one_line(self, test: str, starttime: str,
24
-static const BdrvChildRole child_job = {
29
+ def test_print_one_line(self, test: str,
25
- .get_parent_desc = child_job_get_parent_desc,
30
+ test_field_width: int,
26
- .stay_at_node = true,
31
+ starttime: str,
27
-};
32
endtime: Optional[str] = None, status: str = '...',
33
lasttime: Optional[float] = None,
34
thistime: Optional[float] = None,
35
description: str = '',
36
- test_field_width: Optional[int] = None,
37
end: str = '\n') -> None:
38
""" Print short test info before/after test run """
39
test = os.path.basename(test)
40
41
- if test_field_width is None:
42
- test_field_width = 8
43
-
28
-
44
if self.makecheck and status != '...':
29
-static void block_job_drained_begin(void *opaque)
45
if status and status != 'pass':
30
+static void child_job_drained_begin(BdrvChild *c)
46
status = f' [{status}]'
31
{
47
@@ -XXX,XX +XXX,XX @@ def do_run_test(self, test: str, mp: bool) -> TestResult:
32
- BlockJob *job = opaque;
48
casenotrun=casenotrun)
33
+ BlockJob *job = c->opaque;
49
34
block_job_pause(job);
50
def run_test(self, test: str,
35
}
51
- test_field_width: Optional[int] = None,
36
52
+ test_field_width: int,
37
-static void block_job_drained_end(void *opaque)
53
mp: bool = False) -> TestResult:
38
+static void child_job_drained_end(BdrvChild *c)
54
"""
39
{
55
Run one test and print short status
40
- BlockJob *job = opaque;
56
@@ -XXX,XX +XXX,XX @@ def run_test(self, test: str,
41
+ BlockJob *job = c->opaque;
57
42
block_job_resume(job);
58
if not self.makecheck:
43
}
59
self.test_print_one_line(test=test,
44
60
+ test_field_width=test_field_width,
45
-static const BlockDevOps block_job_dev_ops = {
61
status = 'started' if mp else '...',
46
- .drained_begin = block_job_drained_begin,
62
starttime=start,
47
- .drained_end = block_job_drained_end,
63
lasttime=last_el,
48
+static const BdrvChildRole child_job = {
64
- end = '\n' if mp else '\r',
49
+ .get_parent_desc = child_job_get_parent_desc,
65
- test_field_width=test_field_width)
50
+ .drained_begin = child_job_drained_begin,
66
+ end = '\n' if mp else '\r')
51
+ .drained_end = child_job_drained_end,
67
52
+ .stay_at_node = true,
68
res = self.do_run_test(test, mp)
53
};
69
54
70
end = datetime.datetime.now().strftime('%H:%M:%S')
55
void block_job_remove_all_bdrv(BlockJob *job)
71
- self.test_print_one_line(test=test, status=res.status,
56
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
72
+ self.test_print_one_line(test=test,
57
block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
73
+ test_field_width=test_field_width,
58
bs->job = job;
74
+ status=res.status,
59
75
starttime=start, endtime=end,
60
- blk_set_dev_ops(blk, &block_job_dev_ops, job);
76
lasttime=last_el, thistime=res.elapsed,
61
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
77
- description=res.description,
62
78
- test_field_width=test_field_width)
63
QLIST_INSERT_HEAD(&block_jobs, job, job_list);
79
+ description=res.description)
80
81
if res.casenotrun:
82
print(res.casenotrun)
83
--
64
--
84
2.31.1
65
2.13.6
85
66
86
67
diff view generated by jsdifflib
1
From: Hanna Reitz <hreitz@redhat.com>
1
Block jobs must be paused if any of the involved nodes are drained.
2
2
3
With CAP_DAC_OVERRIDE (which e.g. root generally has), permission checks
4
will be bypassed when opening files.
5
6
308 in one instance tries to open a read-only file (FUSE export) with
7
qemu-io as read/write, and expects this to fail. However, when running
8
it as root, opening will succeed (thanks to CAP_DAC_OVERRIDE) and only
9
the actual write operation will fail.
10
11
Note this as "Case not run", but have the test pass in either case.
12
13
Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
14
Fixes: 2c7dd057aa7bd7a875e9b1a53975c220d6380bc4
15
("export/fuse: Pass default_permissions for mount")
16
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
17
Message-Id: <20220103120014.13061-1-hreitz@redhat.com>
18
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
19
---
4
---
20
tests/qemu-iotests/308 | 25 +++++++++++++++++++++++--
5
tests/test-bdrv-drain.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
21
tests/qemu-iotests/308.out | 2 +-
6
1 file changed, 121 insertions(+)
22
2 files changed, 24 insertions(+), 3 deletions(-)
23
7
24
diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308
8
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
25
index XXXXXXX..XXXXXXX 100755
9
index XXXXXXX..XXXXXXX 100644
26
--- a/tests/qemu-iotests/308
10
--- a/tests/test-bdrv-drain.c
27
+++ b/tests/qemu-iotests/308
11
+++ b/tests/test-bdrv-drain.c
28
@@ -XXX,XX +XXX,XX @@ echo '=== Writable export ==='
12
@@ -XXX,XX +XXX,XX @@
29
fuse_export_add 'export-mp' "'mountpoint': '$EXT_MP', 'writable': true"
13
30
14
#include "qemu/osdep.h"
31
# Check that writing to the read-only export fails
15
#include "block/block.h"
32
-$QEMU_IO -f raw -c 'write -P 42 1M 64k' "$TEST_IMG" 2>&1 \
16
+#include "block/blockjob_int.h"
33
- | _filter_qemu_io | _filter_testdir | _filter_imgfmt
17
#include "sysemu/block-backend.h"
34
+output=$($QEMU_IO -f raw -c 'write -P 42 1M 64k' "$TEST_IMG" 2>&1 \
18
#include "qapi/error.h"
35
+ | _filter_qemu_io | _filter_testdir | _filter_imgfmt)
19
20
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
21
test_quiesce_common(BDRV_DRAIN, false);
22
}
23
36
+
24
+
37
+# Expected reference output: Opening the file fails because it has no
25
+typedef struct TestBlockJob {
38
+# write permission
26
+ BlockJob common;
39
+reference="Could not open 'TEST_DIR/t.IMGFMT': Permission denied"
27
+ bool should_complete;
28
+} TestBlockJob;
40
+
29
+
41
+if echo "$output" | grep -q "$reference"; then
30
+static void test_job_completed(BlockJob *job, void *opaque)
42
+ echo "Writing to read-only export failed: OK"
31
+{
43
+elif echo "$output" | grep -q "write failed: Permission denied"; then
32
+ block_job_completed(job, 0);
44
+ # With CAP_DAC_OVERRIDE (e.g. when running this test as root), the export
33
+}
45
+ # can be opened regardless of its file permissions, but writing will then
46
+ # fail. This is not the result for which we want to test, so count this as
47
+ # a SKIP.
48
+ _casenotrun "Opening RO export as R/W succeeded, perhaps because of" \
49
+ "CAP_DAC_OVERRIDE"
50
+
34
+
51
+ # Still, write this to the reference output to make the test pass
35
+static void coroutine_fn test_job_start(void *opaque)
52
+ echo "Writing to read-only export failed: OK"
36
+{
53
+else
37
+ TestBlockJob *s = opaque;
54
+ echo "Writing to read-only export failed: ERROR"
38
+
55
+ echo "$output"
39
+ while (!s->should_complete) {
56
+fi
40
+ block_job_sleep_ns(&s->common, 100000);
57
41
+ }
58
# But here it should work
42
+
59
$QEMU_IO -f raw -c 'write -P 42 1M 64k' "$EXT_MP" | _filter_qemu_io
43
+ block_job_defer_to_main_loop(&s->common, test_job_completed, NULL);
60
diff --git a/tests/qemu-iotests/308.out b/tests/qemu-iotests/308.out
44
+}
61
index XXXXXXX..XXXXXXX 100644
45
+
62
--- a/tests/qemu-iotests/308.out
46
+static void test_job_complete(BlockJob *job, Error **errp)
63
+++ b/tests/qemu-iotests/308.out
47
+{
64
@@ -XXX,XX +XXX,XX @@ virtual size: 0 B (0 bytes)
48
+ TestBlockJob *s = container_of(job, TestBlockJob, common);
65
'mountpoint': 'TEST_DIR/t.IMGFMT.fuse', 'writable': true
49
+ s->should_complete = true;
66
} }
50
+}
67
{"return": {}}
51
+
68
-qemu-io: can't open device TEST_DIR/t.IMGFMT: Could not open 'TEST_DIR/t.IMGFMT': Permission denied
52
+BlockJobDriver test_job_driver = {
69
+Writing to read-only export failed: OK
53
+ .instance_size = sizeof(TestBlockJob),
70
wrote 65536/65536 bytes at offset 1048576
54
+ .start = test_job_start,
71
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
55
+ .complete = test_job_complete,
72
wrote 65536/65536 bytes at offset 1048576
56
+};
57
+
58
+static void test_blockjob_common(enum drain_type drain_type)
59
+{
60
+ BlockBackend *blk_src, *blk_target;
61
+ BlockDriverState *src, *target;
62
+ BlockJob *job;
63
+ int ret;
64
+
65
+ src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
66
+ &error_abort);
67
+ blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
68
+ blk_insert_bs(blk_src, src, &error_abort);
69
+
70
+ target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
71
+ &error_abort);
72
+ blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
73
+ blk_insert_bs(blk_target, target, &error_abort);
74
+
75
+ job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0,
76
+ 0, NULL, NULL, &error_abort);
77
+ block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
78
+ block_job_start(job);
79
+
80
+ g_assert_cmpint(job->pause_count, ==, 0);
81
+ g_assert_false(job->paused);
82
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
83
+
84
+ do_drain_begin(drain_type, src);
85
+
86
+ if (drain_type == BDRV_DRAIN_ALL) {
87
+ /* bdrv_drain_all() drains both src and target, and involves an
88
+ * additional block_job_pause_all() */
89
+ g_assert_cmpint(job->pause_count, ==, 3);
90
+ } else {
91
+ g_assert_cmpint(job->pause_count, ==, 1);
92
+ }
93
+ /* XXX We don't wait until the job is actually paused. Is this okay? */
94
+ /* g_assert_true(job->paused); */
95
+ g_assert_false(job->busy); /* The job is paused */
96
+
97
+ do_drain_end(drain_type, src);
98
+
99
+ g_assert_cmpint(job->pause_count, ==, 0);
100
+ g_assert_false(job->paused);
101
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
102
+
103
+ do_drain_begin(drain_type, target);
104
+
105
+ if (drain_type == BDRV_DRAIN_ALL) {
106
+ /* bdrv_drain_all() drains both src and target, and involves an
107
+ * additional block_job_pause_all() */
108
+ g_assert_cmpint(job->pause_count, ==, 3);
109
+ } else {
110
+ g_assert_cmpint(job->pause_count, ==, 1);
111
+ }
112
+ /* XXX We don't wait until the job is actually paused. Is this okay? */
113
+ /* g_assert_true(job->paused); */
114
+ g_assert_false(job->busy); /* The job is paused */
115
+
116
+ do_drain_end(drain_type, target);
117
+
118
+ g_assert_cmpint(job->pause_count, ==, 0);
119
+ g_assert_false(job->paused);
120
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
121
+
122
+ ret = block_job_complete_sync(job, &error_abort);
123
+ g_assert_cmpint(ret, ==, 0);
124
+
125
+ blk_unref(blk_src);
126
+ blk_unref(blk_target);
127
+ bdrv_unref(src);
128
+ bdrv_unref(target);
129
+}
130
+
131
+static void test_blockjob_drain_all(void)
132
+{
133
+ test_blockjob_common(BDRV_DRAIN_ALL);
134
+}
135
+
136
+static void test_blockjob_drain(void)
137
+{
138
+ test_blockjob_common(BDRV_DRAIN);
139
+}
140
+
141
int main(int argc, char **argv)
142
{
143
bdrv_init();
144
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
145
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
146
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
147
148
+ g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
149
+ g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
150
+
151
return g_test_run();
152
}
73
--
153
--
74
2.31.1
154
2.13.6
75
155
76
156
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
Block jobs are already paused using the BdrvChildRole drain callbacks,
2
so we don't need an additional block_job_pause_all() call.
2
3
3
When building QEMU with --disable-vhost-user and using introspection,
4
query-qmp-schema lists vhost-user-blk even though it's not actually
5
available:
6
7
{ "execute": "query-qmp-schema" }
8
{
9
"return": [
10
...
11
{
12
"name": "312",
13
"members": [
14
{
15
"name": "nbd"
16
},
17
{
18
"name": "vhost-user-blk"
19
}
20
],
21
"meta-type": "enum",
22
"values": [
23
"nbd",
24
"vhost-user-blk"
25
]
26
},
27
28
Restrict vhost-user-blk in BlockExportType when
29
CONFIG_VHOST_USER_BLK_SERVER is disabled, so it
30
doesn't end up listed by query-qmp-schema.
31
32
Fixes: 90fc91d50b7 ("convert vhost-user-blk server to block export API")
33
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
34
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
35
Message-Id: <20220107105420.395011-4-f4bug@amsat.org>
36
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
37
---
5
---
38
qapi/block-export.json | 6 ++++--
6
block/io.c | 4 ----
39
1 file changed, 4 insertions(+), 2 deletions(-)
7
tests/test-bdrv-drain.c | 10 ++++------
8
2 files changed, 4 insertions(+), 10 deletions(-)
40
9
41
diff --git a/qapi/block-export.json b/qapi/block-export.json
10
diff --git a/block/io.c b/block/io.c
42
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
43
--- a/qapi/block-export.json
12
--- a/block/io.c
44
+++ b/qapi/block-export.json
13
+++ b/block/io.c
45
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
46
# Since: 4.2
15
* context. */
47
##
16
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
48
{ 'enum': 'BlockExportType',
17
49
- 'data': [ 'nbd', 'vhost-user-blk',
18
- block_job_pause_all();
50
+ 'data': [ 'nbd',
19
-
51
+ { 'name': 'vhost-user-blk', 'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
20
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
52
{ 'name': 'fuse', 'if': 'CONFIG_FUSE' } ] }
21
AioContext *aio_context = bdrv_get_aio_context(bs);
53
22
54
##
23
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
55
@@ -XXX,XX +XXX,XX @@
24
aio_enable_external(aio_context);
56
'discriminator': 'type',
25
aio_context_release(aio_context);
57
'data': {
26
}
58
'nbd': 'BlockExportOptionsNbd',
27
-
59
- 'vhost-user-blk': 'BlockExportOptionsVhostUserBlk',
28
- block_job_resume_all();
60
+ 'vhost-user-blk': { 'type': 'BlockExportOptionsVhostUserBlk',
29
}
61
+ 'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
30
62
'fuse': { 'type': 'BlockExportOptionsFuse',
31
void bdrv_drain_all(void)
63
'if': 'CONFIG_FUSE' }
32
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
64
} }
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tests/test-bdrv-drain.c
35
+++ b/tests/test-bdrv-drain.c
36
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
37
do_drain_begin(drain_type, src);
38
39
if (drain_type == BDRV_DRAIN_ALL) {
40
- /* bdrv_drain_all() drains both src and target, and involves an
41
- * additional block_job_pause_all() */
42
- g_assert_cmpint(job->pause_count, ==, 3);
43
+ /* bdrv_drain_all() drains both src and target */
44
+ g_assert_cmpint(job->pause_count, ==, 2);
45
} else {
46
g_assert_cmpint(job->pause_count, ==, 1);
47
}
48
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
49
do_drain_begin(drain_type, target);
50
51
if (drain_type == BDRV_DRAIN_ALL) {
52
- /* bdrv_drain_all() drains both src and target, and involves an
53
- * additional block_job_pause_all() */
54
- g_assert_cmpint(job->pause_count, ==, 3);
55
+ /* bdrv_drain_all() drains both src and target */
56
+ g_assert_cmpint(job->pause_count, ==, 2);
57
} else {
58
g_assert_cmpint(job->pause_count, ==, 1);
59
}
65
--
60
--
66
2.31.1
61
2.13.6
67
62
68
63
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
bdrv_do_drained_begin() restricts the call of parent callbacks and
2
aio_disable_external() to the outermost drain section, but the block
3
driver callbacks are always called. bdrv_do_drained_end() must match
4
this behaviour, otherwise nodes stay drained even if begin/end calls
5
were balanced.
2
6
3
Add missing vhost-user-blk help:
4
5
$ qemu-storage-daemon -h
6
...
7
--export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,
8
addr.type=unix,addr.path=<socket-path>[,writable=on|off]
9
[,logical-block-size=<block-size>][,num-queues=<num-queues>]
10
export the specified block node as a
11
vhost-user-blk device over UNIX domain socket
12
--export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,
13
fd,addr.str=<fd>[,writable=on|off]
14
[,logical-block-size=<block-size>][,num-queues=<num-queues>]
15
export the specified block node as a
16
vhost-user-blk device over file descriptor
17
...
18
19
Fixes: 90fc91d50b7 ("convert vhost-user-blk server to block export API")
20
Reported-by: Qing Wang <qinwang@redhat.com>
21
Reviewed-by: Eric Blake <eblake@redhat.com>
22
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
23
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
24
Message-Id: <20220107105420.395011-3-f4bug@amsat.org>
25
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
26
---
8
---
27
storage-daemon/qemu-storage-daemon.c | 13 +++++++++++++
9
block/io.c | 12 +++++++-----
28
1 file changed, 13 insertions(+)
10
1 file changed, 7 insertions(+), 5 deletions(-)
29
11
30
diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c
12
diff --git a/block/io.c b/block/io.c
31
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
32
--- a/storage-daemon/qemu-storage-daemon.c
14
--- a/block/io.c
33
+++ b/storage-daemon/qemu-storage-daemon.c
15
+++ b/block/io.c
34
@@ -XXX,XX +XXX,XX @@ static void help(void)
16
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
35
" export the specified block node over FUSE\n"
17
36
"\n"
18
void bdrv_drained_end(BlockDriverState *bs)
37
#endif /* CONFIG_FUSE */
19
{
38
+#ifdef CONFIG_VHOST_USER_BLK_SERVER
20
+ int old_quiesce_counter;
39
+" --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,\n"
21
+
40
+" addr.type=unix,addr.path=<socket-path>[,writable=on|off]\n"
22
if (qemu_in_coroutine()) {
41
+" [,logical-block-size=<block-size>][,num-queues=<num-queues>]\n"
23
bdrv_co_yield_to_drain(bs, false);
42
+" export the specified block node as a\n"
24
return;
43
+" vhost-user-blk device over UNIX domain socket\n"
25
}
44
+" --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,\n"
26
assert(bs->quiesce_counter > 0);
45
+" fd,addr.str=<fd>[,writable=on|off]\n"
27
- if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
46
+" [,logical-block-size=<block-size>][,num-queues=<num-queues>]\n"
28
- return;
47
+" export the specified block node as a\n"
29
- }
48
+" vhost-user-blk device over file descriptor\n"
30
+ old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
49
+"\n"
31
50
+#endif /* CONFIG_VHOST_USER_BLK_SERVER */
32
/* Re-enable things in child-to-parent order */
51
" --monitor [chardev=]name[,mode=control][,pretty[=on|off]]\n"
33
bdrv_drain_invoke(bs, false, false);
52
" configure a QMP monitor\n"
34
- bdrv_parent_drained_end(bs);
53
"\n"
35
- aio_enable_external(bdrv_get_aio_context(bs));
36
+ if (old_quiesce_counter == 1) {
37
+ bdrv_parent_drained_end(bs);
38
+ aio_enable_external(bdrv_get_aio_context(bs));
39
+ }
40
}
41
42
/*
54
--
43
--
55
2.31.1
44
2.13.6
56
45
57
46
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
3
Consider the case when the whole buffer is zero and end is unaligned.
4
5
If i <= tail, we return 1 and do one unaligned WRITE, RMW happens.
6
7
If i > tail, we do one aligned WRITE_ZERO (or skip if target is zeroed)
8
and again one unaligned WRITE, RMW happens.
9
10
Let's do better: don't fragment the whole-zero buffer and report it as
11
ZERO: in case of zeroed target we just do nothing and avoid RMW. If
12
target is not zeroes, one unaligned WRITE_ZERO should not be much worse
13
than one unaligned WRITE.
14
15
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
16
Message-Id: <20211217164654.1184218-3-vsementsov@virtuozzo.com>
17
Tested-by: Peter Lieven <pl@kamp.de>
18
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
19
---
2
---
20
qemu-img.c | 23 +++++++++++++++++++----
3
tests/test-bdrv-drain.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
21
tests/qemu-iotests/122.out | 8 ++------
4
1 file changed, 57 insertions(+)
22
2 files changed, 21 insertions(+), 10 deletions(-)
23
5
24
diff --git a/qemu-img.c b/qemu-img.c
6
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
25
index XXXXXXX..XXXXXXX 100644
7
index XXXXXXX..XXXXXXX 100644
26
--- a/qemu-img.c
8
--- a/tests/test-bdrv-drain.c
27
+++ b/qemu-img.c
9
+++ b/tests/test-bdrv-drain.c
28
@@ -XXX,XX +XXX,XX @@ static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
10
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
29
}
11
enum drain_type {
30
}
12
BDRV_DRAIN_ALL,
31
13
BDRV_DRAIN,
32
+ if (i == n) {
14
+ DRAIN_TYPE_MAX,
33
+ /*
15
};
34
+ * The whole buf is the same.
16
35
+ * No reason to split it into chunks, so return now.
17
static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
36
+ */
18
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
37
+ *pnum = i;
19
test_quiesce_common(BDRV_DRAIN, false);
38
+ return !is_zero;
20
}
21
22
+static void test_nested(void)
23
+{
24
+ BlockBackend *blk;
25
+ BlockDriverState *bs, *backing;
26
+ BDRVTestState *s, *backing_s;
27
+ enum drain_type outer, inner;
28
+
29
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
30
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
31
+ &error_abort);
32
+ s = bs->opaque;
33
+ blk_insert_bs(blk, bs, &error_abort);
34
+
35
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
36
+ backing_s = backing->opaque;
37
+ bdrv_set_backing_hd(bs, backing, &error_abort);
38
+
39
+ for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
40
+ for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
41
+ /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
42
+ int bs_quiesce = (outer != BDRV_DRAIN_ALL) +
43
+ (inner != BDRV_DRAIN_ALL);
44
+ int backing_quiesce = 0;
45
+ int backing_cb_cnt = (outer != BDRV_DRAIN) +
46
+ (inner != BDRV_DRAIN);
47
+
48
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
49
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
50
+ g_assert_cmpint(s->drain_count, ==, 0);
51
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
52
+
53
+ do_drain_begin(outer, bs);
54
+ do_drain_begin(inner, bs);
55
+
56
+ g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce);
57
+ g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
58
+ g_assert_cmpint(s->drain_count, ==, 2);
59
+ g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt);
60
+
61
+ do_drain_end(inner, bs);
62
+ do_drain_end(outer, bs);
63
+
64
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
65
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
66
+ g_assert_cmpint(s->drain_count, ==, 0);
67
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
68
+ }
39
+ }
69
+ }
40
+
70
+
41
tail = (sector_num + i) & (alignment - 1);
71
+ bdrv_unref(backing);
42
if (tail) {
72
+ bdrv_unref(bs);
43
if (is_zero && i <= tail) {
73
+ blk_unref(blk);
44
- /* treat unallocated areas which only consist
74
+}
45
- * of a small tail as allocated. */
75
+
46
+ /*
76
47
+ * For sure next sector after i is data, and it will rewrite this
77
typedef struct TestBlockJob {
48
+ * tail anyway due to RMW. So, let's just write data now.
78
BlockJob common;
49
+ */
79
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
50
is_zero = false;
80
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
51
}
81
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
52
if (!is_zero) {
82
53
- /* align up end offset of allocated areas. */
83
+ g_test_add_func("/bdrv-drain/nested", test_nested);
54
+ /* If possible, align up end offset of allocated areas. */
84
+
55
i += alignment - tail;
85
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
56
i = MIN(i, n);
86
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
57
} else {
87
58
- /* align down end offset of zero areas. */
59
+ /*
60
+ * For sure next sector after i is data, and it will rewrite this
61
+ * tail anyway due to RMW. Better is avoid RMW and write zeroes up
62
+ * to aligned bound.
63
+ */
64
i -= tail;
65
}
66
}
67
diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out
68
index XXXXXXX..XXXXXXX 100644
69
--- a/tests/qemu-iotests/122.out
70
+++ b/tests/qemu-iotests/122.out
71
@@ -XXX,XX +XXX,XX @@ convert -S 4k
72
{ "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
73
{ "start": 12288, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false},
74
{ "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
75
-{ "start": 20480, "length": 46080, "depth": 0, "present": false, "zero": true, "data": false},
76
-{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
77
-{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}]
78
+{ "start": 20480, "length": 67088384, "depth": 0, "present": false, "zero": true, "data": false}]
79
80
convert -c -S 4k
81
[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
82
@@ -XXX,XX +XXX,XX @@ convert -c -S 4k
83
84
convert -S 8k
85
[{ "start": 0, "length": 24576, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
86
-{ "start": 24576, "length": 41984, "depth": 0, "present": false, "zero": true, "data": false},
87
-{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
88
-{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}]
89
+{ "start": 24576, "length": 67084288, "depth": 0, "present": false, "zero": true, "data": false}]
90
91
convert -c -S 8k
92
[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
93
--
88
--
94
2.31.1
89
2.13.6
95
90
96
91
diff view generated by jsdifflib
New patch
1
1
This is in preparation for subtree drains, i.e. drained sections that
2
affect not only a single node, but recursively all child nodes, too.
3
4
Calling the parent callbacks for drain is pointless when we just came
5
from that parent node recursively and leads to multiple increases of
6
bs->quiesce_counter in a single drain call. Don't do it.
7
8
In order for this to work correctly, the parent callback must be called
9
for every bdrv_drain_begin/end() call, not only for the outermost one:
10
11
If we have a node N with two parents A and B, recursive draining of A
12
should cause the quiesce_counter of B to increase because its child N is
13
drained independently of B. If now B is recursively drained, too, A must
14
increase its quiesce_counter because N is drained independently of A
15
only now, even if N is going from quiesce_counter 1 to 2.
16
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
---
19
include/block/block.h | 4 ++--
20
block.c | 13 +++++++++----
21
block/io.c | 47 ++++++++++++++++++++++++++++++++++-------------
22
3 files changed, 45 insertions(+), 19 deletions(-)
23
24
diff --git a/include/block/block.h b/include/block/block.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/block/block.h
27
+++ b/include/block/block.h
28
@@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs);
29
* Begin a quiesced section of all users of @bs. This is part of
30
* bdrv_drained_begin.
31
*/
32
-void bdrv_parent_drained_begin(BlockDriverState *bs);
33
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
34
35
/**
36
* bdrv_parent_drained_end:
37
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs);
38
* End a quiesced section of all users of @bs. This is part of
39
* bdrv_drained_end.
40
*/
41
-void bdrv_parent_drained_end(BlockDriverState *bs);
42
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
43
44
/**
45
* bdrv_drained_begin:
46
diff --git a/block.c b/block.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/block.c
49
+++ b/block.c
50
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
51
BlockDriverState *new_bs)
52
{
53
BlockDriverState *old_bs = child->bs;
54
+ int i;
55
56
if (old_bs && new_bs) {
57
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
58
}
59
if (old_bs) {
60
if (old_bs->quiesce_counter && child->role->drained_end) {
61
- child->role->drained_end(child);
62
+ for (i = 0; i < old_bs->quiesce_counter; i++) {
63
+ child->role->drained_end(child);
64
+ }
65
}
66
if (child->role->detach) {
67
child->role->detach(child);
68
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
69
if (new_bs) {
70
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
71
if (new_bs->quiesce_counter && child->role->drained_begin) {
72
- child->role->drained_begin(child);
73
+ for (i = 0; i < new_bs->quiesce_counter; i++) {
74
+ child->role->drained_begin(child);
75
+ }
76
}
77
78
if (child->role->attach) {
79
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
80
AioContext *ctx = bdrv_get_aio_context(bs);
81
82
aio_disable_external(ctx);
83
- bdrv_parent_drained_begin(bs);
84
+ bdrv_parent_drained_begin(bs, NULL);
85
bdrv_drain(bs); /* ensure there are no in-flight requests */
86
87
while (aio_poll(ctx, false)) {
88
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
89
*/
90
aio_context_acquire(new_context);
91
bdrv_attach_aio_context(bs, new_context);
92
- bdrv_parent_drained_end(bs);
93
+ bdrv_parent_drained_end(bs, NULL);
94
aio_enable_external(ctx);
95
aio_context_release(new_context);
96
}
97
diff --git a/block/io.c b/block/io.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/block/io.c
100
+++ b/block/io.c
101
@@ -XXX,XX +XXX,XX @@
102
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
103
int64_t offset, int bytes, BdrvRequestFlags flags);
104
105
-void bdrv_parent_drained_begin(BlockDriverState *bs)
106
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
107
{
108
BdrvChild *c, *next;
109
110
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
111
+ if (c == ignore) {
112
+ continue;
113
+ }
114
if (c->role->drained_begin) {
115
c->role->drained_begin(c);
116
}
117
}
118
}
119
120
-void bdrv_parent_drained_end(BlockDriverState *bs)
121
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
122
{
123
BdrvChild *c, *next;
124
125
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
126
+ if (c == ignore) {
127
+ continue;
128
+ }
129
if (c->role->drained_end) {
130
c->role->drained_end(c);
131
}
132
@@ -XXX,XX +XXX,XX @@ typedef struct {
133
BlockDriverState *bs;
134
bool done;
135
bool begin;
136
+ BdrvChild *parent;
137
} BdrvCoDrainData;
138
139
static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
140
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
141
return waited;
142
}
143
144
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent);
145
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
146
+
147
static void bdrv_co_drain_bh_cb(void *opaque)
148
{
149
BdrvCoDrainData *data = opaque;
150
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
151
152
bdrv_dec_in_flight(bs);
153
if (data->begin) {
154
- bdrv_drained_begin(bs);
155
+ bdrv_do_drained_begin(bs, data->parent);
156
} else {
157
- bdrv_drained_end(bs);
158
+ bdrv_do_drained_end(bs, data->parent);
159
}
160
161
data->done = true;
162
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
163
}
164
165
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
166
- bool begin)
167
+ bool begin, BdrvChild *parent)
168
{
169
BdrvCoDrainData data;
170
171
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
172
.bs = bs,
173
.done = false,
174
.begin = begin,
175
+ .parent = parent,
176
};
177
bdrv_inc_in_flight(bs);
178
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
179
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
180
assert(data.done);
181
}
182
183
-void bdrv_drained_begin(BlockDriverState *bs)
184
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
185
{
186
if (qemu_in_coroutine()) {
187
- bdrv_co_yield_to_drain(bs, true);
188
+ bdrv_co_yield_to_drain(bs, true, parent);
189
return;
190
}
191
192
/* Stop things in parent-to-child order */
193
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
194
aio_disable_external(bdrv_get_aio_context(bs));
195
- bdrv_parent_drained_begin(bs);
196
}
197
198
+ bdrv_parent_drained_begin(bs, parent);
199
bdrv_drain_invoke(bs, true, false);
200
bdrv_drain_recurse(bs);
201
}
202
203
-void bdrv_drained_end(BlockDriverState *bs)
204
+void bdrv_drained_begin(BlockDriverState *bs)
205
+{
206
+ bdrv_do_drained_begin(bs, NULL);
207
+}
208
+
209
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
210
{
211
int old_quiesce_counter;
212
213
if (qemu_in_coroutine()) {
214
- bdrv_co_yield_to_drain(bs, false);
215
+ bdrv_co_yield_to_drain(bs, false, parent);
216
return;
217
}
218
assert(bs->quiesce_counter > 0);
219
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
220
221
/* Re-enable things in child-to-parent order */
222
bdrv_drain_invoke(bs, false, false);
223
+ bdrv_parent_drained_end(bs, parent);
224
if (old_quiesce_counter == 1) {
225
- bdrv_parent_drained_end(bs);
226
aio_enable_external(bdrv_get_aio_context(bs));
227
}
228
}
229
230
+void bdrv_drained_end(BlockDriverState *bs)
231
+{
232
+ bdrv_do_drained_end(bs, NULL);
233
+}
234
+
235
/*
236
* Wait for pending requests to complete on a single BlockDriverState subtree,
237
* and suspend block driver's internal I/O until next request arrives.
238
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
239
/* Stop things in parent-to-child order */
240
aio_context_acquire(aio_context);
241
aio_disable_external(aio_context);
242
- bdrv_parent_drained_begin(bs);
243
+ bdrv_parent_drained_begin(bs, NULL);
244
bdrv_drain_invoke(bs, true, true);
245
aio_context_release(aio_context);
246
247
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
248
/* Re-enable things in child-to-parent order */
249
aio_context_acquire(aio_context);
250
bdrv_drain_invoke(bs, false, true);
251
- bdrv_parent_drained_end(bs);
252
+ bdrv_parent_drained_end(bs, NULL);
253
aio_enable_external(aio_context);
254
aio_context_release(aio_context);
255
}
256
--
257
2.13.6
258
259
diff view generated by jsdifflib
1
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
1
bdrv_drained_begin() waits for the completion of requests in the whole
2
subtree, but it only actually keeps its immediate bs parameter quiesced
3
until bdrv_drained_end().
2
4
3
drive_def is only a particular use case of
5
Add a version that keeps the whole subtree drained. As of this commit,
4
qemu_opts_parse_noisily, so it can be inlined.
6
graph changes cannot be allowed during a subtree drained section, but
7
this will be fixed soon.
5
8
6
Also remove drive_mark_claimed_by_board, as it is only defined
7
but not implemented (nor used) anywhere.
8
9
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
10
Message-Id: <20211215121140.456939-3-eesposit@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
10
---
13
include/sysemu/blockdev.h | 2 --
11
include/block/block.h | 13 +++++++++++++
14
block/monitor/block-hmp-cmds.c | 2 +-
12
block/io.c | 54 ++++++++++++++++++++++++++++++++++++++++-----------
15
blockdev.c | 7 +------
13
2 files changed, 56 insertions(+), 11 deletions(-)
16
softmmu/vl.c | 4 +++-
17
4 files changed, 5 insertions(+), 10 deletions(-)
18
14
19
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
15
diff --git a/include/block/block.h b/include/block/block.h
20
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
21
--- a/include/sysemu/blockdev.h
17
--- a/include/block/block.h
22
+++ b/include/sysemu/blockdev.h
18
+++ b/include/block/block.h
23
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo);
19
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
24
void override_max_devs(BlockInterfaceType type, int max_devs);
20
void bdrv_drained_begin(BlockDriverState *bs);
25
21
26
DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit);
22
/**
27
-void drive_mark_claimed_by_board(void);
23
+ * Like bdrv_drained_begin, but recursively begins a quiesced section for
28
void drive_check_orphaned(void);
24
+ * exclusive access to all child nodes as well.
29
DriveInfo *drive_get_by_index(BlockInterfaceType type, int index);
25
+ *
30
int drive_get_max_bus(BlockInterfaceType type);
26
+ * Graph changes are not allowed during a subtree drain section.
31
int drive_get_max_devs(BlockInterfaceType type);
27
+ */
32
28
+void bdrv_subtree_drained_begin(BlockDriverState *bs);
33
-QemuOpts *drive_def(const char *optstr);
29
+
34
QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
30
+/**
35
const char *optstr);
31
* bdrv_drained_end:
36
DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type,
32
*
37
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
33
* End a quiescent section started by bdrv_drained_begin().
34
*/
35
void bdrv_drained_end(BlockDriverState *bs);
36
37
+/**
38
+ * End a quiescent section started by bdrv_subtree_drained_begin().
39
+ */
40
+void bdrv_subtree_drained_end(BlockDriverState *bs);
41
+
42
void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
43
Error **errp);
44
void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
45
diff --git a/block/io.c b/block/io.c
38
index XXXXXXX..XXXXXXX 100644
46
index XXXXXXX..XXXXXXX 100644
39
--- a/block/monitor/block-hmp-cmds.c
47
--- a/block/io.c
40
+++ b/block/monitor/block-hmp-cmds.c
48
+++ b/block/io.c
41
@@ -XXX,XX +XXX,XX @@ void hmp_drive_add(Monitor *mon, const QDict *qdict)
49
@@ -XXX,XX +XXX,XX @@ typedef struct {
50
BlockDriverState *bs;
51
bool done;
52
bool begin;
53
+ bool recursive;
54
BdrvChild *parent;
55
} BdrvCoDrainData;
56
57
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
58
return waited;
59
}
60
61
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent);
62
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
63
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
64
+ BdrvChild *parent);
65
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
66
+ BdrvChild *parent);
67
68
static void bdrv_co_drain_bh_cb(void *opaque)
69
{
70
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
71
72
bdrv_dec_in_flight(bs);
73
if (data->begin) {
74
- bdrv_do_drained_begin(bs, data->parent);
75
+ bdrv_do_drained_begin(bs, data->recursive, data->parent);
76
} else {
77
- bdrv_do_drained_end(bs, data->parent);
78
+ bdrv_do_drained_end(bs, data->recursive, data->parent);
79
}
80
81
data->done = true;
82
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
83
}
84
85
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
86
- bool begin, BdrvChild *parent)
87
+ bool begin, bool recursive,
88
+ BdrvChild *parent)
89
{
90
BdrvCoDrainData data;
91
92
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
93
.bs = bs,
94
.done = false,
95
.begin = begin,
96
+ .recursive = recursive,
97
.parent = parent,
98
};
99
bdrv_inc_in_flight(bs);
100
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
101
assert(data.done);
102
}
103
104
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
105
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
106
+ BdrvChild *parent)
107
{
108
+ BdrvChild *child, *next;
109
+
110
if (qemu_in_coroutine()) {
111
- bdrv_co_yield_to_drain(bs, true, parent);
112
+ bdrv_co_yield_to_drain(bs, true, recursive, parent);
42
return;
113
return;
43
}
114
}
44
115
45
- opts = drive_def(optstr);
116
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
46
+ opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
117
bdrv_parent_drained_begin(bs, parent);
47
if (!opts)
118
bdrv_drain_invoke(bs, true, false);
119
bdrv_drain_recurse(bs);
120
+
121
+ if (recursive) {
122
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
123
+ bdrv_do_drained_begin(child->bs, true, child);
124
+ }
125
+ }
126
}
127
128
void bdrv_drained_begin(BlockDriverState *bs)
129
{
130
- bdrv_do_drained_begin(bs, NULL);
131
+ bdrv_do_drained_begin(bs, false, NULL);
132
+}
133
+
134
+void bdrv_subtree_drained_begin(BlockDriverState *bs)
135
+{
136
+ bdrv_do_drained_begin(bs, true, NULL);
137
}
138
139
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
140
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
141
+ BdrvChild *parent)
142
{
143
+ BdrvChild *child, *next;
144
int old_quiesce_counter;
145
146
if (qemu_in_coroutine()) {
147
- bdrv_co_yield_to_drain(bs, false, parent);
148
+ bdrv_co_yield_to_drain(bs, false, recursive, parent);
48
return;
149
return;
49
150
}
50
diff --git a/blockdev.c b/blockdev.c
151
assert(bs->quiesce_counter > 0);
51
index XXXXXXX..XXXXXXX 100644
152
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
52
--- a/blockdev.c
153
if (old_quiesce_counter == 1) {
53
+++ b/blockdev.c
154
aio_enable_external(bdrv_get_aio_context(bs));
54
@@ -XXX,XX +XXX,XX @@ static int drive_index_to_unit_id(BlockInterfaceType type, int index)
155
}
55
return max_devs ? index % max_devs : index;
156
+
157
+ if (recursive) {
158
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
159
+ bdrv_do_drained_end(child->bs, true, child);
160
+ }
161
+ }
56
}
162
}
57
163
58
-QemuOpts *drive_def(const char *optstr)
164
void bdrv_drained_end(BlockDriverState *bs)
59
-{
60
- return qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
61
-}
62
-
63
QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
64
const char *optstr)
65
{
165
{
66
QemuOpts *opts;
166
- bdrv_do_drained_end(bs, NULL);
67
167
+ bdrv_do_drained_end(bs, false, NULL);
68
- opts = drive_def(optstr);
168
+}
69
+ opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
169
+
70
if (!opts) {
170
+void bdrv_subtree_drained_end(BlockDriverState *bs)
71
return NULL;
171
+{
72
}
172
+ bdrv_do_drained_end(bs, true, NULL);
73
diff --git a/softmmu/vl.c b/softmmu/vl.c
173
}
74
index XXXXXXX..XXXXXXX 100644
174
75
--- a/softmmu/vl.c
175
/*
76
+++ b/softmmu/vl.c
77
@@ -XXX,XX +XXX,XX @@ void qemu_init(int argc, char **argv, char **envp)
78
break;
79
}
80
case QEMU_OPTION_drive:
81
- if (drive_def(optarg) == NULL) {
82
+ opts = qemu_opts_parse_noisily(qemu_find_opts("drive"),
83
+ optarg, false);
84
+ if (opts == NULL) {
85
exit(1);
86
}
87
break;
88
--
176
--
89
2.31.1
177
2.13.6
90
178
91
179
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
Add a subtree drain version to the existing test cases.
2
2
3
The BlockBackend root child can change when aio_poll() is invoked. This
4
happens when a temporary filter node is removed upon blockjob
5
completion, for example.
6
7
Functions in block/block-backend.c must be aware of this when using a
8
blk_bs() pointer across aio_poll() because the BlockDriverState refcnt
9
may reach 0, resulting in a stale pointer.
10
11
One example is scsi_device_purge_requests(), which calls blk_drain() to
12
wait for in-flight requests to cancel. If the backup blockjob is active,
13
then the BlockBackend root child is a temporary filter BDS owned by the
14
blockjob. The blockjob can complete during bdrv_drained_begin() and the
15
last reference to the BDS is released when the temporary filter node is
16
removed. This results in a use-after-free when blk_drain() calls
17
bdrv_drained_end(bs) on the dangling pointer.
18
19
Explicitly hold a reference to bs across block APIs that invoke
20
aio_poll().
21
22
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2021778
23
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178
24
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
25
Message-Id: <20220111153613.25453-2-stefanha@redhat.com>
26
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
27
---
4
---
28
block/block-backend.c | 19 +++++++++++++++++--
5
tests/test-bdrv-drain.c | 27 ++++++++++++++++++++++++++-
29
1 file changed, 17 insertions(+), 2 deletions(-)
6
1 file changed, 26 insertions(+), 1 deletion(-)
30
7
31
diff --git a/block/block-backend.c b/block/block-backend.c
8
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
32
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
33
--- a/block/block-backend.c
10
--- a/tests/test-bdrv-drain.c
34
+++ b/block/block-backend.c
11
+++ b/tests/test-bdrv-drain.c
35
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
12
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
36
void blk_remove_bs(BlockBackend *blk)
13
enum drain_type {
37
{
14
BDRV_DRAIN_ALL,
38
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
15
BDRV_DRAIN,
39
- BlockDriverState *bs;
16
+ BDRV_SUBTREE_DRAIN,
40
BdrvChild *root;
17
DRAIN_TYPE_MAX,
41
18
};
42
notifier_list_notify(&blk->remove_bs_notifiers, blk);
19
43
if (tgm->throttle_state) {
20
@@ -XXX,XX +XXX,XX @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
44
- bs = blk_bs(blk);
21
switch (drain_type) {
45
+ BlockDriverState *bs = blk_bs(blk);
22
case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
46
+
23
case BDRV_DRAIN: bdrv_drained_begin(bs); break;
47
+ /*
24
+ case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break;
48
+ * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for
25
default: g_assert_not_reached();
49
+ * example, if a temporary filter node is removed by a blockjob.
50
+ */
51
+ bdrv_ref(bs);
52
bdrv_drained_begin(bs);
53
throttle_group_detach_aio_context(tgm);
54
throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
55
bdrv_drained_end(bs);
56
+ bdrv_unref(bs);
57
}
58
59
blk_update_root_state(blk);
60
@@ -XXX,XX +XXX,XX @@ void blk_drain(BlockBackend *blk)
61
BlockDriverState *bs = blk_bs(blk);
62
63
if (bs) {
64
+ bdrv_ref(bs);
65
bdrv_drained_begin(bs);
66
}
67
68
@@ -XXX,XX +XXX,XX @@ void blk_drain(BlockBackend *blk)
69
70
if (bs) {
71
bdrv_drained_end(bs);
72
+ bdrv_unref(bs);
73
}
26
}
74
}
27
}
75
28
@@ -XXX,XX +XXX,XX @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
76
@@ -XXX,XX +XXX,XX @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
29
switch (drain_type) {
77
int ret;
30
case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
78
31
case BDRV_DRAIN: bdrv_drained_end(bs); break;
79
if (bs) {
32
+ case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break;
80
+ bdrv_ref(bs);
33
default: g_assert_not_reached();
81
+
82
if (update_root_node) {
83
ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
84
errp);
85
if (ret < 0) {
86
+ bdrv_unref(bs);
87
return ret;
88
}
89
}
90
@@ -XXX,XX +XXX,XX @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
91
throttle_group_attach_aio_context(tgm, new_context);
92
bdrv_drained_end(bs);
93
}
94
+
95
+ bdrv_unref(bs);
96
}
97
98
blk->ctx = new_context;
99
@@ -XXX,XX +XXX,XX @@ void blk_io_limits_disable(BlockBackend *blk)
100
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
101
assert(tgm->throttle_state);
102
if (bs) {
103
+ bdrv_ref(bs);
104
bdrv_drained_begin(bs);
105
}
106
throttle_group_unregister_tgm(tgm);
107
if (bs) {
108
bdrv_drained_end(bs);
109
+ bdrv_unref(bs);
110
}
34
}
111
}
35
}
112
36
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void)
37
test_drv_cb_common(BDRV_DRAIN, false);
38
}
39
40
+static void test_drv_cb_drain_subtree(void)
41
+{
42
+ test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
43
+}
44
+
45
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
46
{
47
BlockBackend *blk;
48
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
49
test_quiesce_common(BDRV_DRAIN, false);
50
}
51
52
+static void test_quiesce_drain_subtree(void)
53
+{
54
+ test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
55
+}
56
+
57
static void test_nested(void)
58
{
59
BlockBackend *blk;
60
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)
61
/* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
62
int bs_quiesce = (outer != BDRV_DRAIN_ALL) +
63
(inner != BDRV_DRAIN_ALL);
64
- int backing_quiesce = 0;
65
+ int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) +
66
+ (inner == BDRV_SUBTREE_DRAIN);
67
int backing_cb_cnt = (outer != BDRV_DRAIN) +
68
(inner != BDRV_DRAIN);
69
70
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain(void)
71
test_blockjob_common(BDRV_DRAIN);
72
}
73
74
+static void test_blockjob_drain_subtree(void)
75
+{
76
+ test_blockjob_common(BDRV_SUBTREE_DRAIN);
77
+}
78
+
79
int main(int argc, char **argv)
80
{
81
bdrv_init();
82
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
83
84
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
85
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
86
+ g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
87
+ test_drv_cb_drain_subtree);
88
89
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
90
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
91
+ g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
92
+ test_quiesce_drain_subtree);
93
94
g_test_add_func("/bdrv-drain/nested", test_nested);
95
96
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
97
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
98
+ g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
99
+ test_blockjob_drain_subtree);
100
101
return g_test_run();
102
}
113
--
103
--
114
2.31.1
104
2.13.6
115
105
116
106
diff view generated by jsdifflib
1
From: Hanna Reitz <hreitz@redhat.com>
1
If bdrv_do_drained_begin/end() are called in coroutine context, they
2
first use a BH to get out of the coroutine context. Call some existing
3
tests again from a coroutine to cover this code path.
2
4
3
Test the following scenario:
4
- Simple stream block job in a two-layer backing chain (base and top)
5
- The job is drained via blk_drain(), then an error occurs while the job
6
settles the ongoing request
7
- And so the job completes while in blk_drain()
8
9
This was reported as a segfault, but is fixed by "block-backend: prevent
10
dangling BDS pointers across aio_poll()".
11
12
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178
13
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Message-Id: <20220111153613.25453-3-stefanha@redhat.com>
16
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
---
6
---
18
.../qemu-iotests/tests/stream-error-on-reset | 140 ++++++++++++++++++
7
tests/test-bdrv-drain.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
19
.../tests/stream-error-on-reset.out | 5 +
8
1 file changed, 59 insertions(+)
20
2 files changed, 145 insertions(+)
21
create mode 100755 tests/qemu-iotests/tests/stream-error-on-reset
22
create mode 100644 tests/qemu-iotests/tests/stream-error-on-reset.out
23
9
24
diff --git a/tests/qemu-iotests/tests/stream-error-on-reset b/tests/qemu-iotests/tests/stream-error-on-reset
10
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
25
new file mode 100755
11
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX
12
--- a/tests/test-bdrv-drain.c
27
--- /dev/null
13
+++ b/tests/test-bdrv-drain.c
28
+++ b/tests/qemu-iotests/tests/stream-error-on-reset
14
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
29
@@ -XXX,XX +XXX,XX @@
15
*aio_ret = ret;
30
+#!/usr/bin/env python3
16
}
31
+# group: rw quick
17
32
+#
18
+typedef struct CallInCoroutineData {
33
+# Test what happens when a stream job completes in a blk_drain().
19
+ void (*entry)(void);
34
+#
20
+ bool done;
35
+# Copyright (C) 2022 Red Hat, Inc.
21
+} CallInCoroutineData;
36
+#
37
+# This program is free software; you can redistribute it and/or modify
38
+# it under the terms of the GNU General Public License as published by
39
+# the Free Software Foundation; either version 2 of the License, or
40
+# (at your option) any later version.
41
+#
42
+# This program is distributed in the hope that it will be useful,
43
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
44
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45
+# GNU General Public License for more details.
46
+#
47
+# You should have received a copy of the GNU General Public License
48
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
49
+#
50
+
22
+
51
+import os
23
+static coroutine_fn void call_in_coroutine_entry(void *opaque)
52
+import iotests
24
+{
53
+from iotests import imgfmt, qemu_img_create, qemu_io_silent, QMPTestCase
25
+ CallInCoroutineData *data = opaque;
26
+
27
+ data->entry();
28
+ data->done = true;
29
+}
30
+
31
+static void call_in_coroutine(void (*entry)(void))
32
+{
33
+ Coroutine *co;
34
+ CallInCoroutineData data = {
35
+ .entry = entry,
36
+ .done = false,
37
+ };
38
+
39
+ co = qemu_coroutine_create(call_in_coroutine_entry, &data);
40
+ qemu_coroutine_enter(co);
41
+ while (!data.done) {
42
+ aio_poll(qemu_get_aio_context(), true);
43
+ }
44
+}
45
+
46
enum drain_type {
47
BDRV_DRAIN_ALL,
48
BDRV_DRAIN,
49
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void)
50
test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
51
}
52
53
+static void test_drv_cb_co_drain(void)
54
+{
55
+ call_in_coroutine(test_drv_cb_drain);
56
+}
57
+
58
+static void test_drv_cb_co_drain_subtree(void)
59
+{
60
+ call_in_coroutine(test_drv_cb_drain_subtree);
61
+}
62
+
63
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
64
{
65
BlockBackend *blk;
66
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void)
67
test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
68
}
69
70
+static void test_quiesce_co_drain(void)
71
+{
72
+ call_in_coroutine(test_quiesce_drain);
73
+}
74
+
75
+static void test_quiesce_co_drain_subtree(void)
76
+{
77
+ call_in_coroutine(test_quiesce_drain_subtree);
78
+}
79
+
80
static void test_nested(void)
81
{
82
BlockBackend *blk;
83
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
84
g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
85
test_drv_cb_drain_subtree);
86
87
+ // XXX bdrv_drain_all() doesn't work in coroutine context
88
+ g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
89
+ g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
90
+ test_drv_cb_co_drain_subtree);
54
+
91
+
55
+
92
+
56
+image_size = 1 * 1024 * 1024
93
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
57
+data_size = 64 * 1024
94
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
58
+base = os.path.join(iotests.test_dir, 'base.img')
95
g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
59
+top = os.path.join(iotests.test_dir, 'top.img')
96
test_quiesce_drain_subtree);
97
98
+ // XXX bdrv_drain_all() doesn't work in coroutine context
99
+ g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
100
+ g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
101
+ test_quiesce_co_drain_subtree);
60
+
102
+
61
+
103
g_test_add_func("/bdrv-drain/nested", test_nested);
62
+# We want to test completing a stream job in a blk_drain().
104
63
+#
105
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
64
+# The blk_drain() we are going to use is a virtio-scsi device resetting,
65
+# which we can trigger by resetting the system.
66
+#
67
+# In order to have the block job complete on drain, we (1) throttle its
68
+# base image so we can start the drain after it has begun, but before it
69
+# completes, and (2) make it encounter an I/O error on the ensuing write.
70
+# (If it completes regularly, the completion happens after the drain for
71
+# some reason.)
72
+
73
+class TestStreamErrorOnReset(QMPTestCase):
74
+ def setUp(self) -> None:
75
+ """
76
+ Create two images:
77
+ - base image {base} with {data_size} bytes allocated
78
+ - top image {top} without any data allocated
79
+
80
+ And the following VM configuration:
81
+ - base image throttled to {data_size}
82
+ - top image with a blkdebug configuration so the first write access
83
+ to it will result in an error
84
+ - top image is attached to a virtio-scsi device
85
+ """
86
+ assert qemu_img_create('-f', imgfmt, base, str(image_size)) == 0
87
+ assert qemu_io_silent('-c', f'write 0 {data_size}', base) == 0
88
+ assert qemu_img_create('-f', imgfmt, top, str(image_size)) == 0
89
+
90
+ self.vm = iotests.VM()
91
+ self.vm.add_args('-accel', 'tcg') # Make throttling work properly
92
+ self.vm.add_object(self.vm.qmp_to_opts({
93
+ 'qom-type': 'throttle-group',
94
+ 'id': 'thrgr',
95
+ 'x-bps-total': str(data_size)
96
+ }))
97
+ self.vm.add_blockdev(self.vm.qmp_to_opts({
98
+ 'driver': imgfmt,
99
+ 'node-name': 'base',
100
+ 'file': {
101
+ 'driver': 'throttle',
102
+ 'throttle-group': 'thrgr',
103
+ 'file': {
104
+ 'driver': 'file',
105
+ 'filename': base
106
+ }
107
+ }
108
+ }))
109
+ self.vm.add_blockdev(self.vm.qmp_to_opts({
110
+ 'driver': imgfmt,
111
+ 'node-name': 'top',
112
+ 'file': {
113
+ 'driver': 'blkdebug',
114
+ 'node-name': 'top-blkdebug',
115
+ 'inject-error': [{
116
+ 'event': 'pwritev',
117
+ 'immediately': 'true',
118
+ 'once': 'true'
119
+ }],
120
+ 'image': {
121
+ 'driver': 'file',
122
+ 'filename': top
123
+ }
124
+ },
125
+ 'backing': 'base'
126
+ }))
127
+ self.vm.add_device(self.vm.qmp_to_opts({
128
+ 'driver': 'virtio-scsi',
129
+ 'id': 'vscsi'
130
+ }))
131
+ self.vm.add_device(self.vm.qmp_to_opts({
132
+ 'driver': 'scsi-hd',
133
+ 'bus': 'vscsi.0',
134
+ 'drive': 'top'
135
+ }))
136
+ self.vm.launch()
137
+
138
+ def tearDown(self) -> None:
139
+ self.vm.shutdown()
140
+ os.remove(top)
141
+ os.remove(base)
142
+
143
+ def test_stream_error_on_reset(self) -> None:
144
+ # Launch a stream job, which will take at least a second to
145
+ # complete, because the base image is throttled (so we can
146
+ # get in between it having started and it having completed)
147
+ res = self.vm.qmp('block-stream', job_id='stream', device='top')
148
+ self.assert_qmp(res, 'return', {})
149
+
150
+ while True:
151
+ ev = self.vm.event_wait('JOB_STATUS_CHANGE')
152
+ if ev['data']['status'] == 'running':
153
+ # Once the stream job is running, reset the system, which
154
+ # forces the virtio-scsi device to be reset, thus draining
155
+ # the stream job, and making it complete. Completing
156
+ # inside of that drain should not result in a segfault.
157
+ res = self.vm.qmp('system_reset')
158
+ self.assert_qmp(res, 'return', {})
159
+ elif ev['data']['status'] == 'null':
160
+ # The test is done once the job is gone
161
+ break
162
+
163
+
164
+if __name__ == '__main__':
165
+ # Passes with any format with backing file support, but qed and
166
+ # qcow1 do not seem to exercise the used-to-be problematic code
167
+ # path, so there is no point in having them in this list
168
+ iotests.main(supported_fmts=['qcow2', 'vmdk'],
169
+ supported_protocols=['file'])
170
diff --git a/tests/qemu-iotests/tests/stream-error-on-reset.out b/tests/qemu-iotests/tests/stream-error-on-reset.out
171
new file mode 100644
172
index XXXXXXX..XXXXXXX
173
--- /dev/null
174
+++ b/tests/qemu-iotests/tests/stream-error-on-reset.out
175
@@ -XXX,XX +XXX,XX @@
176
+.
177
+----------------------------------------------------------------------
178
+Ran 1 tests
179
+
180
+OK
181
--
106
--
182
2.31.1
107
2.13.6
183
108
184
109
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Test that drain sections are correctly propagated through the graph.
2
2
3
Reported-by: Eric Blake <eblake@redhat.com>
4
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Message-Id: <20220107105420.395011-2-f4bug@amsat.org>
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
---
4
---
8
docs/tools/qemu-storage-daemon.rst | 2 +-
5
tests/test-bdrv-drain.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++
9
1 file changed, 1 insertion(+), 1 deletion(-)
6
1 file changed, 74 insertions(+)
10
7
11
diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst
8
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
12
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
13
--- a/docs/tools/qemu-storage-daemon.rst
10
--- a/tests/test-bdrv-drain.c
14
+++ b/docs/tools/qemu-storage-daemon.rst
11
+++ b/tests/test-bdrv-drain.c
15
@@ -XXX,XX +XXX,XX @@ Export raw image file ``disk.img`` over NBD UNIX domain socket ``nbd.sock``::
12
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)
16
--nbd-server addr.type=unix,addr.path=nbd.sock \
13
blk_unref(blk);
17
--export type=nbd,id=export,node-name=disk,writable=on
14
}
18
15
19
-Export a qcow2 image file ``disk.qcow2`` as a vhosts-user-blk device over UNIX
16
+static void test_multiparent(void)
20
+Export a qcow2 image file ``disk.qcow2`` as a vhost-user-blk device over UNIX
17
+{
21
domain socket ``vhost-user-blk.sock``::
18
+ BlockBackend *blk_a, *blk_b;
22
19
+ BlockDriverState *bs_a, *bs_b, *backing;
23
$ qemu-storage-daemon \
20
+ BDRVTestState *a_s, *b_s, *backing_s;
21
+
22
+ blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
23
+ bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
24
+ &error_abort);
25
+ a_s = bs_a->opaque;
26
+ blk_insert_bs(blk_a, bs_a, &error_abort);
27
+
28
+ blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
29
+ bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
30
+ &error_abort);
31
+ b_s = bs_b->opaque;
32
+ blk_insert_bs(blk_b, bs_b, &error_abort);
33
+
34
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
35
+ backing_s = backing->opaque;
36
+ bdrv_set_backing_hd(bs_a, backing, &error_abort);
37
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
38
+
39
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
40
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
41
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
42
+ g_assert_cmpint(a_s->drain_count, ==, 0);
43
+ g_assert_cmpint(b_s->drain_count, ==, 0);
44
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
45
+
46
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
47
+
48
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
49
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
50
+ g_assert_cmpint(backing->quiesce_counter, ==, 1);
51
+ g_assert_cmpint(a_s->drain_count, ==, 1);
52
+ g_assert_cmpint(b_s->drain_count, ==, 1);
53
+ g_assert_cmpint(backing_s->drain_count, ==, 1);
54
+
55
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
56
+
57
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
58
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
59
+ g_assert_cmpint(backing->quiesce_counter, ==, 2);
60
+ g_assert_cmpint(a_s->drain_count, ==, 2);
61
+ g_assert_cmpint(b_s->drain_count, ==, 2);
62
+ g_assert_cmpint(backing_s->drain_count, ==, 2);
63
+
64
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
65
+
66
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
67
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
68
+ g_assert_cmpint(backing->quiesce_counter, ==, 1);
69
+ g_assert_cmpint(a_s->drain_count, ==, 1);
70
+ g_assert_cmpint(b_s->drain_count, ==, 1);
71
+ g_assert_cmpint(backing_s->drain_count, ==, 1);
72
+
73
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
74
+
75
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
76
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
77
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
78
+ g_assert_cmpint(a_s->drain_count, ==, 0);
79
+ g_assert_cmpint(b_s->drain_count, ==, 0);
80
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
81
+
82
+ bdrv_unref(backing);
83
+ bdrv_unref(bs_a);
84
+ bdrv_unref(bs_b);
85
+ blk_unref(blk_a);
86
+ blk_unref(blk_b);
87
+}
88
+
89
90
typedef struct TestBlockJob {
91
BlockJob common;
92
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
93
test_quiesce_co_drain_subtree);
94
95
g_test_add_func("/bdrv-drain/nested", test_nested);
96
+ g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
97
98
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
99
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
24
--
100
--
25
2.31.1
101
2.13.6
26
102
27
103
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
We need to remember how many of the drain sections in which a node is
2
2
were recursive (i.e. subtree drain rather than node drain), so that they
3
First, this permission never protected a node from being changed, as
3
can be correctly applied when children are added or removed during the
4
generic child-replacing functions don't check it.
4
drained section.
5
5
6
Second, it's a strange thing: it presents a permission of parent node
6
With this change, it is safe to modify the graph even inside a
7
to change its child. But generally, children are replaced by different
7
bdrv_subtree_drained_begin/end() section.
8
mechanisms, like jobs or qmp commands, not by nodes.
8
9
10
Graph-mod permission is hard to understand. All other permissions
11
describe operations which are done by a parent node on its child: read,
12
write, resize. Graph modification operations are something completely
13
different.
14
15
The only place where BLK_PERM_GRAPH_MOD is used as "perm" (not shared
16
perm) is mirror_start_job, for s->target. Still modern code should use
17
bdrv_freeze_backing_chain() to protect from graph modification, if we
18
don't do it somewhere it may be considered as a bug. So, it's a bit
19
risky to drop GRAPH_MOD, and analyzing of possible loss of protection
20
is hard. But one day we should do it, let's do it now.
21
22
One more bit of information is that locking the corresponding byte in
23
file-posix doesn't make sense at all.
24
25
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
26
Message-Id: <20210902093754.2352-1-vsementsov@virtuozzo.com>
27
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
28
---
10
---
29
qapi/block-core.json | 7 ++-----
11
include/block/block.h | 2 --
30
include/block/block.h | 9 +++++----
12
include/block/block_int.h | 5 +++++
31
block.c | 7 +------
13
block.c | 32 +++++++++++++++++++++++++++++---
32
block/commit.c | 1 -
14
block/io.c | 28 ++++++++++++++++++++++++----
33
block/mirror.c | 15 +++------------
15
4 files changed, 58 insertions(+), 9 deletions(-)
34
hw/block/block.c | 3 +--
16
35
scripts/render_block_graph.py | 1 -
36
tests/qemu-iotests/273.out | 4 ----
37
8 files changed, 12 insertions(+), 35 deletions(-)
38
39
diff --git a/qapi/block-core.json b/qapi/block-core.json
40
index XXXXXXX..XXXXXXX 100644
41
--- a/qapi/block-core.json
42
+++ b/qapi/block-core.json
43
@@ -XXX,XX +XXX,XX @@
44
#
45
# @resize: This permission is required to change the size of a block node.
46
#
47
-# @graph-mod: This permission is required to change the node that this
48
-# BdrvChild points to.
49
-#
50
# Since: 4.0
51
##
52
{ 'enum': 'BlockPermission',
53
- 'data': [ 'consistent-read', 'write', 'write-unchanged', 'resize',
54
- 'graph-mod' ] }
55
+ 'data': [ 'consistent-read', 'write', 'write-unchanged', 'resize' ] }
56
+
57
##
58
# @XDbgBlockGraphEdge:
59
#
60
diff --git a/include/block/block.h b/include/block/block.h
17
diff --git a/include/block/block.h b/include/block/block.h
61
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
62
--- a/include/block/block.h
19
--- a/include/block/block.h
63
+++ b/include/block/block.h
20
+++ b/include/block/block.h
64
@@ -XXX,XX +XXX,XX @@ enum {
21
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs);
65
BLK_PERM_RESIZE = 0x08,
22
/**
66
23
* Like bdrv_drained_begin, but recursively begins a quiesced section for
67
/**
24
* exclusive access to all child nodes as well.
68
- * This permission is required to change the node that this BdrvChild
25
- *
69
- * points to.
26
- * Graph changes are not allowed during a subtree drain section.
70
+ * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU
27
*/
71
+ * 6.1 and earlier may still lock the corresponding byte in block/file-posix
28
void bdrv_subtree_drained_begin(BlockDriverState *bs);
72
+ * locking. So, implementing some new permission should be very careful to
29
73
+ * not interfere with this old unused thing.
30
diff --git a/include/block/block_int.h b/include/block/block_int.h
74
*/
31
index XXXXXXX..XXXXXXX 100644
75
- BLK_PERM_GRAPH_MOD = 0x10,
32
--- a/include/block/block_int.h
76
33
+++ b/include/block/block_int.h
77
- BLK_PERM_ALL = 0x1f,
34
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
78
+ BLK_PERM_ALL = 0x0f,
35
79
36
/* Accessed with atomic ops. */
80
DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
37
int quiesce_counter;
81
| BLK_PERM_WRITE
38
+ int recursive_quiesce_counter;
39
+
40
unsigned int write_gen; /* Current data generation */
41
42
/* Protected by reqs_lock. */
43
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
44
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
45
BdrvRequestFlags flags);
46
47
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
48
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
49
+
50
int get_tmp_filename(char *filename, int size);
51
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
52
const char *filename);
82
diff --git a/block.c b/block.c
53
diff --git a/block.c b/block.c
83
index XXXXXXX..XXXXXXX 100644
54
index XXXXXXX..XXXXXXX 100644
84
--- a/block.c
55
--- a/block.c
85
+++ b/block.c
56
+++ b/block.c
86
@@ -XXX,XX +XXX,XX @@ char *bdrv_perm_names(uint64_t perm)
57
@@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_end(BdrvChild *child)
87
{ BLK_PERM_WRITE, "write" },
58
bdrv_drained_end(bs);
88
{ BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
59
}
89
{ BLK_PERM_RESIZE, "resize" },
60
90
- { BLK_PERM_GRAPH_MOD, "change children" },
61
+static void bdrv_child_cb_attach(BdrvChild *child)
91
{ 0, NULL }
62
+{
92
};
63
+ BlockDriverState *bs = child->opaque;
93
64
+ bdrv_apply_subtree_drain(child, bs);
94
@@ -XXX,XX +XXX,XX @@ static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
65
+}
95
shared = 0;
66
+
67
+static void bdrv_child_cb_detach(BdrvChild *child)
68
+{
69
+ BlockDriverState *bs = child->opaque;
70
+ bdrv_unapply_subtree_drain(child, bs);
71
+}
72
+
73
static int bdrv_child_cb_inactivate(BdrvChild *child)
74
{
75
BlockDriverState *bs = child->opaque;
76
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = {
77
.inherit_options = bdrv_inherited_options,
78
.drained_begin = bdrv_child_cb_drained_begin,
79
.drained_end = bdrv_child_cb_drained_end,
80
+ .attach = bdrv_child_cb_attach,
81
+ .detach = bdrv_child_cb_detach,
82
.inactivate = bdrv_child_cb_inactivate,
83
};
84
85
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = {
86
.inherit_options = bdrv_inherited_fmt_options,
87
.drained_begin = bdrv_child_cb_drained_begin,
88
.drained_end = bdrv_child_cb_drained_end,
89
+ .attach = bdrv_child_cb_attach,
90
+ .detach = bdrv_child_cb_detach,
91
.inactivate = bdrv_child_cb_inactivate,
92
};
93
94
@@ -XXX,XX +XXX,XX @@ static void bdrv_backing_attach(BdrvChild *c)
95
parent->backing_blocker);
96
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
97
parent->backing_blocker);
98
+
99
+ bdrv_child_cb_attach(c);
100
}
101
102
static void bdrv_backing_detach(BdrvChild *c)
103
@@ -XXX,XX +XXX,XX @@ static void bdrv_backing_detach(BdrvChild *c)
104
bdrv_op_unblock_all(c->bs, parent->backing_blocker);
105
error_free(parent->backing_blocker);
106
parent->backing_blocker = NULL;
107
+
108
+ bdrv_child_cb_detach(c);
109
}
110
111
/*
112
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
113
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
96
}
114
}
97
115
if (old_bs) {
98
- shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
116
+ /* Detach first so that the recursive drain sections coming from @child
99
- BLK_PERM_WRITE_UNCHANGED;
117
+ * are already gone and we only end the drain sections that came from
100
+ shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
118
+ * elsewhere. */
101
119
+ if (child->role->detach) {
102
if (bs->open_flags & BDRV_O_INACTIVE) {
120
+ child->role->detach(child);
103
shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
121
+ }
104
@@ -XXX,XX +XXX,XX @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
122
if (old_bs->quiesce_counter && child->role->drained_end) {
105
[BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE,
123
for (i = 0; i < old_bs->quiesce_counter; i++) {
106
[BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED,
124
child->role->drained_end(child);
107
[BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE,
125
}
108
- [BLOCK_PERMISSION_GRAPH_MOD] = BLK_PERM_GRAPH_MOD,
126
}
109
};
127
- if (child->role->detach) {
110
128
- child->role->detach(child);
111
QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
129
- }
112
@@ -XXX,XX +XXX,XX @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
130
QLIST_REMOVE(child, next_parent);
113
update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
114
115
/* success - we can delete the intermediate states, and link top->base */
116
- /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
117
- * we've figured out how they should work. */
118
if (!backing_file_str) {
119
bdrv_refresh_filename(base);
120
backing_file_str = base->filename;
121
diff --git a/block/commit.c b/block/commit.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/block/commit.c
124
+++ b/block/commit.c
125
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
126
s->base = blk_new(s->common.job.aio_context,
127
base_perms,
128
BLK_PERM_CONSISTENT_READ
129
- | BLK_PERM_GRAPH_MOD
130
| BLK_PERM_WRITE_UNCHANGED);
131
ret = blk_insert_bs(s->base, base, errp);
132
if (ret < 0) {
133
diff --git a/block/mirror.c b/block/mirror.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/block/mirror.c
136
+++ b/block/mirror.c
137
@@ -XXX,XX +XXX,XX @@ static void mirror_complete(Job *job, Error **errp)
138
replace_aio_context = bdrv_get_aio_context(s->to_replace);
139
aio_context_acquire(replace_aio_context);
140
141
- /* TODO Translate this into permission system. Current definition of
142
- * GRAPH_MOD would require to request it for the parents; they might
143
- * not even be BlockDriverStates, however, so a BdrvChild can't address
144
- * them. May need redefinition of GRAPH_MOD. */
145
+ /* TODO Translate this into child freeze system. */
146
error_setg(&s->replace_blocker,
147
"block device is in use by block-job-complete");
148
bdrv_op_block_all(s->to_replace, s->replace_blocker);
149
@@ -XXX,XX +XXX,XX @@ static BlockJob *mirror_start_job(
150
s = block_job_create(job_id, driver, NULL, mirror_top_bs,
151
BLK_PERM_CONSISTENT_READ,
152
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
153
- BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
154
+ BLK_PERM_WRITE, speed,
155
creation_flags, cb, opaque, errp);
156
if (!s) {
157
goto fail;
158
@@ -XXX,XX +XXX,XX @@ static BlockJob *mirror_start_job(
159
target_perms |= BLK_PERM_RESIZE;
160
}
161
162
- target_shared_perms |= BLK_PERM_CONSISTENT_READ
163
- | BLK_PERM_WRITE
164
- | BLK_PERM_GRAPH_MOD;
165
+ target_shared_perms |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
166
} else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) {
167
/*
168
* We may want to allow this in the future, but it would
169
@@ -XXX,XX +XXX,XX @@ static BlockJob *mirror_start_job(
170
goto fail;
171
}
131
}
172
132
173
- if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
133
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
174
- target_perms |= BLK_PERM_GRAPH_MOD;
134
}
175
- }
135
}
176
-
136
177
s->target = blk_new(s->common.job.aio_context,
137
+ /* Attach only after starting new drained sections, so that recursive
178
target_perms, target_shared_perms);
138
+ * drain sections coming from @child don't get an extra .drained_begin
179
ret = blk_insert_bs(s->target, target, errp);
139
+ * callback. */
180
diff --git a/hw/block/block.c b/hw/block/block.c
140
if (child->role->attach) {
181
index XXXXXXX..XXXXXXX 100644
141
child->role->attach(child);
182
--- a/hw/block/block.c
142
}
183
+++ b/hw/block/block.c
143
diff --git a/block/io.c b/block/io.c
184
@@ -XXX,XX +XXX,XX @@ bool blkconf_apply_backend_options(BlockConf *conf, bool readonly,
144
index XXXXXXX..XXXXXXX 100644
185
perm |= BLK_PERM_WRITE;
145
--- a/block/io.c
146
+++ b/block/io.c
147
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
148
assert(data.done);
149
}
150
151
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
152
- BdrvChild *parent)
153
+void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
154
+ BdrvChild *parent)
155
{
156
BdrvChild *child, *next;
157
158
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
159
bdrv_drain_recurse(bs);
160
161
if (recursive) {
162
+ bs->recursive_quiesce_counter++;
163
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
164
bdrv_do_drained_begin(child->bs, true, child);
165
}
166
@@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_begin(BlockDriverState *bs)
167
bdrv_do_drained_begin(bs, true, NULL);
168
}
169
170
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
171
- BdrvChild *parent)
172
+void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
173
+ BdrvChild *parent)
174
{
175
BdrvChild *child, *next;
176
int old_quiesce_counter;
177
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
186
}
178
}
187
179
188
- shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
180
if (recursive) {
189
- BLK_PERM_GRAPH_MOD;
181
+ bs->recursive_quiesce_counter--;
190
+ shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
182
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
191
if (resizable) {
183
bdrv_do_drained_end(child->bs, true, child);
192
shared_perm |= BLK_PERM_RESIZE;
184
}
193
}
185
@@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_end(BlockDriverState *bs)
194
diff --git a/scripts/render_block_graph.py b/scripts/render_block_graph.py
186
bdrv_do_drained_end(bs, true, NULL);
195
index XXXXXXX..XXXXXXX 100755
187
}
196
--- a/scripts/render_block_graph.py
188
197
+++ b/scripts/render_block_graph.py
189
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
198
@@ -XXX,XX +XXX,XX @@ def perm(arr):
190
+{
199
s = 'w' if 'write' in arr else '_'
191
+ int i;
200
s += 'r' if 'consistent-read' in arr else '_'
192
+
201
s += 'u' if 'write-unchanged' in arr else '_'
193
+ for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
202
- s += 'g' if 'graph-mod' in arr else '_'
194
+ bdrv_do_drained_begin(child->bs, true, child);
203
s += 's' if 'resize' in arr else '_'
195
+ }
204
return s
196
+}
205
197
+
206
diff --git a/tests/qemu-iotests/273.out b/tests/qemu-iotests/273.out
198
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
207
index XXXXXXX..XXXXXXX 100644
199
+{
208
--- a/tests/qemu-iotests/273.out
200
+ int i;
209
+++ b/tests/qemu-iotests/273.out
201
+
210
@@ -XXX,XX +XXX,XX @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev
202
+ for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
211
"name": "file",
203
+ bdrv_do_drained_end(child->bs, true, child);
212
"parent": 5,
204
+ }
213
"shared-perm": [
205
+}
214
- "graph-mod",
206
+
215
"write-unchanged",
207
/*
216
"consistent-read"
208
* Wait for pending requests to complete on a single BlockDriverState subtree,
217
],
209
* and suspend block driver's internal I/O until next request arrives.
218
@@ -XXX,XX +XXX,XX @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev
219
"name": "backing",
220
"parent": 5,
221
"shared-perm": [
222
- "graph-mod",
223
"resize",
224
"write-unchanged",
225
"write",
226
@@ -XXX,XX +XXX,XX @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev
227
"name": "file",
228
"parent": 3,
229
"shared-perm": [
230
- "graph-mod",
231
"write-unchanged",
232
"consistent-read"
233
],
234
@@ -XXX,XX +XXX,XX @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev
235
"name": "backing",
236
"parent": 3,
237
"shared-perm": [
238
- "graph-mod",
239
"resize",
240
"write-unchanged",
241
"write",
242
--
210
--
243
2.31.1
211
2.13.6
244
212
245
213
diff view generated by jsdifflib
1
From: Daniel P. Berrangé <berrange@redhat.com>
2
3
The -device JSON syntax impl leaks a reference on the created
4
DeviceState instance. As a result when you hot-unplug the
5
device, the device_finalize method won't be called and thus
6
it will fail to emit the required DEVICE_DELETED event.
7
8
A 'json-cli' feature was previously added against the
9
'device_add' QMP command QAPI schema to indicate to mgmt
10
apps that -device supported JSON syntax. Given the hotplug
11
bug that feature flag is not usable for its purpose, so
12
we add a new 'json-cli-hotplug' feature to indicate the
13
-device supports JSON without breaking hotplug.
14
15
Fixes: 5dacda5167560b3af8eadbce5814f60ba44b467e
16
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/802
17
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
18
Message-Id: <20220105123847.4047954-2-berrange@redhat.com>
19
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
20
Tested-by: Ján Tomko <jtomko@redhat.com>
21
Reviewed-by: Thomas Huth <thuth@redhat.com>
22
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
23
---
2
---
24
qapi/qdev.json | 5 ++++-
3
tests/test-bdrv-drain.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
25
softmmu/vl.c | 4 +++-
4
1 file changed, 80 insertions(+)
26
tests/qtest/device-plug-test.c | 19 +++++++++++++++++++
27
3 files changed, 26 insertions(+), 2 deletions(-)
28
5
29
diff --git a/qapi/qdev.json b/qapi/qdev.json
6
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
30
index XXXXXXX..XXXXXXX 100644
7
index XXXXXXX..XXXXXXX 100644
31
--- a/qapi/qdev.json
8
--- a/tests/test-bdrv-drain.c
32
+++ b/qapi/qdev.json
9
+++ b/tests/test-bdrv-drain.c
33
@@ -XXX,XX +XXX,XX @@
10
@@ -XXX,XX +XXX,XX @@ static void test_multiparent(void)
34
# @json-cli: If present, the "-device" command line option supports JSON
11
blk_unref(blk_b);
35
# syntax with a structure identical to the arguments of this
36
# command.
37
+# @json-cli-hotplug: If present, the "-device" command line option supports JSON
38
+# syntax without the reference counting leak that broke
39
+# hot-unplug
40
#
41
# Notes:
42
#
43
@@ -XXX,XX +XXX,XX @@
44
{ 'command': 'device_add',
45
'data': {'driver': 'str', '*bus': 'str', '*id': 'str'},
46
'gen': false, # so we can get the additional arguments
47
- 'features': ['json-cli'] }
48
+ 'features': ['json-cli', 'json-cli-hotplug'] }
49
50
##
51
# @device_del:
52
diff --git a/softmmu/vl.c b/softmmu/vl.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/softmmu/vl.c
55
+++ b/softmmu/vl.c
56
@@ -XXX,XX +XXX,XX @@ static void qemu_create_cli_devices(void)
57
qemu_opts_foreach(qemu_find_opts("device"),
58
device_init_func, NULL, &error_fatal);
59
QTAILQ_FOREACH(opt, &device_opts, next) {
60
+ DeviceState *dev;
61
loc_push_restore(&opt->loc);
62
/*
63
* TODO Eventually we should call qmp_device_add() here to make sure it
64
@@ -XXX,XX +XXX,XX @@ static void qemu_create_cli_devices(void)
65
* from the start, so call qdev_device_add_from_qdict() directly for
66
* now.
67
*/
68
- qdev_device_add_from_qdict(opt->opts, true, &error_fatal);
69
+ dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal);
70
+ object_unref(OBJECT(dev));
71
loc_pop(&opt->loc);
72
}
73
rom_reset_order_override();
74
diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/tests/qtest/device-plug-test.c
77
+++ b/tests/qtest/device-plug-test.c
78
@@ -XXX,XX +XXX,XX @@ static void test_pci_unplug_request(void)
79
qtest_quit(qtest);
80
}
12
}
81
13
82
+static void test_pci_unplug_json_request(void)
14
+static void test_graph_change(void)
83
+{
15
+{
84
+ QTestState *qtest = qtest_initf(
16
+ BlockBackend *blk_a, *blk_b;
85
+ "-device '{\"driver\": \"virtio-mouse-pci\", \"id\": \"dev0\"}'");
17
+ BlockDriverState *bs_a, *bs_b, *backing;
18
+ BDRVTestState *a_s, *b_s, *backing_s;
86
+
19
+
87
+ /*
20
+ blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
88
+ * Request device removal. As the guest is not running, the request won't
21
+ bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
89
+ * be processed. However during system reset, the removal will be
22
+ &error_abort);
90
+ * handled, removing the device.
23
+ a_s = bs_a->opaque;
91
+ */
24
+ blk_insert_bs(blk_a, bs_a, &error_abort);
92
+ device_del(qtest, "dev0");
93
+ system_reset(qtest);
94
+ wait_device_deleted_event(qtest, "dev0");
95
+
25
+
96
+ qtest_quit(qtest);
26
+ blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
27
+ bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
28
+ &error_abort);
29
+ b_s = bs_b->opaque;
30
+ blk_insert_bs(blk_b, bs_b, &error_abort);
31
+
32
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
33
+ backing_s = backing->opaque;
34
+ bdrv_set_backing_hd(bs_a, backing, &error_abort);
35
+
36
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
37
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
38
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
39
+ g_assert_cmpint(a_s->drain_count, ==, 0);
40
+ g_assert_cmpint(b_s->drain_count, ==, 0);
41
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
42
+
43
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
44
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
45
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
46
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
47
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
48
+
49
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
50
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
51
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
52
+ g_assert_cmpint(backing->quiesce_counter, ==, 5);
53
+ g_assert_cmpint(a_s->drain_count, ==, 5);
54
+ g_assert_cmpint(b_s->drain_count, ==, 5);
55
+ g_assert_cmpint(backing_s->drain_count, ==, 5);
56
+
57
+ bdrv_set_backing_hd(bs_b, NULL, &error_abort);
58
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
59
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
60
+ g_assert_cmpint(backing->quiesce_counter, ==, 3);
61
+ g_assert_cmpint(a_s->drain_count, ==, 3);
62
+ g_assert_cmpint(b_s->drain_count, ==, 2);
63
+ g_assert_cmpint(backing_s->drain_count, ==, 3);
64
+
65
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
66
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
67
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
68
+ g_assert_cmpint(backing->quiesce_counter, ==, 5);
69
+ g_assert_cmpint(a_s->drain_count, ==, 5);
70
+ g_assert_cmpint(b_s->drain_count, ==, 5);
71
+ g_assert_cmpint(backing_s->drain_count, ==, 5);
72
+
73
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
74
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
75
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
76
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
77
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
78
+
79
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
80
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
81
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
82
+ g_assert_cmpint(a_s->drain_count, ==, 0);
83
+ g_assert_cmpint(b_s->drain_count, ==, 0);
84
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
85
+
86
+ bdrv_unref(backing);
87
+ bdrv_unref(bs_a);
88
+ bdrv_unref(bs_b);
89
+ blk_unref(blk_a);
90
+ blk_unref(blk_b);
97
+}
91
+}
98
+
92
+
99
static void test_ccw_unplug(void)
93
100
{
94
typedef struct TestBlockJob {
101
QTestState *qtest = qtest_initf("-device virtio-balloon-ccw,id=dev0");
95
BlockJob common;
102
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
96
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
103
*/
97
104
qtest_add_func("/device-plug/pci-unplug-request",
98
g_test_add_func("/bdrv-drain/nested", test_nested);
105
test_pci_unplug_request);
99
g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
106
+ qtest_add_func("/device-plug/pci-unplug-json-request",
100
+ g_test_add_func("/bdrv-drain/graph-change", test_graph_change);
107
+ test_pci_unplug_json_request);
101
108
102
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
109
if (!strcmp(arch, "s390x")) {
103
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
110
qtest_add_func("/device-plug/ccw-unplug",
111
--
104
--
112
2.31.1
105
2.13.6
113
106
114
107
diff view generated by jsdifflib
New patch
1
Since commit bde70715, base is the only node that is reopened in
2
commit_start(). This means that the code, which still involves an
3
explicit BlockReopenQueue, can now be simplified by using bdrv_reopen().
1
4
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Reviewed-by: Fam Zheng <famz@redhat.com>
7
---
8
block/commit.c | 8 +-------
9
1 file changed, 1 insertion(+), 7 deletions(-)
10
11
diff --git a/block/commit.c b/block/commit.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/block/commit.c
14
+++ b/block/commit.c
15
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
16
const char *filter_node_name, Error **errp)
17
{
18
CommitBlockJob *s;
19
- BlockReopenQueue *reopen_queue = NULL;
20
int orig_base_flags;
21
BlockDriverState *iter;
22
BlockDriverState *commit_top_bs = NULL;
23
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
24
/* convert base to r/w, if necessary */
25
orig_base_flags = bdrv_get_flags(base);
26
if (!(orig_base_flags & BDRV_O_RDWR)) {
27
- reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
28
- orig_base_flags | BDRV_O_RDWR);
29
- }
30
-
31
- if (reopen_queue) {
32
- bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
33
+ bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err);
34
if (local_err != NULL) {
35
error_propagate(errp, local_err);
36
goto fail;
37
--
38
2.13.6
39
40
diff view generated by jsdifflib
1
This demonstrates what happens when the block status changes in
1
The bdrv_reopen*() implementation doesn't like it if the graph is
2
sub-min_sparse granularity, but all of the parts are zeroed out. The
2
changed between queuing nodes for reopen and actually reopening them
3
alignment logic in is_allocated_sectors() prevents the target image from
3
(one of the reasons is that queuing can be recursive).
4
remaining fully sparse as expected, and instead turns it into a data cluster of
4
5
explicit zeros.
5
So instead of draining the device only in bdrv_reopen_multiple(),
6
require that callers already drained all affected nodes, and assert this
7
in bdrv_reopen_queue().
6
8
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Message-Id: <20211217164654.1184218-2-vsementsov@virtuozzo.com>
10
Tested-by: Peter Lieven <pl@kamp.de>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
11
---
13
tests/qemu-iotests/122 | 1 +
12
block.c | 23 ++++++++++++++++-------
14
tests/qemu-iotests/122.out | 10 ++++++++--
13
block/replication.c | 6 ++++++
15
2 files changed, 9 insertions(+), 2 deletions(-)
14
qemu-io-cmds.c | 3 +++
15
3 files changed, 25 insertions(+), 7 deletions(-)
16
16
17
diff --git a/tests/qemu-iotests/122 b/tests/qemu-iotests/122
17
diff --git a/block.c b/block.c
18
index XXXXXXX..XXXXXXX 100755
19
--- a/tests/qemu-iotests/122
20
+++ b/tests/qemu-iotests/122
21
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c "write -P 0 0 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_test
22
$QEMU_IO -c "write 0 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
23
$QEMU_IO -c "write 8k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
24
$QEMU_IO -c "write 17k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
25
+$QEMU_IO -c "write -P 0 65k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
26
27
for min_sparse in 4k 8k; do
28
echo
29
diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out
30
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
31
--- a/tests/qemu-iotests/122.out
19
--- a/block.c
32
+++ b/tests/qemu-iotests/122.out
20
+++ b/block.c
33
@@ -XXX,XX +XXX,XX @@ wrote 1024/1024 bytes at offset 8192
21
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
34
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
22
* returns a pointer to bs_queue, which is either the newly allocated
35
wrote 1024/1024 bytes at offset 17408
23
* bs_queue, or the existing bs_queue being used.
36
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
24
*
37
+wrote 1024/1024 bytes at offset 66560
25
+ * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
38
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
26
*/
39
27
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
40
convert -S 4k
28
BlockDriverState *bs,
41
[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
29
@@ -XXX,XX +XXX,XX @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
42
@@ -XXX,XX +XXX,XX @@ convert -S 4k
30
BdrvChild *child;
43
{ "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
31
QDict *old_options, *explicit_options;
44
{ "start": 12288, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false},
32
45
{ "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
33
+ /* Make sure that the caller remembered to use a drained section. This is
46
-{ "start": 20480, "length": 67088384, "depth": 0, "present": false, "zero": true, "data": false}]
34
+ * important to avoid graph changes between the recursive queuing here and
47
+{ "start": 20480, "length": 46080, "depth": 0, "present": false, "zero": true, "data": false},
35
+ * bdrv_reopen_multiple(). */
48
+{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
36
+ assert(bs->quiesce_counter > 0);
49
+{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}]
37
+
50
38
if (bs_queue == NULL) {
51
convert -c -S 4k
39
bs_queue = g_new0(BlockReopenQueue, 1);
52
[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
40
QSIMPLEQ_INIT(bs_queue);
53
@@ -XXX,XX +XXX,XX @@ convert -c -S 4k
41
@@ -XXX,XX +XXX,XX @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
54
42
* If all devices prepare successfully, then the changes are committed
55
convert -S 8k
43
* to all devices.
56
[{ "start": 0, "length": 24576, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
44
*
57
-{ "start": 24576, "length": 67084288, "depth": 0, "present": false, "zero": true, "data": false}]
45
+ * All affected nodes must be drained between bdrv_reopen_queue() and
58
+{ "start": 24576, "length": 41984, "depth": 0, "present": false, "zero": true, "data": false},
46
+ * bdrv_reopen_multiple().
59
+{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
47
*/
60
+{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}]
48
int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
61
49
{
62
convert -c -S 8k
50
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er
63
[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
51
52
assert(bs_queue != NULL);
53
54
- aio_context_release(ctx);
55
- bdrv_drain_all_begin();
56
- aio_context_acquire(ctx);
57
-
58
QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
59
+ assert(bs_entry->state.bs->quiesce_counter > 0);
60
if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
61
error_propagate(errp, local_err);
62
goto cleanup;
63
@@ -XXX,XX +XXX,XX @@ cleanup:
64
}
65
g_free(bs_queue);
66
67
- bdrv_drain_all_end();
68
-
69
return ret;
70
}
71
72
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
73
{
74
int ret = -1;
75
Error *local_err = NULL;
76
- BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
77
+ BlockReopenQueue *queue;
78
79
+ bdrv_subtree_drained_begin(bs);
80
+
81
+ queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
82
ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
83
if (local_err != NULL) {
84
error_propagate(errp, local_err);
85
}
86
+
87
+ bdrv_subtree_drained_end(bs);
88
+
89
return ret;
90
}
91
92
diff --git a/block/replication.c b/block/replication.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/block/replication.c
95
+++ b/block/replication.c
96
@@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
97
new_secondary_flags = s->orig_secondary_flags;
98
}
99
100
+ bdrv_subtree_drained_begin(s->hidden_disk->bs);
101
+ bdrv_subtree_drained_begin(s->secondary_disk->bs);
102
+
103
if (orig_hidden_flags != new_hidden_flags) {
104
reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL,
105
new_hidden_flags);
106
@@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
107
reopen_queue, &local_err);
108
error_propagate(errp, local_err);
109
}
110
+
111
+ bdrv_subtree_drained_end(s->hidden_disk->bs);
112
+ bdrv_subtree_drained_end(s->secondary_disk->bs);
113
}
114
115
static void backup_job_cleanup(BlockDriverState *bs)
116
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/qemu-io-cmds.c
119
+++ b/qemu-io-cmds.c
120
@@ -XXX,XX +XXX,XX @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
121
opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL;
122
qemu_opts_reset(&reopen_opts);
123
124
+ bdrv_subtree_drained_begin(bs);
125
brq = bdrv_reopen_queue(NULL, bs, opts, flags);
126
bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err);
127
+ bdrv_subtree_drained_end(bs);
128
+
129
if (local_err) {
130
error_report_err(local_err);
131
} else {
64
--
132
--
65
2.31.1
133
2.13.6
66
134
67
135
diff view generated by jsdifflib