The following changes since commit 16aaacb307ed607b9780c12702c44f0fe52edc7e:

  Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20200430' into staging (2020-04-30 14:00:36 +0100)

are available in the Git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to eaae29ef89d498d0eac553c77b554f310a47f809:

  qemu-storage-daemon: Fix non-string --object properties (2020-04-30 17:51:07 +0200)

----------------------------------------------------------------
Block layer patches:

- Fix resize (extending) of short overlays
- nvme: introduce PMR support from NVMe 1.4 spec
- qemu-storage-daemon: Fix non-string --object properties

----------------------------------------------------------------
Alberto Garcia (1):
      qcow2: Add incompatibility note between backing files and raw external data files

Andrzej Jakowski (1):
      nvme: introduce PMR support from NVMe 1.4 spec

Kevin Wolf (12):
      block: Add flags to BlockDriver.bdrv_co_truncate()
      block: Add flags to bdrv(_co)_truncate()
      block-backend: Add flags to blk_truncate()
      qcow2: Support BDRV_REQ_ZERO_WRITE for truncate
      raw-format: Support BDRV_REQ_ZERO_WRITE for truncate
      file-posix: Support BDRV_REQ_ZERO_WRITE for truncate
      block: truncate: Don't make backing file data visible
      iotests: Filter testfiles out in filter_img_info()
      iotests: Test committing to short backing file
      qcow2: Forward ZERO_WRITE flag for full preallocation
      qom: Factor out user_creatable_add_dict()
      qemu-storage-daemon: Fix non-string --object properties

Paolo Bonzini (1):
      qemu-iotests: allow qcow2 external discarded clusters to contain stale data

 docs/interop/qcow2.txt          |   3 +
 hw/block/nvme.h                 |   2 +
 include/block/block.h           |   5 +-
 include/block/block_int.h       |  10 +-
 include/block/nvme.h            | 172 ++++++++++++++++++++++++++
 include/qom/object_interfaces.h |  16 +++
 include/sysemu/block-backend.h  |   2 +-
 block.c                         |   3 +-
 block/block-backend.c           |   4 +-
 block/commit.c                  |   4 +-
 block/crypto.c                  |   7 +-
 block/file-posix.c              |   6 +-
 block/file-win32.c              |   2 +-
 block/gluster.c                 |   1 +
 block/io.c                      |  43 ++++++-
 block/iscsi.c                   |   2 +-
 block/mirror.c                  |   2 +-
 block/nfs.c                     |   3 +-
 block/parallels.c               |   6 +-
 block/qcow.c                    |   4 +-
 block/qcow2-cluster.c           |   2 +-
 block/qcow2-refcount.c          |   2 +-
 block/qcow2.c                   |  73 +++++++++--
 block/qed.c                     |   3 +-
 block/raw-format.c              |   6 +-
 block/rbd.c                     |   1 +
 block/sheepdog.c                |   4 +-
 block/ssh.c                     |   2 +-
 block/vdi.c                     |   2 +-
 block/vhdx-log.c                |   2 +-
 block/vhdx.c                    |   6 +-
 block/vmdk.c                    |   8 +-
 block/vpc.c                     |   2 +-
 blockdev.c                      |   2 +-
 hw/block/nvme.c                 | 109 ++++++++++++++++
 qemu-img.c                      |   2 +-
 qemu-io-cmds.c                  |   2 +-
 qemu-storage-daemon.c           |   4 +-
 qom/object_interfaces.c         |  31 +++++
 qom/qom-qmp-cmds.c              |  24 +---
 tests/test-block-iothread.c     |   9 +-
 tests/qemu-iotests/iotests.py   |   5 +-
 hw/block/Makefile.objs          |   2 +-
 hw/block/trace-events           |   4 +
 tests/qemu-iotests/244          |  10 +-
 tests/qemu-iotests/244.out      |   9 +-
 tests/qemu-iotests/274          | 155 +++++++++++++++++++++++
 tests/qemu-iotests/274.out      | 268 ++++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/group        |   1 +
 49 files changed, 951 insertions(+), 96 deletions(-)
 create mode 100755 tests/qemu-iotests/274
 create mode 100644 tests/qemu-iotests/274.out
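As an illustration of the --object fix in this series: with plain QemuOpts
parsing, every --object property value arrives as a string, so properties
that are really numbers used to be rejected by qemu-storage-daemon. A
minimal sketch of an invocation that depends on the fix (the throttle-group
property is one plausible example, not taken from this series):

    qemu-storage-daemon \
        --object throttle-group,id=group0,limits.iops-total=1000 \
        --blockdev driver=null-co,node-name=disk0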
Deleted patch
As long as nobody keeps the other I/O thread from working, there is no
reason why bdrv_drain() wouldn't work with cross-AioContext events. The
key is that the root request we're waiting for is in the AioContext
we're polling (which it always is for bdrv_drain()) so that aio_poll()
is woken up in the end.

Add a test case that shows that it works. Remove the comment in
bdrv_drain() that claims otherwise.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/io.c              |   4 --
 tests/test-bdrv-drain.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 186 insertions(+), 5 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
  *
  * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
  * AioContext.
- *
- * Only this BlockDriverState's AioContext is run, so in-flight requests must
- * not depend on events in other AioContexts. In that case, use
- * bdrv_drain_all() instead.
  */
 void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
 {
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@
 #include "block/blockjob_int.h"
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
+#include "iothread.h"
+
+static QemuEvent done_event;

 typedef struct BDRVTestState {
     int drain_count;
+    AioContext *bh_indirection_ctx;
 } BDRVTestState;

 static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static void bdrv_test_close(BlockDriverState *bs)
     g_assert_cmpint(s->drain_count, >, 0);
 }

+static void co_reenter_bh(void *opaque)
+{
+    aio_co_wake(opaque);
+}
+
 static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
                                             uint64_t offset, uint64_t bytes,
                                             QEMUIOVector *qiov, int flags)
 {
+    BDRVTestState *s = bs->opaque;
+
     /* We want this request to stay until the polling loop in drain waits for
      * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
      * first and polls its result, too, but it shouldn't accidentally complete
      * this request yet. */
     qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);

+    if (s->bh_indirection_ctx) {
+        aio_bh_schedule_oneshot(s->bh_indirection_ctx, co_reenter_bh,
+                                qemu_coroutine_self());
+        qemu_coroutine_yield();
+    }
+
     return 0;
 }

@@ -XXX,XX +XXX,XX @@ static void test_graph_change(void)
     blk_unref(blk_b);
 }

+struct test_iothread_data {
+    BlockDriverState *bs;
+    enum drain_type drain_type;
+    int *aio_ret;
+};
+
+static void test_iothread_drain_entry(void *opaque)
+{
+    struct test_iothread_data *data = opaque;
+
+    aio_context_acquire(bdrv_get_aio_context(data->bs));
+    do_drain_begin(data->drain_type, data->bs);
+    g_assert_cmpint(*data->aio_ret, ==, 0);
+    do_drain_end(data->drain_type, data->bs);
+    aio_context_release(bdrv_get_aio_context(data->bs));
+
+    qemu_event_set(&done_event);
+}
+
+static void test_iothread_aio_cb(void *opaque, int ret)
+{
+    int *aio_ret = opaque;
+    *aio_ret = ret;
+    qemu_event_set(&done_event);
+}
+
+/*
+ * Starts an AIO request on a BDS that runs in the AioContext of iothread 1.
+ * The request involves a BH on iothread 2 before it can complete.
+ *
+ * @drain_thread = 0 means that do_drain_begin/end are called from the main
+ * thread, @drain_thread = 1 means that they are called from iothread 1. Drain
+ * for this BDS cannot be called from iothread 2 because only the main thread
+ * may do cross-AioContext polling.
+ */
+static void test_iothread_common(enum drain_type drain_type, int drain_thread)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    BDRVTestState *s;
+    BlockAIOCB *acb;
+    int aio_ret;
+    struct test_iothread_data data;
+
+    IOThread *a = iothread_new();
+    IOThread *b = iothread_new();
+    AioContext *ctx_a = iothread_get_aio_context(a);
+    AioContext *ctx_b = iothread_get_aio_context(b);
+
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = NULL,
+        .iov_len = 0,
+    };
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    /* bdrv_drain_all() may only be called from the main loop thread */
+    if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) {
+        goto out;
+    }
+
+    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+                              &error_abort);
+    s = bs->opaque;
+    blk_insert_bs(blk, bs, &error_abort);
+
+    blk_set_aio_context(blk, ctx_a);
+    aio_context_acquire(ctx_a);
+
+    s->bh_indirection_ctx = ctx_b;
+
+    aio_ret = -EINPROGRESS;
+    if (drain_thread == 0) {
+        acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret);
+    } else {
+        acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
+    }
+    g_assert(acb != NULL);
+    g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
+
+    aio_context_release(ctx_a);
+
+    data = (struct test_iothread_data) {
+        .bs         = bs,
+        .drain_type = drain_type,
+        .aio_ret    = &aio_ret,
+    };
+
+    switch (drain_thread) {
+    case 0:
+        if (drain_type != BDRV_DRAIN_ALL) {
+            aio_context_acquire(ctx_a);
+        }
+
+        /* The request is running on the IOThread a. Draining its block device
+         * will make sure that it has completed as far as the BDS is concerned,
+         * but the drain in this thread can continue immediately after
+         * bdrv_dec_in_flight() and aio_ret might be assigned only slightly
+         * later. */
+        qemu_event_reset(&done_event);
+        do_drain_begin(drain_type, bs);
+        g_assert_cmpint(bs->in_flight, ==, 0);
+
+        if (drain_type != BDRV_DRAIN_ALL) {
+            aio_context_release(ctx_a);
+        }
+        qemu_event_wait(&done_event);
+        if (drain_type != BDRV_DRAIN_ALL) {
+            aio_context_acquire(ctx_a);
+        }
+
+        g_assert_cmpint(aio_ret, ==, 0);
+        do_drain_end(drain_type, bs);
+
+        if (drain_type != BDRV_DRAIN_ALL) {
+            aio_context_release(ctx_a);
+        }
+        break;
+    case 1:
+        qemu_event_reset(&done_event);
+        aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data);
+        qemu_event_wait(&done_event);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    aio_context_acquire(ctx_a);
+    blk_set_aio_context(blk, qemu_get_aio_context());
+    aio_context_release(ctx_a);
+
+    bdrv_unref(bs);
+    blk_unref(blk);
+
+out:
+    iothread_join(a);
+    iothread_join(b);
+}
+
+static void test_iothread_drain_all(void)
+{
+    test_iothread_common(BDRV_DRAIN_ALL, 0);
+    test_iothread_common(BDRV_DRAIN_ALL, 1);
+}
+
+static void test_iothread_drain(void)
+{
+    test_iothread_common(BDRV_DRAIN, 0);
+    test_iothread_common(BDRV_DRAIN, 1);
+}
+
+static void test_iothread_drain_subtree(void)
+{
+    test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
+    test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
+}
+

 typedef struct TestBlockJob {
     BlockJob common;
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain_subtree(void)

 int main(int argc, char **argv)
 {
+    int ret;
+
     bdrv_init();
     qemu_init_main_loop(&error_abort);

     g_test_init(&argc, &argv, NULL);
+    qemu_event_init(&done_event, false);

     g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
     g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
     g_test_add_func("/bdrv-drain/graph-change", test_graph_change);

+    g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
+    g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
+    g_test_add_func("/bdrv-drain/iothread/drain_subtree",
+                    test_iothread_drain_subtree);
+
     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
     g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
                     test_blockjob_drain_subtree);

-    return g_test_run();
+    ret = g_test_run();
+    qemu_event_destroy(&done_event);
+    return ret;
 }
--
2.13.6
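For readers unfamiliar with the drain API this test exercises, the usage
pattern is a simple bracket (minimal sketch, not part of the patch):

    /*
     * After bdrv_drained_begin() returns, no new requests are accepted
     * and all in-flight requests on bs have completed -- even when their
     * completion is driven by an event in another AioContext, which is
     * exactly what the commit message above argues is safe.
     */
    bdrv_drained_begin(bs);
    /* ... inspect or reconfigure bs safely ... */
    bdrv_drained_end(bs);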
Deleted patch
bdrv_do_drain_begin/end() implement already everything that
bdrv_drain_all_begin/end() need and currently still do manually: Disable
external events, call parent drain callbacks, call block driver
callbacks.

It also does two more things:

The first is incrementing bs->quiesce_counter. bdrv_drain_all() already
stood out in the test case by behaving different from the other drain
variants. Adding this is not only safe, but in fact a bug fix.

The second is calling bdrv_drain_recurse(). We already do that later in
the same function in a loop, so basically doing an early first iteration
doesn't hurt.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c              | 10 ++--------
 tests/test-bdrv-drain.c | 14 ++++----------
 2 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);

-        /* Stop things in parent-to-child order */
         aio_context_acquire(aio_context);
-        aio_disable_external(aio_context);
-        bdrv_parent_drained_begin(bs, NULL);
-        bdrv_drain_invoke(bs, true, true);
+        bdrv_do_drained_begin(bs, true, NULL);
         aio_context_release(aio_context);

         if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);

-        /* Re-enable things in child-to-parent order */
         aio_context_acquire(aio_context);
-        bdrv_drain_invoke(bs, false, true);
-        bdrv_parent_drained_end(bs, NULL);
-        aio_enable_external(aio_context);
+        bdrv_do_drained_end(bs, true, NULL);
         aio_context_release(aio_context);
     }
 }
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)

 static void test_quiesce_drain_all(void)
 {
-    // XXX drain_all doesn't quiesce
-    //test_quiesce_common(BDRV_DRAIN_ALL, true);
+    test_quiesce_common(BDRV_DRAIN_ALL, true);
 }

 static void test_quiesce_drain(void)
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)

     for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
         for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
-            /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
-            int bs_quiesce      = (outer != BDRV_DRAIN_ALL) +
-                                  (inner != BDRV_DRAIN_ALL);
-            int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) +
-                                  (inner == BDRV_SUBTREE_DRAIN);
-            int backing_cb_cnt  = (outer != BDRV_DRAIN) +
+            int backing_quiesce = (outer != BDRV_DRAIN) +
                                   (inner != BDRV_DRAIN);

             g_assert_cmpint(bs->quiesce_counter, ==, 0);
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)
             do_drain_begin(outer, bs);
             do_drain_begin(inner, bs);

-            g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce);
+            g_assert_cmpint(bs->quiesce_counter, ==, 2);
             g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
             g_assert_cmpint(s->drain_count, ==, 2);
-            g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt);
+            g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);

             do_drain_end(inner, bs);
             do_drain_end(outer, bs);
--
2.13.6
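Spelled out in code, the consolidation replaces the three manual steps with
one call; conceptually (sketch derived from the hunks above, not a verbatim
quote of bdrv_do_drained_begin()):

    /* Before: open-coded in bdrv_drain_all_begin() */
    aio_disable_external(aio_context);    /* stop external events   */
    bdrv_parent_drained_begin(bs, NULL);  /* parent drain callbacks */
    bdrv_drain_invoke(bs, true, true);    /* driver drain callbacks */

    /* After: one call that also increments bs->quiesce_counter,
     * which is the bug fix the commit message mentions. */
    bdrv_do_drained_begin(bs, true, NULL);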
From: Alberto Garcia <berto@igalia.com>

Backing files and raw external data files are mutually exclusive.
The documentation of the raw external data bit (in autoclear_features)
already indicates that, but we should also mention it on the other
side.

Suggested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-Id: <20200410121816.8334-1-berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 docs/interop/qcow2.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index XXXXXXX..XXXXXXX 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -XXX,XX +XXX,XX @@ The first cluster of a qcow2 image contains the file header:
                     is stored (NB: The string is not null terminated). 0 if the
                     image doesn't have a backing file.

+                    Note: backing files are incompatible with raw external data
+                    files (auto-clear feature bit 1).
+
          16 - 19:   backing_file_size
                     Length of the backing file name in bytes. Must not be
                     longer than 1023 bytes. Undefined if the image doesn't have
--
2.25.3
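A consumer of the header could enforce the rule along these lines
(hypothetical sketch; only the field names and the auto-clear bit number
come from the spec text above):

    /* Auto-clear feature bit 1 is the raw external data file bit. */
    bool raw_ext_data = header.autoclear_features & (1ULL << 1);

    if (raw_ext_data && header.backing_file_offset != 0) {
        error_setg(errp, "Backing files and raw external data files "
                         "are mutually exclusive");
        return -EINVAL;
    }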
From: Paolo Bonzini <pbonzini@redhat.com>

Test 244 checks the expected behavior of qcow2 external data files
with respect to zero and discarded clusters. Filesystems however
are free to ignore discard requests, and this seems to be the
case for overlayfs. Relax the tests to skip checks on the
external data file for discarded areas, which implies not using
qemu-img compare in the data_file_raw=on case.

This fixes docker tests on RHEL8.

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: qemu-block@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200409191006.24429-1-pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/244     | 10 ++++++++--
 tests/qemu-iotests/244.out |  9 ++++++---
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/244
+++ b/tests/qemu-iotests/244
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \
 echo
 $QEMU_IO -c 'read -P 0 0 1M' \
          -c 'read -P 0x11 1M 1M' \
-         -c 'read -P 0 2M 2M' \
          -c 'read -P 0x11 4M 1M' \
          -c 'read -P 0 5M 1M' \
          -f raw "$TEST_IMG.data" |
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \
     -f $IMGFMT "$TEST_IMG" |
     _filter_qemu_io

+# Discarded clusters are only marked as such in the qcow2 metadata, but
+# they can contain stale data in the external data file. Instead, zero
+# clusters must be zeroed in the external data file too.
 echo
-$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.data"
+$QEMU_IO -c 'read -P 0 0 1M' \
+         -c 'read -P 0x11 1M 1M' \
+         -c 'read -P 0 3M 3M' \
+         -f raw "$TEST_IMG".data |
+    _filter_qemu_io

 echo -n "qcow2 file size after I/O: "
 du -b $TEST_IMG | cut -f1
diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/244.out
+++ b/tests/qemu-iotests/244.out
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 1048576/1048576 bytes at offset 1048576
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 2097152/2097152 bytes at offset 2097152
-2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 1048576/1048576 bytes at offset 4194304
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 1048576/1048576 bytes at offset 5242880
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 1048576
 read 4194304/4194304 bytes at offset 2097152
 4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

-Images are identical.
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3145728/3145728 bytes at offset 3145728
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 qcow2 file size after I/O: 327680

 === bdrv_co_block_status test for file and offset=0 ===
--
2.25.3
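The assumption behind the relaxed test, in code terms (illustrative sketch
only):

    /*
     * A discard only updates the qcow2 metadata; whether the bytes in
     * the raw external data file actually become zero depends on the
     * filesystem, and overlayfs may ignore the request entirely.
     * Zero writes, by contrast, must be reflected in the data file.
     */
    ret = blk_pdiscard(blk, offset, bytes);
    /* reading 'offset' from the data file may now return stale data */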
This adds a new BdrvRequestFlags parameter to the .bdrv_co_truncate()
driver callbacks, and a supported_truncate_flags field in
BlockDriverState that allows drivers to advertise support for request
flags in the context of truncate.

For now, we always pass 0 and no drivers declare support for any flag.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200424125448.63318-2-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block_int.h   | 10 +++++++++-
 block/crypto.c              |  3 ++-
 block/file-posix.c          |  2 +-
 block/file-win32.c          |  2 +-
 block/gluster.c             |  1 +
 block/io.c                  |  8 +++++++-
 block/iscsi.c               |  2 +-
 block/nfs.c                 |  3 ++-
 block/qcow2.c               |  2 +-
 block/qed.c                 |  1 +
 block/raw-format.c          |  2 +-
 block/rbd.c                 |  1 +
 block/sheepdog.c            |  4 ++--
 block/ssh.c                 |  2 +-
 tests/test-block-iothread.c |  3 ++-
 15 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
      */
     int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
                                          bool exact, PreallocMode prealloc,
-                                         Error **errp);
+                                         BdrvRequestFlags flags, Error **errp);

     int64_t (*bdrv_getlength)(BlockDriverState *bs);
     bool has_variable_length;
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
     /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
      * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
     unsigned int supported_zero_flags;
+    /*
+     * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
+     *
+     * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
+     * that any added space reads as all zeros. If this can't be guaranteed,
+     * the operation must fail.
+     */
+    unsigned int supported_truncate_flags;

     /* the following member gives a name to every node on the bs graph. */
     char node_name[32];
diff --git a/block/crypto.c b/block/crypto.c
index XXXXXXX..XXXXXXX 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -XXX,XX +XXX,XX @@ static int block_crypto_co_create_generic(BlockDriverState *bs,

 static int coroutine_fn
 block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
-                         PreallocMode prealloc, Error **errp)
+                         PreallocMode prealloc, BdrvRequestFlags flags,
+                         Error **errp)
 {
     BlockCrypto *crypto = bs->opaque;
     uint64_t payload_offset =
diff --git a/block/file-posix.c b/block/file-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -XXX,XX +XXX,XX @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset,

 static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
                                         bool exact, PreallocMode prealloc,
-                                        Error **errp)
+                                        BdrvRequestFlags flags, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
     struct stat st;
diff --git a/block/file-win32.c b/block/file-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -XXX,XX +XXX,XX @@ static void raw_close(BlockDriverState *bs)

 static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
                                         bool exact, PreallocMode prealloc,
-                                        Error **errp)
+                                        BdrvRequestFlags flags, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
     LONG low, high;
diff --git a/block/gluster.c b/block/gluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs,
                                                  int64_t offset,
                                                  bool exact,
                                                  PreallocMode prealloc,
+                                                 BdrvRequestFlags flags,
                                                  Error **errp)
 {
     BDRVGlusterState *s = bs->opaque;
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
     BlockDriverState *bs = child->bs;
     BlockDriver *drv = bs->drv;
     BdrvTrackedRequest req;
+    BdrvRequestFlags flags = 0;
     int64_t old_size, new_bytes;
     int ret;

@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
     }

     if (drv->bdrv_co_truncate) {
-        ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp);
+        if (flags & ~bs->supported_truncate_flags) {
+            error_setg(errp, "Block driver does not support requested flags");
+            ret = -ENOTSUP;
+            goto out;
+        }
+        ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
     } else if (bs->file && drv->is_filter) {
         ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
     } else {
diff --git a/block/iscsi.c b/block/iscsi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -XXX,XX +XXX,XX @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)

 static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
                                           bool exact, PreallocMode prealloc,
-                                          Error **errp)
+                                          BdrvRequestFlags flags, Error **errp)
 {
     IscsiLun *iscsilun = bs->opaque;
     int64_t cur_length;
diff --git a/block/nfs.c b/block/nfs.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -XXX,XX +XXX,XX @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)

 static int coroutine_fn
 nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
-                     PreallocMode prealloc, Error **errp)
+                     PreallocMode prealloc, BdrvRequestFlags flags,
+                     Error **errp)
 {
     NFSClient *client = bs->opaque;
     int ret;
diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ fail:

 static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
                                           bool exact, PreallocMode prealloc,
-                                          Error **errp)
+                                          BdrvRequestFlags flags, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t old_length;
diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs,
                                              int64_t offset,
                                              bool exact,
                                              PreallocMode prealloc,
+                                             BdrvRequestFlags flags,
                                              Error **errp)
 {
     BDRVQEDState *s = bs->opaque;
diff --git a/block/raw-format.c b/block/raw-format.c
index XXXXXXX..XXXXXXX 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)

 static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
                                         bool exact, PreallocMode prealloc,
-                                        Error **errp)
+                                        BdrvRequestFlags flags, Error **errp)
 {
     BDRVRawState *s = bs->opaque;

diff --git a/block/rbd.c b/block/rbd.c
index XXXXXXX..XXXXXXX 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
                                              int64_t offset,
                                              bool exact,
                                              PreallocMode prealloc,
+                                             BdrvRequestFlags flags,
                                              Error **errp)
 {
     int r;
diff --git a/block/sheepdog.c b/block/sheepdog.c
index XXXXXXX..XXXXXXX 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -XXX,XX +XXX,XX @@ static int64_t sd_getlength(BlockDriverState *bs)

 static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset,
                                        bool exact, PreallocMode prealloc,
-                                       Error **errp)
+                                       BdrvRequestFlags flags, Error **errp)
 {
     BDRVSheepdogState *s = bs->opaque;
     int ret, fd;
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,

     assert(!flags);
     if (offset > s->inode.vdi_size) {
-        ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL);
+        ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL);
         if (ret < 0) {
             return ret;
         }
diff --git a/block/ssh.c b/block/ssh.c
index XXXXXXX..XXXXXXX 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -XXX,XX +XXX,XX @@ static int64_t ssh_getlength(BlockDriverState *bs)

 static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
                                         bool exact, PreallocMode prealloc,
-                                        Error **errp)
+                                        BdrvRequestFlags flags, Error **errp)
 {
     BDRVSSHState *s = bs->opaque;

diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-block-iothread.c
+++ b/tests/test-block-iothread.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs,

 static int coroutine_fn
 bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
-                      PreallocMode prealloc, Error **errp)
+                      PreallocMode prealloc, BdrvRequestFlags flags,
+                      Error **errp)
 {
     return 0;
 }
--
2.25.3
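A driver that can guarantee zeroed tails would later opt in by setting the
new field, e.g. in its open function (sketch; the field and flag are the
ones introduced above, the driver itself is hypothetical):

    static int example_open(BlockDriverState *bs, QDict *options,
                            int flags, Error **errp)
    {
        /* example_co_truncate() makes any added space read as zeros,
         * so bdrv_co_truncate() may pass BDRV_REQ_ZERO_WRITE through
         * instead of failing with -ENOTSUP. */
        bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
        return 0;
    }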
Now that block drivers can support flags for .bdrv_co_truncate, expose
the parameter in the node level interfaces bdrv_co_truncate() and
bdrv_truncate().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200424125448.63318-3-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block.h       |  5 +++--
 block/block-backend.c       |  2 +-
 block/crypto.c              |  2 +-
 block/io.c                  | 12 +++++++-----
 block/parallels.c           |  6 +++---
 block/qcow.c                |  4 ++--
 block/qcow2-refcount.c      |  2 +-
 block/qcow2.c               | 15 +++++++++------
 block/raw-format.c          |  2 +-
 block/vhdx-log.c            |  2 +-
 block/vhdx.c                |  2 +-
 block/vmdk.c                |  2 +-
 tests/test-block-iothread.c |  6 +++---
 13 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
 void bdrv_refresh_filename(BlockDriverState *bs);

 int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
-                                  PreallocMode prealloc, Error **errp);
+                                  PreallocMode prealloc, BdrvRequestFlags flags,
+                                  Error **errp);
 int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
-                  PreallocMode prealloc, Error **errp);
+                  PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);

 int64_t bdrv_nb_sectors(BlockDriverState *bs);
 int64_t bdrv_getlength(BlockDriverState *bs);
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
         return -ENOMEDIUM;
     }

-    return bdrv_truncate(blk->root, offset, exact, prealloc, errp);
+    return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp);
 }

 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
diff --git a/block/crypto.c b/block/crypto.c
index XXXXXXX..XXXXXXX 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -XXX,XX +XXX,XX @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,

     offset += payload_offset;

-    return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
+    return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
 }

 static void block_crypto_close(BlockDriverState *bs)
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
  * 'offset' bytes in length.
  */
 int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
-                                  PreallocMode prealloc, Error **errp)
+                                  PreallocMode prealloc, BdrvRequestFlags flags,
+                                  Error **errp)
 {
     BlockDriverState *bs = child->bs;
     BlockDriver *drv = bs->drv;
     BdrvTrackedRequest req;
-    BdrvRequestFlags flags = 0;
     int64_t old_size, new_bytes;
     int ret;

@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
         }
         ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
     } else if (bs->file && drv->is_filter) {
-        ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
+        ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
     } else {
         error_setg(errp, "Image format driver does not support resize");
         ret = -ENOTSUP;
@@ -XXX,XX +XXX,XX @@ typedef struct TruncateCo {
     int64_t offset;
     bool exact;
     PreallocMode prealloc;
+    BdrvRequestFlags flags;
     Error **errp;
     int ret;
 } TruncateCo;
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
 {
     TruncateCo *tco = opaque;
     tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact,
-                                tco->prealloc, tco->errp);
+                                tco->prealloc, tco->flags, tco->errp);
     aio_wait_kick();
 }

 int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
-                  PreallocMode prealloc, Error **errp)
+                  PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
 {
     Coroutine *co;
     TruncateCo tco = {
@@ -XXX,XX +XXX,XX @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
         .offset     = offset,
         .exact      = exact,
         .prealloc   = prealloc,
+        .flags      = flags,
         .errp       = errp,
         .ret        = NOT_DONE,
     };
diff --git a/block/parallels.c b/block/parallels.c
index XXXXXXX..XXXXXXX 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -XXX,XX +XXX,XX @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
     } else {
         ret = bdrv_truncate(bs->file,
                             (s->data_end + space) << BDRV_SECTOR_BITS,
-                            false, PREALLOC_MODE_OFF, NULL);
+                            false, PREALLOC_MODE_OFF, 0, NULL);
139
}
140
if (ret < 0) {
141
return ret;
142
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
143
* That means we have to pass exact=true.
144
*/
145
ret = bdrv_truncate(bs->file, res->image_end_offset, true,
146
- PREALLOC_MODE_OFF, &local_err);
147
+ PREALLOC_MODE_OFF, 0, &local_err);
148
if (ret < 0) {
149
error_report_err(local_err);
150
res->check_errors++;
151
@@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs)
152
153
/* errors are ignored, so we might as well pass exact=true */
154
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true,
155
- PREALLOC_MODE_OFF, NULL);
156
+ PREALLOC_MODE_OFF, 0, NULL);
157
}
158
159
g_free(s->bat_dirty_bmap);
160
diff --git a/block/qcow.c b/block/qcow.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/block/qcow.c
163
+++ b/block/qcow.c
164
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
165
return -E2BIG;
166
}
167
ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
168
- false, PREALLOC_MODE_OFF, NULL);
169
+ false, PREALLOC_MODE_OFF, 0, NULL);
170
if (ret < 0) {
171
return ret;
172
}
173
@@ -XXX,XX +XXX,XX @@ static int qcow_make_empty(BlockDriverState *bs)
174
l1_length) < 0)
175
return -1;
176
ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false,
177
- PREALLOC_MODE_OFF, NULL);
178
+ PREALLOC_MODE_OFF, 0, NULL);
179
if (ret < 0)
180
return ret;
181
182
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/block/qcow2-refcount.c
185
+++ b/block/qcow2-refcount.c
186
@@ -XXX,XX +XXX,XX @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
187
}
188
189
ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
190
- PREALLOC_MODE_OFF, &local_err);
191
+ PREALLOC_MODE_OFF, 0, &local_err);
192
if (ret < 0) {
193
error_report_err(local_err);
194
goto resize_fail;
195
diff --git a/block/qcow2.c b/block/qcow2.c
196
index XXXXXXX..XXXXXXX 100644
197
--- a/block/qcow2.c
198
+++ b/block/qcow2.c
199
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
200
mode = PREALLOC_MODE_OFF;
201
}
202
ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
203
- mode, errp);
204
+ mode, 0, errp);
205
if (ret < 0) {
206
return ret;
207
}
208
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
209
* always fulfilled, so there is no need to pass it on.)
210
*/
211
bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
212
- false, PREALLOC_MODE_OFF, &local_err);
213
+ false, PREALLOC_MODE_OFF, 0, &local_err);
214
if (local_err) {
215
warn_reportf_err(local_err,
216
"Failed to truncate the tail of the image: ");
217
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
218
* file should be resized to the exact target size, too,
219
* so we pass @exact here.
220
*/
221
- ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp);
222
+ ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0,
223
+ errp);
224
if (ret < 0) {
225
goto fail;
226
}
227
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
228
new_file_size = allocation_start +
229
nb_new_data_clusters * s->cluster_size;
230
/* Image file grows, so @exact does not matter */
231
- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp);
232
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
233
+ errp);
234
if (ret < 0) {
235
error_prepend(errp, "Failed to resize underlying file: ");
236
qcow2_free_clusters(bs, allocation_start,
237
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
238
if (len < 0) {
239
return len;
240
}
241
- return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
242
+ return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0,
243
+ NULL);
244
}
245
246
if (offset_into_cluster(s, offset)) {
247
@@ -XXX,XX +XXX,XX @@ static int make_completely_empty(BlockDriverState *bs)
248
}
249
250
ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
251
- PREALLOC_MODE_OFF, &local_err);
252
+ PREALLOC_MODE_OFF, 0, &local_err);
253
if (ret < 0) {
254
error_report_err(local_err);
255
goto fail;
256
diff --git a/block/raw-format.c b/block/raw-format.c
257
index XXXXXXX..XXXXXXX 100644
258
--- a/block/raw-format.c
259
+++ b/block/raw-format.c
260
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
261
262
s->size = offset;
263
offset += s->offset;
264
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
265
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
266
}
267
268
static void raw_eject(BlockDriverState *bs, bool eject_flag)
269
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
270
index XXXXXXX..XXXXXXX 100644
271
--- a/block/vhdx-log.c
272
+++ b/block/vhdx-log.c
273
@@ -XXX,XX +XXX,XX @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
274
goto exit;
275
}
276
ret = bdrv_truncate(bs->file, new_file_size, false,
277
- PREALLOC_MODE_OFF, NULL);
278
+ PREALLOC_MODE_OFF, 0, NULL);
279
if (ret < 0) {
280
goto exit;
281
}
282
diff --git a/block/vhdx.c b/block/vhdx.c
283
index XXXXXXX..XXXXXXX 100644
284
--- a/block/vhdx.c
285
+++ b/block/vhdx.c
286
@@ -XXX,XX +XXX,XX @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
287
}
288
289
return bdrv_truncate(bs->file, *new_offset + s->block_size, false,
290
- PREALLOC_MODE_OFF, NULL);
291
+ PREALLOC_MODE_OFF, 0, NULL);
292
}
293
294
/*
295
diff --git a/block/vmdk.c b/block/vmdk.c
296
index XXXXXXX..XXXXXXX 100644
297
--- a/block/vmdk.c
298
+++ b/block/vmdk.c
299
@@ -XXX,XX +XXX,XX @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
300
}
301
length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
302
ret = bdrv_truncate(s->extents[i].file, length, false,
303
- PREALLOC_MODE_OFF, NULL);
304
+ PREALLOC_MODE_OFF, 0, NULL);
305
if (ret < 0) {
306
return ret;
307
}
308
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
309
index XXXXXXX..XXXXXXX 100644
310
--- a/tests/test-block-iothread.c
311
+++ b/tests/test-block-iothread.c
312
@@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c)
313
int ret;
314
315
/* Normal success path */
316
- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL);
317
+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
318
g_assert_cmpint(ret, ==, 0);
319
320
/* Early error: Negative offset */
321
- ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL);
322
+ ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, 0, NULL);
323
g_assert_cmpint(ret, ==, -EINVAL);
324
325
/* Error: Read-only image */
326
c->bs->read_only = true;
327
c->bs->open_flags &= ~BDRV_O_RDWR;
328
329
- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL);
330
+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
331
g_assert_cmpint(ret, ==, -EACCES);
332
333
c->bs->read_only = false;
93
--
334
--
94
2.13.6
335
2.25.3
95
336
96
337
diff view generated by jsdifflib
1
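A note on using the new parameter: once a driver advertises truncate flags
(as later patches in this series do), a caller can request zeroed growth
directly instead of writing zeros itself. A minimal sketch of such a caller
(grow_with_zeros() is a made-up helper for illustration, not code from the
series; error handling shortened):

    /* Hypothetical caller: grow an image and ask that the newly added
     * area reads as zeros, letting the driver do it efficiently. */
    static int grow_with_zeros(BdrvChild *child, int64_t new_size,
                               Error **errp)
    {
        return bdrv_truncate(child, new_size, false, PREALLOC_MODE_OFF,
                             BDRV_REQ_ZERO_WRITE, errp);
    }
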
We already requested that block jobs be paused in .bdrv_drained_begin,
but no guarantee was made that the job was actually inactive at the
point where bdrv_drained_begin() returned.

This introduces a new callback BdrvChildRole.bdrv_drained_poll() and
uses it to make bdrv_drain_poll() consider block jobs using the node to
be drained.

For the test case to work as expected, we have to switch from
block_job_sleep_ns() to qemu_co_sleep_ns() so that the test job is even
considered active and must be waited for when draining the node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block.h        |  8 ++++++++
 include/block/block_int.h    |  7 +++++++
 include/block/blockjob_int.h |  8 ++++++++
 block.c                      |  9 +++++++++
 block/io.c                   | 40 ++++++++++++++++++++++++++++++++++------
 block/mirror.c               |  8 ++++++++
 blockjob.c                   | 23 +++++++++++++++++++++++
 tests/test-bdrv-drain.c      | 18 ++++++++++--------
 8 files changed, 107 insertions(+), 14 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
 void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
 
 /**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs and its parents (except for
+ * @ignore_parent). This is part of bdrv_drained_begin.
+ */
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent);
+
+/**
  * bdrv_drained_begin:
  *
  * Begin a quiesced section for exclusive access to the BDS, by disabling
diff --git a/include/block/block_int.h b/include/block/block_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -XXX,XX +XXX,XX @@ struct BdrvChildRole {
     void (*drained_begin)(BdrvChild *child);
     void (*drained_end)(BdrvChild *child);
 
+    /*
+     * Returns whether the parent has pending requests for the child. This
+     * callback is polled after .drained_begin() has been called until all
+     * activity on the child has stopped.
+     */
+    bool (*drained_poll)(BdrvChild *child);
+
     /* Notifies the parent that the child has been activated/inactivated (e.g.
      * when migration is completing) and it can start/stop requesting
      * permissions and doing I/O on it. */
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -XXX,XX +XXX,XX @@ struct BlockJobDriver {
     JobDriver job_driver;
 
     /*
+     * Returns whether the job has pending requests for the child or will
+     * submit new requests before the next pause point. This callback is polled
+     * in the context of draining a job node after requesting that the job be
+     * paused, until all activity on the child has stopped.
+     */
+    bool (*drained_poll)(BlockJob *job);
+
+    /*
      * If the callback is not NULL, it will be invoked before the job is
      * resumed in a new AioContext. This is the place to move any resources
      * besides job->blk to the new AioContext.
diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
     bdrv_drained_begin(bs);
 }
 
+static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    return bdrv_drain_poll(bs, NULL);
+}
+
 static void bdrv_child_cb_drained_end(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = {
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_poll    = bdrv_child_cb_drained_poll,
     .drained_end     = bdrv_child_cb_drained_end,
     .attach          = bdrv_child_cb_attach,
     .detach          = bdrv_child_cb_detach,
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = {
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_fmt_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_poll    = bdrv_child_cb_drained_poll,
     .drained_end     = bdrv_child_cb_drained_end,
     .attach          = bdrv_child_cb_attach,
     .detach          = bdrv_child_cb_detach,
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_backing = {
     .detach          = bdrv_backing_detach,
     .inherit_options = bdrv_backing_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_poll    = bdrv_child_cb_drained_poll,
     .drained_end     = bdrv_child_cb_drained_end,
     .inactivate      = bdrv_child_cb_inactivate,
     .update_filename = bdrv_backing_update_filename,
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
     }
 }
 
+static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore)
+{
+    BdrvChild *c, *next;
+    bool busy = false;
+
+    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
+        if (c == ignore) {
+            continue;
+        }
+        if (c->role->drained_poll) {
+            busy |= c->role->drained_poll(c);
+        }
+    }
+
+    return busy;
+}
+
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
 {
     dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
 }
 
 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
-static bool bdrv_drain_poll(BlockDriverState *bs)
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent)
+{
+    if (bdrv_parent_drained_poll(bs, ignore_parent)) {
+        return true;
+    }
+
+    return atomic_read(&bs->in_flight);
+}
+
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
+                                      BdrvChild *ignore_parent)
 {
     /* Execute pending BHs first and check everything else only after the BHs
      * have executed. */
     while (aio_poll(bs->aio_context, false));
-    return atomic_read(&bs->in_flight);
+
+    return bdrv_drain_poll(bs, ignore_parent);
 }
 
-static bool bdrv_drain_recurse(BlockDriverState *bs)
+static bool bdrv_drain_recurse(BlockDriverState *bs, BdrvChild *parent)
 {
     BdrvChild *child, *tmp;
     bool waited;
 
     /* Wait for drained requests to finish */
-    waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll(bs));
+    waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
 
     QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
         BlockDriverState *bs = child->bs;
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
          */
         bdrv_ref(bs);
     }
-    waited |= bdrv_drain_recurse(bs);
+    waited |= bdrv_drain_recurse(bs, child);
     if (in_main_loop) {
         bdrv_unref(bs);
     }
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
 
     bdrv_parent_drained_begin(bs, parent);
     bdrv_drain_invoke(bs, true);
-    bdrv_drain_recurse(bs);
+    bdrv_drain_recurse(bs, parent);
 
     if (recursive) {
         bs->recursive_quiesce_counter++;
diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ static void mirror_pause(Job *job)
     mirror_wait_for_all_io(s);
 }
 
+static bool mirror_drained_poll(BlockJob *job)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    return !!s->in_flight;
+}
+
 static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver mirror_job_driver = {
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
     },
+    .drained_poll           = mirror_drained_poll,
    .attached_aio_context   = mirror_attached_aio_context,
     .drain                  = mirror_drain,
 };
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver commit_active_job_driver = {
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
     },
+    .drained_poll           = mirror_drained_poll,
    .attached_aio_context   = mirror_attached_aio_context,
     .drain                  = mirror_drain,
 };
diff --git a/blockjob.c b/blockjob.c
index XXXXXXX..XXXXXXX 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -XXX,XX +XXX,XX @@ static void child_job_drained_begin(BdrvChild *c)
     job_pause(&job->job);
 }
 
+static bool child_job_drained_poll(BdrvChild *c)
+{
+    BlockJob *bjob = c->opaque;
+    Job *job = &bjob->job;
+    const BlockJobDriver *drv = block_job_driver(bjob);
+
+    /* An inactive or completed job doesn't have any pending requests. Jobs
+     * with !job->busy are either already paused or have a pause point after
+     * being reentered, so no job driver code will run before they pause. */
+    if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) {
+        return false;
+    }
+
+    /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
+     * override this assumption. */
+    if (drv->drained_poll) {
+        return drv->drained_poll(bjob);
+    } else {
+        return true;
+    }
+}
+
 static void child_job_drained_end(BdrvChild *c)
 {
     BlockJob *job = c->opaque;
@@ -XXX,XX +XXX,XX @@ static void child_job_drained_end(BdrvChild *c)
 static const BdrvChildRole child_job = {
     .get_parent_desc    = child_job_get_parent_desc,
     .drained_begin      = child_job_drained_begin,
+    .drained_poll       = child_job_drained_poll,
     .drained_end        = child_job_drained_end,
     .stay_at_node       = true,
 };
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn test_job_start(void *opaque)
 
     job_transition_to_ready(&s->common.job);
     while (!s->should_complete) {
-        job_sleep_ns(&s->common.job, 100000);
+        /* Avoid block_job_sleep_ns() because it marks the job as !busy. We
+         * want to emulate some actual activity (probably some I/O) here so
+         * that drain has to wait for this activity to stop. */
+        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+        job_pause_point(&s->common.job);
     }
 
     job_defer_to_main_loop(&s->common.job, test_job_completed, NULL);
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_false(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
 
     do_drain_begin(drain_type, src);
 
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
     } else {
         g_assert_cmpint(job->job.pause_count, ==, 1);
     }
-    /* XXX We don't wait until the job is actually paused. Is this okay? */
-    /* g_assert_true(job->job.paused); */
+    g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
     do_drain_end(drain_type, src);
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_false(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
     do_drain_begin(drain_type, target);
 
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
     } else {
         g_assert_cmpint(job->job.pause_count, ==, 1);
     }
-    /* XXX We don't wait until the job is actually paused. Is this okay? */
-    /* g_assert_true(job->job.paused); */
+    g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
     do_drain_end(drain_type, target);
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_false(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
 
     ret = job_complete_sync(&job->job, &error_abort);
     g_assert_cmpint(ret, ==, 0);
-- 
2.13.6


Now that the node-level interface bdrv_truncate() supports passing request
flags to the block driver, expose this on the BlockBackend level, too.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200424125448.63318-4-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/sysemu/block-backend.h | 2 +-
 block.c                        | 3 ++-
 block/block-backend.c          | 4 ++--
 block/commit.c                 | 4 ++--
 block/crypto.c                 | 2 +-
 block/mirror.c                 | 2 +-
 block/qcow2.c                  | 4 ++--
 block/qed.c                    | 2 +-
 block/vdi.c                    | 2 +-
 block/vhdx.c                   | 4 ++--
 block/vmdk.c                   | 6 +++---
 block/vpc.c                    | 2 +-
 blockdev.c                     | 2 +-
 qemu-img.c                     | 2 +-
 qemu-io-cmds.c                 | 2 +-
 15 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
 int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                           int bytes);
 int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
-                 PreallocMode prealloc, Error **errp);
+                 PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
 int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes);
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                      int64_t pos, int size);
diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static int64_t create_file_fallback_truncate(BlockBackend *blk,
     int64_t size;
     int ret;
 
-    ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err);
+    ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
+                       &local_err);
     if (ret < 0 && ret != -ENOTSUP) {
         error_propagate(errp, local_err);
         return ret;
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
 }
 
 int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
-                 PreallocMode prealloc, Error **errp)
+                 PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
 {
     if (!blk_is_available(blk)) {
         error_setg(errp, "No medium inserted");
         return -ENOMEDIUM;
     }
 
-    return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp);
+    return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
 }
 
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
diff --git a/block/commit.c b/block/commit.c
index XXXXXXX..XXXXXXX 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn commit_run(Job *job, Error **errp)
     }
 
     if (base_len < len) {
-        ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL);
+        ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
         if (ret) {
             goto out;
         }
@@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs)
      * grow the backing file image if possible. If not possible,
      * we must return an error */
     if (length > backing_length) {
-        ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF,
+        ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0,
                            &local_err);
         if (ret < 0) {
             error_report_err(local_err);
diff --git a/block/crypto.c b/block/crypto.c
index XXXXXXX..XXXXXXX 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -XXX,XX +XXX,XX @@ static ssize_t block_crypto_init_func(QCryptoBlock *block,
      * which will be used by the crypto header
      */
     return blk_truncate(data->blk, data->size + headerlen, false,
-                        data->prealloc, errp);
+                        data->prealloc, 0, errp);
 }
 
 
diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 
     if (s->bdev_length > base_length) {
         ret = blk_truncate(s->target, s->bdev_length, false,
-                           PREALLOC_MODE_OFF, NULL);
+                           PREALLOC_MODE_OFF, 0, NULL);
         if (ret < 0) {
             goto immediate_exit;
         }
diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
 
     /* Okay, now that we have a valid image, let's give it the right size */
     ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
-                       errp);
+                       0, errp);
     if (ret < 0) {
         error_prepend(errp, "Could not resize image: ");
         goto out;
@@ -XXX,XX +XXX,XX @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
              * Amending image options should ensure that the image has
              * exactly the given new values, so pass exact=true here.
              */
-            ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp);
+            ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp);
             blk_unref(blk);
             if (ret < 0) {
                 return ret;
diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
      * The QED format associates file length with allocation status,
      * so a new file (which is empty) must have a length of 0.
      */
-    ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp);
+    ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp);
     if (ret < 0) {
         goto out;
     }
diff --git a/block/vdi.c b/block/vdi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
 
     if (image_type == VDI_TYPE_STATIC) {
         ret = blk_truncate(blk, offset + blocks * block_size, false,
-                           PREALLOC_MODE_OFF, errp);
+                           PREALLOC_MODE_OFF, 0, errp);
         if (ret < 0) {
             error_prepend(errp, "Failed to statically allocate file");
             goto exit;
diff --git a/block/vhdx.c b/block/vhdx.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -XXX,XX +XXX,XX @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
         /* All zeroes, so we can just extend the file - the end of the BAT
          * is the furthest thing we have written yet */
         ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF,
-                           errp);
+                           0, errp);
         if (ret < 0) {
             goto exit;
         }
     } else if (type == VHDX_TYPE_FIXED) {
         ret = blk_truncate(blk, data_file_offset + image_size, false,
-                           PREALLOC_MODE_OFF, errp);
+                           PREALLOC_MODE_OFF, 0, errp);
         if (ret < 0) {
             goto exit;
         }
diff --git a/block/vmdk.c b/block/vmdk.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk,
     int gd_buf_size;
 
     if (flat) {
-        ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp);
+        ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
         goto exit;
     }
     magic = cpu_to_be32(VMDK4_MAGIC);
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk,
     }
 
     ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
-                       PREALLOC_MODE_OFF, errp);
+                       PREALLOC_MODE_OFF, 0, errp);
     if (ret < 0) {
         goto exit;
     }
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
     /* bdrv_pwrite write padding zeros to align to sector, we don't need that
      * for description file */
     if (desc_offset == 0) {
-        ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp);
+        ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
         if (ret < 0) {
             goto exit;
         }
diff --git a/block/vpc.c b/block/vpc.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -XXX,XX +XXX,XX @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
     /* Add footer to total size */
     total_size += HEADER_SIZE;
 
-    ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp);
+    ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
     if (ret < 0) {
         return ret;
     }
diff --git a/blockdev.c b/blockdev.c
index XXXXXXX..XXXXXXX 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -XXX,XX +XXX,XX @@ void qmp_block_resize(bool has_device, const char *device,
     }
 
     bdrv_drained_begin(bs);
-    ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp);
+    ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
     bdrv_drained_end(bs);
 
 out:
diff --git a/qemu-img.c b/qemu-img.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -XXX,XX +XXX,XX @@ static int img_resize(int argc, char **argv)
      * resizing, so pass @exact=true. It is of no use to report
      * success when the image has not actually been resized.
      */
-    ret = blk_truncate(blk, total_size, true, prealloc, &err);
+    ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
     if (!ret) {
         qprintf(quiet, "Image resized.\n");
     } else {
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -XXX,XX +XXX,XX @@ static int truncate_f(BlockBackend *blk, int argc, char **argv)
      * exact=true. It is better to err on the "emit more errors" side
      * than to be overly permissive.
      */
-    ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err);
+    ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err);
    if (ret < 0) {
         error_report_err(local_err);
         return ret;
-- 
2.25.3
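To make the polling contract above concrete: after this patch, beginning a
drained section conceptually behaves like the loop below. This is a
simplified sketch, not the literal QEMU code (which also handles recursion,
coroutine context and AioContext locking):

    /* Conceptual drain: quiesce parents first, then poll until neither
     * the node nor any parent (e.g. a block job) reports pending work. */
    bdrv_parent_drained_begin(bs, NULL);
    bdrv_drain_invoke(bs, true);
    while (bdrv_drain_poll(bs, NULL)) {
        aio_poll(bdrv_get_aio_context(bs), true /* blocking */);
    }

The key difference from before is that bdrv_drain_poll() now also asks the
parents via .drained_poll, so a block job that is still running counts as
pending activity.
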
From: Max Reitz <mreitz@redhat.com>

This patch implements active synchronous mirroring. In active mode, the
passive mechanism will still be in place and is used to copy all
initially dirty clusters off the source disk; but every write request
will write data both to the source and the target disk, so the source
cannot be dirtied faster than data is mirrored to the target. Also,
once the block job has converged (BLOCK_JOB_READY sent), source and
target are guaranteed to stay in sync (unless an error occurs).

Active mode is completely optional and currently disabled at runtime. A
later patch will add a way for users to enable it.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20180613181823.13618-13-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 qapi/block-core.json |  18 ++++
 block/mirror.c       | 252 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 265 insertions(+), 5 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -XXX,XX +XXX,XX @@
   'data': ['top', 'full', 'none', 'incremental'] }
 
 ##
+# @MirrorCopyMode:
+#
+# An enumeration whose values tell the mirror block job when to
+# trigger writes to the target.
+#
+# @background: copy data in background only.
+#
+# @write-blocking: when data is written to the source, write it
+#                  (synchronously) to the target as well. In
+#                  addition, data is copied in background just like in
+#                  @background mode.
+#
+# Since: 3.0
+##
+{ 'enum': 'MirrorCopyMode',
+  'data': ['background', 'write-blocking'] }
+
+##
 # @BlockJobInfo:
 #
 # Information about a long-running block device operation.
diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob {
     Error *replace_blocker;
     bool is_none_mode;
     BlockMirrorBackingMode backing_mode;
+    MirrorCopyMode copy_mode;
     BlockdevOnError on_source_error, on_target_error;
     bool synced;
+    /* Set when the target is synced (dirty bitmap is clean, nothing
+     * in flight) and the job is running in active mode */
+    bool actively_synced;
     bool should_complete;
     int64_t granularity;
     size_t buf_size;
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob {
     int target_cluster_size;
     int max_iov;
     bool initial_zeroing_ongoing;
+    int in_active_write_counter;
 } MirrorBlockJob;
 
 typedef struct MirrorBDSOpaque {
@@ -XXX,XX +XXX,XX @@ struct MirrorOp {
     int64_t *bytes_handled;
 
     bool is_pseudo_op;
+    bool is_active_write;
     CoQueue waiting_requests;
 
     QTAILQ_ENTRY(MirrorOp) next;
@@ -XXX,XX +XXX,XX @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                             int error)
 {
     s->synced = false;
+    s->actively_synced = false;
     if (read) {
         return block_job_error_action(&s->common, s->on_source_error,
                                       true, error);
@@ -XXX,XX +XXX,XX @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
     return ret;
 }
 
-static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
+static inline void mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
 {
     MirrorOp *op;
 
@@ -XXX,XX +XXX,XX @@ static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
          * caller of this function. Since there is only one pseudo op
          * at any given time, we will always find some real operation
          * to wait on. */
-        if (!op->is_pseudo_op) {
+        if (!op->is_pseudo_op && op->is_active_write == active) {
             qemu_co_queue_wait(&op->waiting_requests, NULL);
             return;
         }
@@ -XXX,XX +XXX,XX @@ static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
     abort();
 }
 
+static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
+{
+    /* Only non-active operations use up in-flight slots */
+    mirror_wait_for_any_operation(s, false);
+}
+
 /* Perform a mirror copy operation.
  *
  * *op->bytes_handled is set to the number of bytes copied after and
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque)
         /* Transition to the READY state and wait for complete. */
         job_transition_to_ready(&s->common.job);
         s->synced = true;
+        s->actively_synced = true;
         while (!job_is_cancelled(&s->common.job) && !s->should_complete) {
             job_yield(&s->common.job);
         }
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque)
         int64_t cnt, delta;
         bool should_complete;
 
+        /* Do not start passive operations while there are active
+         * writes in progress */
+        while (s->in_active_write_counter) {
+            mirror_wait_for_any_operation(s, true);
+        }
+
         if (s->ret < 0) {
             ret = s->ret;
             goto immediate_exit;
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque)
              */
             job_transition_to_ready(&s->common.job);
             s->synced = true;
+            if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) {
+                s->actively_synced = true;
+            }
         }
 
         should_complete = s->should_complete ||
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver commit_active_job_driver = {
     .drain                  = mirror_drain,
 };
 
+static void do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
+                                 uint64_t offset, uint64_t bytes,
+                                 QEMUIOVector *qiov, int flags)
+{
+    BdrvDirtyBitmapIter *iter;
+    QEMUIOVector target_qiov;
+    uint64_t dirty_offset;
+    int dirty_bytes;
+
+    if (qiov) {
+        qemu_iovec_init(&target_qiov, qiov->niov);
+    }
+
+    iter = bdrv_dirty_iter_new(job->dirty_bitmap);
+    bdrv_set_dirty_iter(iter, offset);
+
+    while (true) {
+        bool valid_area;
+        int ret;
+
+        bdrv_dirty_bitmap_lock(job->dirty_bitmap);
+        valid_area = bdrv_dirty_iter_next_area(iter, offset + bytes,
+                                               &dirty_offset, &dirty_bytes);
+        if (!valid_area) {
+            bdrv_dirty_bitmap_unlock(job->dirty_bitmap);
+            break;
+        }
+
+        bdrv_reset_dirty_bitmap_locked(job->dirty_bitmap,
+                                       dirty_offset, dirty_bytes);
+        bdrv_dirty_bitmap_unlock(job->dirty_bitmap);
+
+        job_progress_increase_remaining(&job->common.job, dirty_bytes);
+
+        assert(dirty_offset - offset <= SIZE_MAX);
+        if (qiov) {
+            qemu_iovec_reset(&target_qiov);
+            qemu_iovec_concat(&target_qiov, qiov,
+                              dirty_offset - offset, dirty_bytes);
+        }
+
+        switch (method) {
+        case MIRROR_METHOD_COPY:
+            ret = blk_co_pwritev(job->target, dirty_offset, dirty_bytes,
+                                 qiov ? &target_qiov : NULL, flags);
+            break;
+
+        case MIRROR_METHOD_ZERO:
+            assert(!qiov);
+            ret = blk_co_pwrite_zeroes(job->target, dirty_offset, dirty_bytes,
+                                       flags);
+            break;
+
+        case MIRROR_METHOD_DISCARD:
+            assert(!qiov);
+            ret = blk_co_pdiscard(job->target, dirty_offset, dirty_bytes);
+            break;
+
+        default:
+            abort();
+        }
+
+        if (ret >= 0) {
+            job_progress_update(&job->common.job, dirty_bytes);
+        } else {
+            BlockErrorAction action;
+
+            bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_offset, dirty_bytes);
+            job->actively_synced = false;
+
+            action = mirror_error_action(job, false, -ret);
+            if (action == BLOCK_ERROR_ACTION_REPORT) {
+                if (!job->ret) {
+                    job->ret = ret;
+                }
+                break;
+            }
+        }
+    }
+
+    bdrv_dirty_iter_free(iter);
+    if (qiov) {
+        qemu_iovec_destroy(&target_qiov);
+    }
+}
+
+static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
+                                                   uint64_t offset,
+                                                   uint64_t bytes)
+{
+    MirrorOp *op;
+    uint64_t start_chunk = offset / s->granularity;
+    uint64_t end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
+
+    op = g_new(MirrorOp, 1);
+    *op = (MirrorOp){
+        .s               = s,
+        .offset          = offset,
+        .bytes           = bytes,
+        .is_active_write = true,
+    };
+    qemu_co_queue_init(&op->waiting_requests);
+    QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
+
+    s->in_active_write_counter++;
+
+    mirror_wait_on_conflicts(op, s, offset, bytes);
+
+    bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
+
+    return op;
+}
+
+static void coroutine_fn active_write_settle(MirrorOp *op)
+{
+    uint64_t start_chunk = op->offset / op->s->granularity;
+    uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes,
+                                      op->s->granularity);
+
+    if (!--op->s->in_active_write_counter && op->s->actively_synced) {
+        BdrvChild *source = op->s->mirror_top_bs->backing;
+
+        if (QLIST_FIRST(&source->bs->parents) == source &&
+            QLIST_NEXT(source, next_parent) == NULL)
+        {
+            /* Assert that we are back in sync once all active write
+             * operations are settled.
+             * Note that we can only assert this if the mirror node
+             * is the source node's only parent. */
+            assert(!bdrv_get_dirty_count(op->s->dirty_bitmap));
+        }
+    }
+    bitmap_clear(op->s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
+    QTAILQ_REMOVE(&op->s->ops_in_flight, op, next);
+    qemu_co_queue_restart_all(&op->waiting_requests);
+    g_free(op);
+}
+
 static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
     uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
 
+static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
+    MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
+    int flags)
+{
+    MirrorOp *op = NULL;
+    MirrorBDSOpaque *s = bs->opaque;
+    int ret = 0;
+    bool copy_to_target;
+
+    copy_to_target = s->job->ret >= 0 &&
+                     s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+
+    if (copy_to_target) {
+        op = active_write_prepare(s->job, offset, bytes);
+    }
+
+    switch (method) {
+    case MIRROR_METHOD_COPY:
+        ret = bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+        break;
+
+    case MIRROR_METHOD_ZERO:
+        ret = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
+        break;
+
+    case MIRROR_METHOD_DISCARD:
+        ret = bdrv_co_pdiscard(bs->backing->bs, offset, bytes);
+        break;
+
+    default:
+        abort();
+    }
+
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (copy_to_target) {
+        do_sync_target_write(s->job, method, offset, bytes, qiov, flags);
+    }
+
+out:
+    if (copy_to_target) {
+        active_write_settle(op);
+    }
+    return ret;
+}
+
 static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
     uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-    return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+    MirrorBDSOpaque *s = bs->opaque;
+    QEMUIOVector bounce_qiov;
+    void *bounce_buf;
+    int ret = 0;
+    bool copy_to_target;
+
+    copy_to_target = s->job->ret >= 0 &&
+                     s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+
+    if (copy_to_target) {
+        /* The guest might concurrently modify the data to write; but
+         * the data on source and destination must match, so we have
+         * to use a bounce buffer if we are going to write to the
+         * target now. */
+        bounce_buf = qemu_blockalign(bs, bytes);
+        iov_to_buf_full(qiov->iov, qiov->niov, 0, bounce_buf, bytes);
+
+        qemu_iovec_init(&bounce_qiov, 1);
+        qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
+        qiov = &bounce_qiov;
+    }
+
+    ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov,
+                                   flags);
+
+    if (copy_to_target) {
+        qemu_iovec_destroy(&bounce_qiov);
+        qemu_vfree(bounce_buf);
+    }
+
+    return ret;
 }
 
 static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
 static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int bytes, BdrvRequestFlags flags)
 {
-    return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
+    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL,
+                                    flags);
 }
 
 static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
     int64_t offset, int bytes)
 {
-    return bdrv_co_pdiscard(bs->backing->bs, offset, bytes);
+    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes,
+                                    NULL, 0);
 }
 
 static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
@@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
     s->on_target_error = on_target_error;
     s->is_none_mode = is_none_mode;
     s->backing_mode = backing_mode;
+    s->copy_mode = MIRROR_COPY_MODE_BACKGROUND;
     s->base = base;
     s->granularity = granularity;
     s->buf_size = ROUND_UP(buf_size, granularity);
-- 
2.13.6


If BDRV_REQ_ZERO_WRITE is set and we're extending the image, calling
qcow2_cluster_zeroize() with flags=0 does the right thing: It doesn't
undo any previous preallocation, but just adds the zero flag to all
relevant L2 entries. If an external data file is in use, a write_zeroes
request to the data file is made instead.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20200424125448.63318-5-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c |  2 +-
 block/qcow2.c         | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
     /* Caller must pass aligned values, except at image end */
     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
     assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
-           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
+           end_offset >= bs->total_sectors << BDRV_SECTOR_BITS);
 
     /* The zero flag is only supported by version 3 and newer */
     if (s->qcow_version < 3) {
diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
 
     bs->supported_zero_flags = header.version >= 3 ?
                                BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
+    bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
 
     /* Repair image if dirty */
     if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
         g_assert_not_reached();
     }
 
+    if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) {
+        uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size);
+
+        /*
+         * Use zero clusters as much as we can. qcow2_cluster_zeroize()
+         * requires a cluster-aligned start. The end may be unaligned if it is
+         * at the end of the image (which it is here).
+         */
+        ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to zero out new clusters");
+            goto fail;
+        }
+
+        /* Write explicit zeros for the unaligned head */
+        if (zero_start > old_length) {
+            uint64_t len = zero_start - old_length;
+            uint8_t *buf = qemu_blockalign0(bs, len);
+            QEMUIOVector qiov;
+            qemu_iovec_init_buf(&qiov, buf, len);
+
+            qemu_co_mutex_unlock(&s->lock);
+            ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0);
+            qemu_co_mutex_lock(&s->lock);
+
+            qemu_vfree(buf);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "Failed to zero out the new area");
+                goto fail;
+            }
+        }
+    }
+
     if (prealloc != PREALLOC_MODE_OFF) {
         /* Flush metadata before actually changing the image size */
         ret = qcow2_write_caches(bs);
-- 
2.25.3
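The bounce buffer in bdrv_mirror_top_pwritev() above is the heart of
write-blocking mode: the guest may modify its buffer while a request is in
flight, so the data is snapshotted once and the identical bytes are then
written to both source and target. A stripped-down sketch of the idea
(success path only; the patch itself additionally does conflict tracking,
dirty-bitmap bookkeeping and error actions):

    /* Stabilize the guest data once ... */
    void *bounce_buf = qemu_blockalign(bs, bytes);
    QEMUIOVector bounce_qiov;

    iov_to_buf_full(qiov->iov, qiov->niov, 0, bounce_buf, bytes);
    qemu_iovec_init(&bounce_qiov, 1);
    qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);

    /* ... then write the same bytes to the source and to the target. */
    bdrv_co_pwritev(bs->backing, offset, bytes, &bounce_qiov, flags);
    blk_co_pwritev(s->job->target, offset, bytes, &bounce_qiov, flags);

    qemu_iovec_destroy(&bounce_qiov);
    qemu_vfree(bounce_buf);
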
For bdrv_drain(), recursively waiting for child node requests is
pointless because we didn't quiesce their parents, so new requests could
come in anyway. Letting the function work only on a single node makes it
more consistent.

For subtree drains and drain_all, we already have the recursion in
bdrv_do_drained_begin(), so the extra recursion doesn't add anything
either.

Remove the useless code.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 36 +++---------------------------------
 1 file changed, 3 insertions(+), 33 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
     return bdrv_drain_poll(bs, ignore_parent);
 }
 
-static bool bdrv_drain_recurse(BlockDriverState *bs, BdrvChild *parent)
-{
-    BdrvChild *child, *tmp;
-    bool waited;
-
-    /* Wait for drained requests to finish */
-    waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
-
-    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
-        BlockDriverState *bs = child->bs;
-        bool in_main_loop =
-            qemu_get_current_aio_context() == qemu_get_aio_context();
-        assert(bs->refcnt > 0);
-        if (in_main_loop) {
-            /* In case the recursive bdrv_drain_recurse processes a
-             * block_job_defer_to_main_loop BH and modifies the graph,
-             * let's hold a reference to bs until we are done.
-             *
-             * IOThread doesn't have such a BH, and it is not safe to call
-             * bdrv_unref without BQL, so skip doing it there.
-             */
-            bdrv_ref(bs);
-        }
-        waited |= bdrv_drain_recurse(bs, child);
-        if (in_main_loop) {
-            bdrv_unref(bs);
-        }
-    }
-
-    return waited;
-}
-
 static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
                                   BdrvChild *parent);
 static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
 
     bdrv_parent_drained_begin(bs, parent);
     bdrv_drain_invoke(bs, true);
-    bdrv_drain_recurse(bs, parent);
+
+    /* Wait for drained requests to finish */
+    BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
 
     if (recursive) {
         bs->recursive_quiesce_counter++;
-- 
2.13.6


The raw format driver can simply forward the flag and let its bs->file
child take care of actually providing the zeros.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200424125448.63318-6-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/raw-format.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/raw-format.c b/block/raw-format.c
index XXXXXXX..XXXXXXX 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
 
     s->size = offset;
     offset += s->offset;
-    return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
+    return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
 }
 
 static void raw_eject(BlockDriverState *bs, bool eject_flag)
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
             bs->file->bs->supported_zero_flags);
+    bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags &
+                                   BDRV_REQ_ZERO_WRITE;
 
     if (bs->probed && !bdrv_is_read_only(bs)) {
         bdrv_refresh_filename(bs->file->bs);
-- 
2.25.3
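The raw-format change above follows the standard pattern for pass-through
drivers: only advertise a capability that the child below actually provides,
and forward the caller's flags unchanged on the I/O path. Schematically
(simplified from the patch):

    /* At open time: claim zeroed truncation only if bs->file supports it. */
    bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags &
                                   BDRV_REQ_ZERO_WRITE;

    /* On the truncate path: pass the flags straight down to the child. */
    return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
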
From: Greg Kurz <groug@kaod.org>
1
For regular files, we always get BDRV_REQ_ZERO_WRITE behaviour from the
2
OS, so we can advertise the flag and just ignore it.
2
3
3
Removing a drive with drive_del while it is being used to run an I/O
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
intensive workload can cause QEMU to crash.
5
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
5
6
Reviewed-by: Alberto Garcia <berto@igalia.com>
6
An AIO flush can yield at some point:
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
8
Message-Id: <20200424125448.63318-7-kwolf@redhat.com>
8
blk_aio_flush_entry()
9
blk_co_flush(blk)
10
bdrv_co_flush(blk->root->bs)
11
...
12
qemu_coroutine_yield()
13
14
and let the HMP command to run, free blk->root and give control
15
back to the AIO flush:
16
17
hmp_drive_del()
18
blk_remove_bs()
19
bdrv_root_unref_child(blk->root)
20
child_bs = blk->root->bs
21
bdrv_detach_child(blk->root)
22
bdrv_replace_child(blk->root, NULL)
23
blk->root->bs = NULL
24
g_free(blk->root) <============== blk->root becomes stale
25
bdrv_unref(child_bs)
26
bdrv_delete(child_bs)
27
bdrv_close()
28
bdrv_drained_begin()
29
bdrv_do_drained_begin()
30
bdrv_drain_recurse()
31
aio_poll()
32
...
33
qemu_coroutine_switch()
34
35
and the AIO flush completion ends up dereferencing blk->root:
36
37
blk_aio_complete()
38
scsi_aio_complete()
39
blk_get_aio_context(blk)
40
bs = blk_bs(blk)
41
ie, bs = blk->root ? blk->root->bs : NULL
42
^^^^^
43
stale
44
45
The problem is that we should avoid making block driver graph
46
changes while we have in-flight requests. Let's drain all I/O
47
for this BB before calling bdrv_root_unref_child().
48
49
Signed-off-by: Greg Kurz <groug@kaod.org>
50
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
51
---
10
---
52
block/block-backend.c | 5 +++++
11
block/file-posix.c | 4 ++++
53
1 file changed, 5 insertions(+)
12
1 file changed, 4 insertions(+)
54
13
55
diff --git a/block/block-backend.c b/block/block-backend.c
14
diff --git a/block/file-posix.c b/block/file-posix.c
56
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
57
--- a/block/block-backend.c
16
--- a/block/file-posix.c
58
+++ b/block/block-backend.c
17
+++ b/block/file-posix.c
59
@@ -XXX,XX +XXX,XX @@ void blk_remove_bs(BlockBackend *blk)
18
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
60
19
#endif
61
blk_update_root_state(blk);
20
62
21
bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
63
+ /* bdrv_root_unref_child() will cause blk->root to become stale and may
22
+ if (S_ISREG(st.st_mode)) {
64
+ * switch to a completion coroutine later on. Let's drain all I/O here
23
+ /* When extending regular files, we get zeros from the OS */
65
+ * to avoid that and a potential QEMU crash.
24
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
66
+ */
25
+ }
67
+ blk_drain(blk);
26
ret = 0;
68
bdrv_root_unref_child(blk->root);
27
fail:
69
blk->root = NULL;
28
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
70
}
71
--
29
--
72
2.13.6
30
2.25.3
73
31
74
32
1
All callers pass false for the 'recursive' parameter now. Remove it.
1
When extending the size of an image that has a backing file larger than
2
its old size, make sure that the backing file data doesn't become
3
visible in the guest, but the added area is properly zeroed out.
4
5
Consider the following scenario where the overlay is shorter than its
6
backing file:
7
8
base.qcow2: AAAAAAAA
9
overlay.qcow2: BBBB
10
11
When resizing (extending) overlay.qcow2, the new blocks should not stay
12
unallocated and make the additional As from base.qcow2 visible like
13
before this patch, but zeros should be read.
14
15
A similar case happens with the various variants of a commit job when an
16
intermediate file is short (- for unallocated):
17
18
base.qcow2: A-A-AAAA
19
mid.qcow2: BB-B
20
top.qcow2: C--C--C-
21
22
After committing top.qcow2 to mid.qcow2, the following happens:
23
24
mid.qcow2: CB-C00C0 (correct result)
25
mid.qcow2: CB-C--C- (before this fix)
26
27
Without the fix, blocks that previously read as zeros on top.qcow2
28
suddenly turn into A.
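
The short-overlay scenario can be reproduced from the command line; a rough
sketch, assuming qemu-img and qemu-io are in $PATH (file names illustrative):

    import subprocess

    def run(*cmd):
        subprocess.run(cmd, check=True)

    run('qemu-img', 'create', '-f', 'qcow2', 'base.qcow2', '2M')
    run('qemu-io', '-c', 'write -P 0x41 0 2M', 'base.qcow2')  # all 'A'
    run('qemu-img', 'create', '-f', 'qcow2', '-b', 'base.qcow2',
        '-F', 'qcow2', 'overlay.qcow2', '1M')                 # short overlay
    run('qemu-img', 'resize', 'overlay.qcow2', '2M')
    # With the fix, the newly added second megabyte reads as zeros
    # instead of exposing the As from base.qcow2:
    run('qemu-io', '-c', 'read -P 0 1M 1M', 'overlay.qcow2')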
2
29
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
30
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
31
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
32
Message-Id: <20200424125448.63318-8-kwolf@redhat.com>
33
Reviewed-by: Max Reitz <mreitz@redhat.com>
34
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
---
35
---
6
block/io.c | 13 +++----------
36
block/io.c | 25 +++++++++++++++++++++++++
7
1 file changed, 3 insertions(+), 10 deletions(-)
37
1 file changed, 25 insertions(+)
8
38
9
diff --git a/block/io.c b/block/io.c
39
diff --git a/block/io.c b/block/io.c
10
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
11
--- a/block/io.c
41
--- a/block/io.c
12
+++ b/block/io.c
42
+++ b/block/io.c
13
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
43
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
14
}
44
goto out;
15
16
/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
17
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive)
18
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
19
{
20
- BdrvChild *child, *tmp;
21
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
22
23
if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
24
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive)
25
data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
26
bdrv_coroutine_enter(bs, data.co);
27
BDRV_POLL_WHILE(bs, !data.done);
28
-
29
- if (recursive) {
30
- QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
31
- bdrv_drain_invoke(child->bs, begin, true);
32
- }
33
- }
34
}
35
36
static bool bdrv_drain_recurse(BlockDriverState *bs)
37
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
38
}
45
}
39
46
40
bdrv_parent_drained_begin(bs, parent);
47
+ /*
41
- bdrv_drain_invoke(bs, true, false);
48
+ * If the image has a backing file that is large enough that it would
42
+ bdrv_drain_invoke(bs, true);
49
+ * provide data for the new area, we cannot leave it unallocated because
43
bdrv_drain_recurse(bs);
50
+ * then the backing file content would become visible. Instead, zero-fill
44
51
+ * the new area.
45
if (recursive) {
52
+ *
46
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
53
+ * Note that if the image has a backing file, but was opened without the
47
old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
54
+ * backing file, taking care of keeping things consistent with that backing
48
55
+ * file is the user's responsibility.
49
/* Re-enable things in child-to-parent order */
56
+ */
50
- bdrv_drain_invoke(bs, false, false);
57
+ if (new_bytes && bs->backing) {
51
+ bdrv_drain_invoke(bs, false);
58
+ int64_t backing_len;
52
bdrv_parent_drained_end(bs, parent);
59
+
53
if (old_quiesce_counter == 1) {
60
+ backing_len = bdrv_getlength(backing_bs(bs));
54
aio_enable_external(bdrv_get_aio_context(bs));
61
+ if (backing_len < 0) {
62
+ ret = backing_len;
63
+ error_setg_errno(errp, -ret, "Could not get backing file size");
64
+ goto out;
65
+ }
66
+
67
+ if (backing_len > old_size) {
68
+ flags |= BDRV_REQ_ZERO_WRITE;
69
+ }
70
+ }
71
+
72
if (drv->bdrv_co_truncate) {
73
if (flags & ~bs->supported_truncate_flags) {
74
error_setg(errp, "Block driver does not support requested flags");
55
--
75
--
56
2.13.6
76
2.25.3
57
77
58
78
diff view generated by jsdifflib
1
Commit 91af091f923 added an additional aio_poll() to BDRV_POLL_WHILE()
1
We want to keep TEST_IMG for the full path of the main test image, but
2
in order to make sure that all pending BHs are executed on drain. This
2
filter_testfiles() must be called for other test images before replacing
3
was the wrong place to make the fix, as it is useless overhead for all
3
other things like the image format because the test directory path could
4
other users of the macro and unnecessarily complicates the mechanism.
4
contain the format as a substring.
5
5
6
This patch effectively reverts said commit (the context has changed a
6
Insert a filter_testfiles() call between the two replacements.
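
To see why the order matters, a self-contained sketch with made-up values
(the test directory name contains the image format as a substring):

    testdir = '/var/tmp/qcow2-tests'   # hypothetical TEST_DIR
    line = 'image: /var/tmp/qcow2-tests/t.qcow2'

    # Wrong: replacing the format first also mangles the directory path
    bad = line.replace('qcow2', 'IMGFMT')
    # -> 'image: /var/tmp/IMGFMT-tests/t.IMGFMT'

    # Right: neutralize test file paths first, then the format
    good = line.replace(testdir, 'TEST_DIR').replace('qcow2', 'IMGFMT')
    assert good == 'image: TEST_DIR/t.IMGFMT'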
7
bit and the code has moved to AIO_WAIT_WHILE()) and instead polls in the
8
loop condition for drain.
9
10
The effect is probably hard to measure in any real-world use case
11
because actual I/O will dominate, but if I run only the initialisation
12
part of 'qemu-img convert' where it calls bdrv_block_status() for the
13
whole image to find out how much data there is to copy, this phase actually
14
needs only roughly half the time after this patch.
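
One rough way to observe this from the outside (an approximation, not the
benchmark used above) is to time 'qemu-img map', which also walks
bdrv_block_status() across the whole image:

    import subprocess, time

    start = time.monotonic()
    subprocess.run(['qemu-img', 'map', '--output=json', 'test.qcow2'],
                   check=True, capture_output=True)
    print('block status walk took %.3f s' % (time.monotonic() - start))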
15
7
16
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Message-Id: <20200424125448.63318-9-kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
---
13
---
19
include/block/aio-wait.h | 22 ++++++++--------------
14
tests/qemu-iotests/iotests.py | 5 +++--
20
block/io.c | 11 ++++++++++-
15
1 file changed, 3 insertions(+), 2 deletions(-)
21
2 files changed, 18 insertions(+), 15 deletions(-)
22
16
23
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
17
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
24
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
25
--- a/include/block/aio-wait.h
19
--- a/tests/qemu-iotests/iotests.py
26
+++ b/include/block/aio-wait.h
20
+++ b/tests/qemu-iotests/iotests.py
27
@@ -XXX,XX +XXX,XX @@ typedef struct {
21
@@ -XXX,XX +XXX,XX @@ def filter_img_info(output, filename):
28
*/
22
for line in output.split('\n'):
29
#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \
23
if 'disk size' in line or 'actual-size' in line:
30
bool waited_ = false; \
24
continue
31
- bool busy_ = true; \
25
- line = line.replace(filename, 'TEST_IMG') \
32
AioWait *wait_ = (wait); \
26
- .replace(imgfmt, 'IMGFMT')
33
AioContext *ctx_ = (ctx); \
27
+ line = line.replace(filename, 'TEST_IMG')
34
if (in_aio_context_home_thread(ctx_)) { \
28
+ line = filter_testfiles(line)
35
- while ((cond) || busy_) { \
29
+ line = line.replace(imgfmt, 'IMGFMT')
36
- busy_ = aio_poll(ctx_, (cond)); \
30
line = re.sub('iters: [0-9]+', 'iters: XXX', line)
37
- waited_ |= !!(cond) | busy_; \
31
line = re.sub('uuid: [-a-f0-9]+', 'uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', line)
38
+ while ((cond)) { \
32
line = re.sub('cid: [0-9]+', 'cid: XXXXXXXXXX', line)
39
+ aio_poll(ctx_, true); \
40
+ waited_ = true; \
41
} \
42
} else { \
43
assert(qemu_get_current_aio_context() == \
44
qemu_get_aio_context()); \
45
/* Increment wait_->num_waiters before evaluating cond. */ \
46
atomic_inc(&wait_->num_waiters); \
47
- while (busy_) { \
48
- if ((cond)) { \
49
- waited_ = busy_ = true; \
50
- aio_context_release(ctx_); \
51
- aio_poll(qemu_get_aio_context(), true); \
52
- aio_context_acquire(ctx_); \
53
- } else { \
54
- busy_ = aio_poll(ctx_, false); \
55
- waited_ |= busy_; \
56
- } \
57
+ while ((cond)) { \
58
+ aio_context_release(ctx_); \
59
+ aio_poll(qemu_get_aio_context(), true); \
60
+ aio_context_acquire(ctx_); \
61
+ waited_ = true; \
62
} \
63
atomic_dec(&wait_->num_waiters); \
64
} \
65
diff --git a/block/io.c b/block/io.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/block/io.c
68
+++ b/block/io.c
69
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
70
BDRV_POLL_WHILE(bs, !data.done);
71
}
72
73
+/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
74
+static bool bdrv_drain_poll(BlockDriverState *bs)
75
+{
76
+ /* Execute pending BHs first and check everything else only after the BHs
77
+ * have executed. */
78
+ while (aio_poll(bs->aio_context, false));
79
+ return atomic_read(&bs->in_flight);
80
+}
81
+
82
static bool bdrv_drain_recurse(BlockDriverState *bs)
83
{
84
BdrvChild *child, *tmp;
85
bool waited;
86
87
/* Wait for drained requests to finish */
88
- waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
89
+ waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll(bs));
90
91
QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
92
BlockDriverState *bs = child->bs;
93
--
33
--
94
2.13.6
34
2.25.3
95
35
96
36
1
From: Max Reitz <mreitz@redhat.com>
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
Message-Id: <20200424125448.63318-10-kwolf@redhat.com>
3
Reviewed-by: Max Reitz <mreitz@redhat.com>
4
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
---
7
tests/qemu-iotests/274 | 155 +++++++++++++++++++++
8
tests/qemu-iotests/274.out | 268 +++++++++++++++++++++++++++++++++++++
9
tests/qemu-iotests/group | 1 +
10
3 files changed, 424 insertions(+)
11
create mode 100755 tests/qemu-iotests/274
12
create mode 100644 tests/qemu-iotests/274.out
2
13
3
Signed-off-by: Max Reitz <mreitz@redhat.com>
14
diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274
4
Reviewed-by: Fam Zheng <famz@redhat.com>
5
Reviewed-by: Alberto Garcia <berto@igalia.com>
6
Message-id: 20180613181823.13618-15-mreitz@redhat.com
7
Signed-off-by: Max Reitz <mreitz@redhat.com>
8
---
9
tests/qemu-iotests/151 | 120 +++++++++++++++++++++++++++++++++++++++++++++
10
tests/qemu-iotests/151.out | 5 ++
11
tests/qemu-iotests/group | 1 +
12
3 files changed, 126 insertions(+)
13
create mode 100755 tests/qemu-iotests/151
14
create mode 100644 tests/qemu-iotests/151.out
15
16
diff --git a/tests/qemu-iotests/151 b/tests/qemu-iotests/151
17
new file mode 100755
15
new file mode 100755
18
index XXXXXXX..XXXXXXX
16
index XXXXXXX..XXXXXXX
19
--- /dev/null
17
--- /dev/null
20
+++ b/tests/qemu-iotests/151
18
+++ b/tests/qemu-iotests/274
21
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@
22
+#!/usr/bin/env python
20
+#!/usr/bin/env python3
23
+#
21
+#
24
+# Tests for active mirroring
22
+# Copyright (C) 2019 Red Hat, Inc.
25
+#
26
+# Copyright (C) 2018 Red Hat, Inc.
27
+#
23
+#
28
+# This program is free software; you can redistribute it and/or modify
24
+# This program is free software; you can redistribute it and/or modify
29
+# it under the terms of the GNU General Public License as published by
25
+# it under the terms of the GNU General Public License as published by
30
+# the Free Software Foundation; either version 2 of the License, or
26
+# the Free Software Foundation; either version 2 of the License, or
31
+# (at your option) any later version.
27
+# (at your option) any later version.
...
...
36
+# GNU General Public License for more details.
32
+# GNU General Public License for more details.
37
+#
33
+#
38
+# You should have received a copy of the GNU General Public License
34
+# You should have received a copy of the GNU General Public License
39
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
35
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
40
+#
36
+#
41
+
37
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
42
+import os
38
+#
39
+# Some tests for short backing files and short overlays
40
+
43
+import iotests
41
+import iotests
44
+from iotests import qemu_img
42
+
45
+
43
+iotests.verify_image_format(supported_fmts=['qcow2'])
46
+source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt)
44
+iotests.verify_platform(['linux'])
47
+target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt)
45
+
48
+
46
+size_short = 1 * 1024 * 1024
49
+class TestActiveMirror(iotests.QMPTestCase):
47
+size_long = 2 * 1024 * 1024
50
+ image_len = 128 * 1024 * 1024 # 128 MB
48
+size_diff = size_long - size_short
51
+ potential_writes_in_flight = True
49
+
52
+
50
+def create_chain() -> None:
53
+ def setUp(self):
51
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base,
54
+ qemu_img('create', '-f', iotests.imgfmt, source_img, '128M')
52
+ str(size_long))
55
+ qemu_img('create', '-f', iotests.imgfmt, target_img, '128M')
53
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, mid,
56
+
54
+ str(size_short))
57
+ blk_source = {'id': 'source',
55
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', mid, top,
58
+ 'if': 'none',
56
+ str(size_long))
59
+ 'node-name': 'source-node',
57
+
60
+ 'driver': iotests.imgfmt,
58
+ iotests.qemu_io_log('-c', 'write -P 1 0 %d' % size_long, base)
61
+ 'file': {'driver': 'file',
59
+
62
+ 'filename': source_img}}
60
+def create_vm() -> iotests.VM:
63
+
61
+ vm = iotests.VM()
64
+ blk_target = {'node-name': 'target-node',
62
+ vm.add_blockdev('file,filename=%s,node-name=base-file' % base)
65
+ 'driver': iotests.imgfmt,
63
+ vm.add_blockdev('%s,file=base-file,node-name=base' % iotests.imgfmt)
66
+ 'file': {'driver': 'file',
64
+ vm.add_blockdev('file,filename=%s,node-name=mid-file' % mid)
67
+ 'filename': target_img}}
65
+ vm.add_blockdev('%s,file=mid-file,node-name=mid,backing=base'
68
+
66
+ % iotests.imgfmt)
69
+ self.vm = iotests.VM()
67
+ vm.add_drive(top, 'backing=mid,node-name=top')
70
+ self.vm.add_drive_raw(self.vm.qmp_to_opts(blk_source))
68
+ return vm
71
+ self.vm.add_blockdev(self.vm.qmp_to_opts(blk_target))
69
+
72
+ self.vm.add_device('virtio-blk,drive=source')
70
+with iotests.FilePath('base') as base, \
73
+ self.vm.launch()
71
+ iotests.FilePath('mid') as mid, \
74
+
72
+ iotests.FilePath('top') as top:
75
+ def tearDown(self):
73
+
76
+ self.vm.shutdown()
74
+ iotests.log('== Commit tests ==')
77
+
75
+
78
+ if not self.potential_writes_in_flight:
76
+ create_chain()
79
+ self.assertTrue(iotests.compare_images(source_img, target_img),
77
+
80
+ 'mirror target does not match source')
78
+ iotests.log('=== Check visible data ===')
81
+
79
+
82
+ os.remove(source_img)
80
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, top)
83
+ os.remove(target_img)
81
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), top)
84
+
82
+
85
+ def doActiveIO(self, sync_source_and_target):
83
+ iotests.log('=== Checking allocation status ===')
86
+ # Fill the source image
84
+
87
+ self.vm.hmp_qemu_io('source',
85
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
88
+ 'write -P 1 0 %i' % self.image_len)
86
+ '-c', 'alloc %d %d' % (size_short, size_diff),
89
+
87
+ base)
90
+ # Start some background requests
88
+
91
+ for offset in range(1 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024):
89
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
92
+ self.vm.hmp_qemu_io('source', 'aio_write -P 2 %i 1M' % offset)
90
+ '-c', 'alloc %d %d' % (size_short, size_diff),
93
+ for offset in range(2 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024):
91
+ mid)
94
+ self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
92
+
95
+
93
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
96
+ # Start the block job
94
+ '-c', 'alloc %d %d' % (size_short, size_diff),
97
+ result = self.vm.qmp('blockdev-mirror',
95
+ top)
98
+ job_id='mirror',
96
+
99
+ filter_node_name='mirror-node',
97
+ iotests.log('=== Checking map ===')
100
+ device='source-node',
98
+
101
+ target='target-node',
99
+ iotests.qemu_img_log('map', '--output=json', base)
102
+ sync='full',
100
+ iotests.qemu_img_log('map', '--output=human', base)
103
+ copy_mode='write-blocking')
101
+ iotests.qemu_img_log('map', '--output=json', mid)
104
+ self.assert_qmp(result, 'return', {})
102
+ iotests.qemu_img_log('map', '--output=human', mid)
105
+
103
+ iotests.qemu_img_log('map', '--output=json', top)
106
+ # Start some more requests
104
+ iotests.qemu_img_log('map', '--output=human', top)
107
+ for offset in range(3 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024):
105
+
108
+ self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset)
106
+ iotests.log('=== Testing qemu-img commit (top -> mid) ===')
109
+ for offset in range(4 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024):
107
+
110
+ self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
108
+ iotests.qemu_img_log('commit', top)
111
+
109
+ iotests.img_info_log(mid)
112
+ # Wait for the READY event
110
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
113
+ self.wait_ready(drive='mirror')
111
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
114
+
112
+
115
+ # Now start some final requests; all of these (which land on
113
+ iotests.log('=== Testing HMP commit (top -> mid) ===')
116
+ # the source) should be settled using the active mechanism.
114
+
117
+ # The mirror code itself asserts that the source BDS's dirty
115
+ create_chain()
118
+ # bitmap will stay clean between READY and COMPLETED.
116
+ with create_vm() as vm:
119
+ for offset in range(5 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024):
117
+ vm.launch()
120
+ self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset)
118
+ vm.qmp_log('human-monitor-command', command_line='commit drive0')
121
+ for offset in range(6 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024):
119
+
122
+ self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
120
+ iotests.img_info_log(mid)
123
+
121
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
124
+ if sync_source_and_target:
122
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
125
+ # If source and target should be in sync after the mirror,
123
+
126
+ # we have to flush before completion
124
+ iotests.log('=== Testing QMP active commit (top -> mid) ===')
127
+ self.vm.hmp_qemu_io('source', 'aio_flush')
125
+
128
+ self.potential_writes_in_flight = False
126
+ create_chain()
129
+
127
+ with create_vm() as vm:
130
+ self.complete_and_wait(drive='mirror', wait_ready=False)
128
+ vm.launch()
131
+
129
+ vm.qmp_log('block-commit', device='top', base_node='mid',
132
+ def testActiveIO(self):
130
+ job_id='job0', auto_dismiss=False)
133
+ self.doActiveIO(False)
131
+ vm.run_job('job0', wait=5)
134
+
132
+
135
+ def testActiveIOFlushed(self):
133
+ iotests.img_info_log(mid)
136
+ self.doActiveIO(True)
134
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
137
+
135
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
138
+
136
+
139
+
137
+
140
+if __name__ == '__main__':
138
+ iotests.log('== Resize tests ==')
141
+ iotests.main(supported_fmts=['qcow2', 'raw'])
139
+
142
diff --git a/tests/qemu-iotests/151.out b/tests/qemu-iotests/151.out
140
+ # Use different sizes for different allocation modes:
141
+ #
142
+ # We want to have at least one test where 32 bit truncation in the size of
143
+ # the overlapping area becomes visible. This is covered by the
144
+ # prealloc='off' case (1G to 6G is an overlap of 5G).
145
+ #
146
+ # However, we can only do this for modes that don't preallocate data
147
+ # because otherwise we might run out of space on the test host.
148
+ #
149
+ # We also want to test some unaligned combinations.
150
+ for (prealloc, base_size, top_size_old, top_size_new, off) in [
151
+ ('off', '6G', '1G', '8G', '5G'),
152
+ ('metadata', '32G', '30G', '33G', '31G'),
153
+ ('falloc', '10M', '5M', '15M', '9M'),
154
+ ('full', '16M', '8M', '12M', '11M'),
155
+ ('off', '384k', '253k', '512k', '253k'),
156
+ ('off', '400k', '256k', '512k', '336k'),
157
+ ('off', '512k', '256k', '500k', '436k')]:
158
+
159
+ iotests.log('=== preallocation=%s ===' % prealloc)
160
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, base_size)
161
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, top,
162
+ top_size_old)
163
+ iotests.qemu_io_log('-c', 'write -P 1 %s 64k' % off, base)
164
+
165
+ # After this, top_size_old to base_size should be allocated/zeroed.
166
+ #
167
+ # In theory, leaving base_size to top_size_new unallocated would be
168
+ # correct, but in practice, if we zero out anything, we zero out
169
+ # everything up to top_size_new.
170
+ iotests.qemu_img_log('resize', '-f', iotests.imgfmt,
171
+ '--preallocation', prealloc, top, top_size_new)
172
+ iotests.qemu_io_log('-c', 'read -P 0 %s 64k' % off, top)
173
+ iotests.qemu_io_log('-c', 'map', top)
174
+ iotests.qemu_img_log('map', '--output=json', top)
175
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
143
new file mode 100644
176
new file mode 100644
144
index XXXXXXX..XXXXXXX
177
index XXXXXXX..XXXXXXX
145
--- /dev/null
178
--- /dev/null
146
+++ b/tests/qemu-iotests/151.out
179
+++ b/tests/qemu-iotests/274.out
147
@@ -XXX,XX +XXX,XX @@
180
@@ -XXX,XX +XXX,XX @@
148
+..
181
+== Commit tests ==
149
+----------------------------------------------------------------------
182
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
150
+Ran 2 tests
183
+
151
+
184
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
152
+OK
185
+
186
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
187
+
188
+wrote 2097152/2097152 bytes at offset 0
189
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
190
+
191
+=== Check visible data ===
192
+read 1048576/1048576 bytes at offset 0
193
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
194
+
195
+read 1048576/1048576 bytes at offset 1048576
196
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
197
+
198
+=== Checking allocation status ===
199
+1048576/1048576 bytes allocated at offset 0 bytes
200
+1048576/1048576 bytes allocated at offset 1 MiB
201
+
202
+0/1048576 bytes allocated at offset 0 bytes
203
+0/0 bytes allocated at offset 1 MiB
204
+
205
+0/1048576 bytes allocated at offset 0 bytes
206
+0/1048576 bytes allocated at offset 1 MiB
207
+
208
+=== Checking map ===
209
+[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680}]
210
+
211
+Offset Length Mapped to File
212
+0 0x200000 0x50000 TEST_DIR/PID-base
213
+
214
+[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680}]
215
+
216
+Offset Length Mapped to File
217
+0 0x100000 0x50000 TEST_DIR/PID-base
218
+
219
+[{ "start": 0, "length": 1048576, "depth": 2, "zero": false, "data": true, "offset": 327680},
220
+{ "start": 1048576, "length": 1048576, "depth": 0, "zero": true, "data": false}]
221
+
222
+Offset Length Mapped to File
223
+0 0x100000 0x50000 TEST_DIR/PID-base
224
+
225
+=== Testing qemu-img commit (top -> mid) ===
226
+Image committed.
227
+
228
+image: TEST_IMG
229
+file format: IMGFMT
230
+virtual size: 2 MiB (2097152 bytes)
231
+cluster_size: 65536
232
+backing file: TEST_DIR/PID-base
233
+Format specific information:
234
+ compat: 1.1
235
+ lazy refcounts: false
236
+ refcount bits: 16
237
+ corrupt: false
238
+
239
+read 1048576/1048576 bytes at offset 0
240
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
241
+
242
+read 1048576/1048576 bytes at offset 1048576
243
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
244
+
245
+=== Testing HMP commit (top -> mid) ===
246
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
247
+
248
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
249
+
250
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
251
+
252
+wrote 2097152/2097152 bytes at offset 0
253
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
254
+
255
+{"execute": "human-monitor-command", "arguments": {"command-line": "commit drive0"}}
256
+{"return": ""}
257
+image: TEST_IMG
258
+file format: IMGFMT
259
+virtual size: 2 MiB (2097152 bytes)
260
+cluster_size: 65536
261
+backing file: TEST_DIR/PID-base
262
+Format specific information:
263
+ compat: 1.1
264
+ lazy refcounts: false
265
+ refcount bits: 16
266
+ corrupt: false
267
+
268
+read 1048576/1048576 bytes at offset 0
269
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
270
+
271
+read 1048576/1048576 bytes at offset 1048576
272
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
273
+
274
+=== Testing QMP active commit (top -> mid) ===
275
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
276
+
277
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
278
+
279
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
280
+
281
+wrote 2097152/2097152 bytes at offset 0
282
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
283
+
284
+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "mid", "device": "top", "job-id": "job0"}}
285
+{"return": {}}
286
+{"execute": "job-complete", "arguments": {"id": "job0"}}
287
+{"return": {}}
288
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
289
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
290
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
291
+{"return": {}}
292
+image: TEST_IMG
293
+file format: IMGFMT
294
+virtual size: 2 MiB (2097152 bytes)
295
+cluster_size: 65536
296
+backing file: TEST_DIR/PID-base
297
+Format specific information:
298
+ compat: 1.1
299
+ lazy refcounts: false
300
+ refcount bits: 16
301
+ corrupt: false
302
+
303
+read 1048576/1048576 bytes at offset 0
304
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
305
+
306
+read 1048576/1048576 bytes at offset 1048576
307
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
308
+
309
+== Resize tests ==
310
+=== preallocation=off ===
311
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=6442450944 cluster_size=65536 lazy_refcounts=off refcount_bits=16
312
+
313
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=1073741824 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
314
+
315
+wrote 65536/65536 bytes at offset 5368709120
316
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
317
+
318
+Image resized.
319
+
320
+read 65536/65536 bytes at offset 5368709120
321
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
322
+
323
+1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0)
324
+7 GiB (0x1c0000000) bytes allocated at offset 1 GiB (0x40000000)
325
+
326
+[{ "start": 0, "length": 1073741824, "depth": 1, "zero": true, "data": false},
327
+{ "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}]
328
+
329
+=== preallocation=metadata ===
330
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=34359738368 cluster_size=65536 lazy_refcounts=off refcount_bits=16
331
+
332
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=32212254720 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
333
+
334
+wrote 65536/65536 bytes at offset 33285996544
335
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
336
+
337
+Image resized.
338
+
339
+read 65536/65536 bytes at offset 33285996544
340
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
341
+
342
+30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0)
343
+3 GiB (0xc0000000) bytes allocated at offset 30 GiB (0x780000000)
344
+
345
+[{ "start": 0, "length": 32212254720, "depth": 1, "zero": true, "data": false},
346
+{ "start": 32212254720, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 327680},
347
+{ "start": 32749125632, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 537264128},
348
+{ "start": 33285996544, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1074200576},
349
+{ "start": 33822867456, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1611137024},
350
+{ "start": 34359738368, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2148139008},
351
+{ "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}]
352
+
353
+=== preallocation=falloc ===
354
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=10485760 cluster_size=65536 lazy_refcounts=off refcount_bits=16
355
+
356
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=5242880 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
357
+
358
+wrote 65536/65536 bytes at offset 9437184
359
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
360
+
361
+Image resized.
362
+
363
+read 65536/65536 bytes at offset 9437184
364
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
365
+
366
+5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0)
367
+10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000)
368
+
369
+[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false},
370
+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}]
371
+
372
+=== preallocation=full ===
373
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
374
+
375
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=8388608 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
376
+
377
+wrote 65536/65536 bytes at offset 11534336
378
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
379
+
380
+Image resized.
381
+
382
+read 65536/65536 bytes at offset 11534336
383
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
384
+
385
+8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0)
386
+4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000)
387
+
388
+[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false},
389
+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}]
390
+
391
+=== preallocation=off ===
392
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
393
+
394
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=259072 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
395
+
396
+wrote 65536/65536 bytes at offset 259072
397
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
398
+
399
+Image resized.
400
+
401
+read 65536/65536 bytes at offset 259072
402
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
403
+
404
+192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0)
405
+320 KiB (0x50000) bytes allocated at offset 192 KiB (0x30000)
406
+
407
+[{ "start": 0, "length": 196608, "depth": 1, "zero": true, "data": false},
408
+{ "start": 196608, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": 327680},
409
+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}]
410
+
411
+=== preallocation=off ===
412
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=409600 cluster_size=65536 lazy_refcounts=off refcount_bits=16
413
+
414
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
415
+
416
+wrote 65536/65536 bytes at offset 344064
417
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
418
+
419
+Image resized.
420
+
421
+read 65536/65536 bytes at offset 344064
422
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
423
+
424
+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
425
+256 KiB (0x40000) bytes allocated at offset 256 KiB (0x40000)
426
+
427
+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false},
428
+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}]
429
+
430
+=== preallocation=off ===
431
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=524288 cluster_size=65536 lazy_refcounts=off refcount_bits=16
432
+
433
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
434
+
435
+wrote 65536/65536 bytes at offset 446464
436
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
437
+
438
+Image resized.
439
+
440
+read 65536/65536 bytes at offset 446464
441
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
442
+
443
+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
444
+244 KiB (0x3d000) bytes allocated at offset 256 KiB (0x40000)
445
+
446
+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false},
447
+{ "start": 262144, "length": 249856, "depth": 0, "zero": true, "data": false}]
448
+
153
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
449
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
154
index XXXXXXX..XXXXXXX 100644
450
index XXXXXXX..XXXXXXX 100644
155
--- a/tests/qemu-iotests/group
451
--- a/tests/qemu-iotests/group
156
+++ b/tests/qemu-iotests/group
452
+++ b/tests/qemu-iotests/group
157
@@ -XXX,XX +XXX,XX @@
453
@@ -XXX,XX +XXX,XX @@
158
148 rw auto quick
454
270 rw backing quick
159
149 rw auto sudo
455
272 rw
160
150 rw auto quick
456
273 backing quick
161
+151 rw auto
457
+274 rw backing
162
152 rw auto quick
458
277 rw quick
163
153 rw auto quick
459
279 rw backing quick
164
154 rw auto backing quick
460
280 rw migration quick
165
--
461
--
166
2.13.6
462
2.25.3
167
463
168
464
1
Since we use bdrv_do_drained_begin/end() for bdrv_drain_all_begin/end(),
1
BDRV_REQ_ZERO_WRITE is currently implemented in a way that first the
2
coroutine context is automatically left with a BH, preventing the
2
image is possibly preallocated and then the zero flag is added to all
3
deadlocks that made bdrv_drain_all*() unsafe in coroutine context. Now
3
clusters. This means that a copy-on-write operation may be needed when
4
that the old polling code has been removed as dead code, it is clear
4
writing to these clusters, despite having used preallocation, negating
5
that bdrv_drain_all*() is safe there.
5
one of the major benefits of preallocation.
6
6
7
Enable the coroutine test cases for bdrv_drain_all().
7
Instead, try to forward the BDRV_REQ_ZERO_WRITE to the protocol driver,
8
and if the protocol driver can ensure that the new area reads as zeros,
9
we can skip setting the zero flag in the qcow2 layer.
10
11
Unfortunately, the same approach doesn't work for metadata
12
preallocation, so we'll still set the zero flag there.
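
The observable effect can be checked with a sketch like this (hypothetical
file name; assumes qemu-img and qemu-io in $PATH and an image stored on a
regular file):

    import subprocess

    def run(*cmd):
        subprocess.run(cmd, check=True)

    run('qemu-img', 'create', '-f', 'qcow2', 'test.qcow2', '1M')
    run('qemu-img', 'resize', '--preallocation=falloc', 'test.qcow2', '16M')
    # The grown area must still read back as zeros ...
    run('qemu-io', '-c', 'read -P 0 1M 15M', 'test.qcow2')
    # ... but 'qemu-img map' now reports it as allocated data rather than
    # as zero clusters:
    run('qemu-img', 'map', '--output=json', 'test.qcow2')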
8
13
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
Message-Id: <20200424142701.67053-1-kwolf@redhat.com>
17
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
18
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
19
---
12
tests/test-bdrv-drain.c | 16 ++++++++++++++--
20
block/qcow2.c | 22 +++++++++++++++++++---
13
1 file changed, 14 insertions(+), 2 deletions(-)
21
tests/qemu-iotests/274.out | 4 ++--
22
2 files changed, 21 insertions(+), 5 deletions(-)
14
23
15
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
24
diff --git a/block/qcow2.c b/block/qcow2.c
16
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
17
--- a/tests/test-bdrv-drain.c
26
--- a/block/qcow2.c
18
+++ b/tests/test-bdrv-drain.c
27
+++ b/block/qcow2.c
19
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void)
28
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
20
test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
29
/* Allocate the data area */
21
}
30
new_file_size = allocation_start +
22
31
nb_new_data_clusters * s->cluster_size;
23
+static void test_drv_cb_co_drain_all(void)
32
- /* Image file grows, so @exact does not matter */
24
+{
33
- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
25
+ call_in_coroutine(test_drv_cb_drain_all);
34
- errp);
26
+}
35
+ /*
27
+
36
+ * Image file grows, so @exact does not matter.
28
static void test_drv_cb_co_drain(void)
37
+ *
29
{
38
+ * If we need to zero out the new area, try first whether the protocol
30
call_in_coroutine(test_drv_cb_drain);
39
+ * driver can already take care of this.
31
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void)
40
+ */
32
test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
41
+ if (flags & BDRV_REQ_ZERO_WRITE) {
33
}
42
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc,
34
43
+ BDRV_REQ_ZERO_WRITE, NULL);
35
+static void test_quiesce_co_drain_all(void)
44
+ if (ret >= 0) {
36
+{
45
+ flags &= ~BDRV_REQ_ZERO_WRITE;
37
+ call_in_coroutine(test_quiesce_drain_all);
46
+ }
38
+}
47
+ } else {
39
+
48
+ ret = -1;
40
static void test_quiesce_co_drain(void)
49
+ }
41
{
50
+ if (ret < 0) {
42
call_in_coroutine(test_quiesce_drain);
51
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
43
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
52
+ errp);
44
g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
53
+ }
45
test_drv_cb_drain_subtree);
54
if (ret < 0) {
46
55
error_prepend(errp, "Failed to resize underlying file: ");
47
- // XXX bdrv_drain_all() doesn't work in coroutine context
56
qcow2_free_clusters(bs, allocation_start,
48
+ g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
57
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
49
+ test_drv_cb_co_drain_all);
58
index XXXXXXX..XXXXXXX 100644
50
g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
59
--- a/tests/qemu-iotests/274.out
51
g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
60
+++ b/tests/qemu-iotests/274.out
52
test_drv_cb_co_drain_subtree);
61
@@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 9437184
53
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
62
10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000)
54
g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
63
55
test_quiesce_drain_subtree);
64
[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false},
56
65
-{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}]
57
- // XXX bdrv_drain_all() doesn't work in coroutine context
66
+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}]
58
+ g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
67
59
+ test_quiesce_co_drain_all);
68
=== preallocation=full ===
60
g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
69
Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
61
g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
70
@@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 11534336
62
test_quiesce_co_drain_subtree);
71
4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000)
72
73
[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false},
74
-{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}]
75
+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}]
76
77
=== preallocation=off ===
78
Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
63
--
79
--
64
2.13.6
80
2.25.3
65
81
66
82
1
Anything can happen inside BDRV_POLL_WHILE(), including graph
1
From: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
2
changes that may interfere with its callers (e.g. child list iteration
3
in recursive callers of bdrv_do_drained_begin).
4
2
5
Switch to a single BDRV_POLL_WHILE() call for the whole subtree at the
3
This patch introduces support for the PMR (Persistent Memory Region) defined in the NVMe 1.4
6
end of bdrv_do_drained_begin() to avoid such effects. The recursion
4
spec. Users can now specify a pmrdev option that points to a HostMemoryBackend.
7
now happens inside the loop condition. As the graph can only change
5
The pmrdev memory region is subsequently exposed as PCI BAR 2 in the emulated NVMe
8
between bdrv_drain_poll() calls, but not inside of them, doing the
6
device. The guest OS can perform MMIO reads and writes to the PMR region, which will stay
9
recursion here is safe.
7
persistent across system reboots.
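
For reference, a guest with an emulated PMR can be started as sketched below
(paths, IDs and the serial are made up; this mirrors the usage comment added
to nvme.c):

    import subprocess

    subprocess.run([
        'qemu-system-x86_64',
        '-object', 'memory-backend-file,id=pmr0,share=on,'
                   'mem-path=/tmp/pmr.bin,size=16M',  # must be a power of two
        '-drive', 'file=disk.qcow2,if=none,id=nvme0',
        '-device', 'nvme,drive=nvme0,serial=deadbeef,pmrdev=pmr0',
    ], check=True)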
10
8
9
Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
10
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com>
13
Reviewed-by: Keith Busch <kbusch@kernel.org>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
15
---
13
include/block/block.h | 9 +++++---
16
hw/block/nvme.h | 2 +
14
block.c | 2 +-
17
include/block/nvme.h | 172 +++++++++++++++++++++++++++++++++++++++++
15
block/io.c | 63 ++++++++++++++++++++++++++++++++++++---------------
18
hw/block/nvme.c | 109 ++++++++++++++++++++++++++
16
3 files changed, 52 insertions(+), 22 deletions(-)
19
hw/block/Makefile.objs | 2 +-
20
hw/block/trace-events | 4 +
21
5 files changed, 288 insertions(+), 1 deletion(-)
17
22
18
diff --git a/include/block/block.h b/include/block/block.h
23
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
19
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
20
--- a/include/block/block.h
25
--- a/hw/block/nvme.h
21
+++ b/include/block/block.h
26
+++ b/hw/block/nvme.h
22
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
27
@@ -XXX,XX +XXX,XX @@ typedef struct NvmeCtrl {
23
/**
28
uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
24
* bdrv_drain_poll:
29
30
char *serial;
31
+ HostMemoryBackend *pmrdev;
32
+
33
NvmeNamespace *namespaces;
34
NvmeSQueue **sq;
35
NvmeCQueue **cq;
36
diff --git a/include/block/nvme.h b/include/block/nvme.h
37
index XXXXXXX..XXXXXXX 100644
38
--- a/include/block/nvme.h
39
+++ b/include/block/nvme.h
40
@@ -XXX,XX +XXX,XX @@ typedef struct NvmeBar {
41
uint64_t acq;
42
uint32_t cmbloc;
43
uint32_t cmbsz;
44
+ uint8_t padding[3520]; /* not used by QEMU */
45
+ uint32_t pmrcap;
46
+ uint32_t pmrctl;
47
+ uint32_t pmrsts;
48
+ uint32_t pmrebs;
49
+ uint32_t pmrswtp;
50
+ uint32_t pmrmsc;
51
} NvmeBar;
52
53
enum NvmeCapShift {
54
@@ -XXX,XX +XXX,XX @@ enum NvmeCapShift {
55
CAP_CSS_SHIFT = 37,
56
CAP_MPSMIN_SHIFT = 48,
57
CAP_MPSMAX_SHIFT = 52,
58
+ CAP_PMR_SHIFT = 56,
59
};
60
61
enum NvmeCapMask {
62
@@ -XXX,XX +XXX,XX @@ enum NvmeCapMask {
63
CAP_CSS_MASK = 0xff,
64
CAP_MPSMIN_MASK = 0xf,
65
CAP_MPSMAX_MASK = 0xf,
66
+ CAP_PMR_MASK = 0x1,
67
};
68
69
#define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK)
70
@@ -XXX,XX +XXX,XX @@ enum NvmeCapMask {
71
<< CAP_MPSMIN_SHIFT)
72
#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\
73
<< CAP_MPSMAX_SHIFT)
74
+#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\
75
+ << CAP_PMR_SHIFT)
76
77
enum NvmeCcShift {
78
CC_EN_SHIFT = 0,
79
@@ -XXX,XX +XXX,XX @@ enum NvmeCmbszMask {
80
#define NVME_CMBSZ_GETSIZE(cmbsz) \
81
(NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz))))
82
83
+enum NvmePmrcapShift {
84
+ PMRCAP_RDS_SHIFT = 3,
85
+ PMRCAP_WDS_SHIFT = 4,
86
+ PMRCAP_BIR_SHIFT = 5,
87
+ PMRCAP_PMRTU_SHIFT = 8,
88
+ PMRCAP_PMRWBM_SHIFT = 10,
89
+ PMRCAP_PMRTO_SHIFT = 16,
90
+ PMRCAP_CMSS_SHIFT = 24,
91
+};
92
+
93
+enum NvmePmrcapMask {
94
+ PMRCAP_RDS_MASK = 0x1,
95
+ PMRCAP_WDS_MASK = 0x1,
96
+ PMRCAP_BIR_MASK = 0x7,
97
+ PMRCAP_PMRTU_MASK = 0x3,
98
+ PMRCAP_PMRWBM_MASK = 0xf,
99
+ PMRCAP_PMRTO_MASK = 0xff,
100
+ PMRCAP_CMSS_MASK = 0x1,
101
+};
102
+
103
+#define NVME_PMRCAP_RDS(pmrcap) \
104
+ ((pmrcap >> PMRCAP_RDS_SHIFT) & PMRCAP_RDS_MASK)
105
+#define NVME_PMRCAP_WDS(pmrcap) \
106
+ ((pmrcap >> PMRCAP_WDS_SHIFT) & PMRCAP_WDS_MASK)
107
+#define NVME_PMRCAP_BIR(pmrcap) \
108
+ ((pmrcap >> PMRCAP_BIR_SHIFT) & PMRCAP_BIR_MASK)
109
+#define NVME_PMRCAP_PMRTU(pmrcap) \
110
+ ((pmrcap >> PMRCAP_PMRTU_SHIFT) & PMRCAP_PMRTU_MASK)
111
+#define NVME_PMRCAP_PMRWBM(pmrcap) \
112
+ ((pmrcap >> PMRCAP_PMRWBM_SHIFT) & PMRCAP_PMRWBM_MASK)
113
+#define NVME_PMRCAP_PMRTO(pmrcap) \
114
+ ((pmrcap >> PMRCAP_PMRTO_SHIFT) & PMRCAP_PMRTO_MASK)
115
+#define NVME_PMRCAP_CMSS(pmrcap) \
116
+ ((pmrcap >> PMRCAP_CMSS_SHIFT) & PMRCAP_CMSS_MASK)
117
+
118
+#define NVME_PMRCAP_SET_RDS(pmrcap, val) \
119
+ (pmrcap |= (uint64_t)(val & PMRCAP_RDS_MASK) << PMRCAP_RDS_SHIFT)
120
+#define NVME_PMRCAP_SET_WDS(pmrcap, val) \
121
+ (pmrcap |= (uint64_t)(val & PMRCAP_WDS_MASK) << PMRCAP_WDS_SHIFT)
122
+#define NVME_PMRCAP_SET_BIR(pmrcap, val) \
123
+ (pmrcap |= (uint64_t)(val & PMRCAP_BIR_MASK) << PMRCAP_BIR_SHIFT)
124
+#define NVME_PMRCAP_SET_PMRTU(pmrcap, val) \
125
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRTU_MASK) << PMRCAP_PMRTU_SHIFT)
126
+#define NVME_PMRCAP_SET_PMRWBM(pmrcap, val) \
127
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRWBM_MASK) << PMRCAP_PMRWBM_SHIFT)
128
+#define NVME_PMRCAP_SET_PMRTO(pmrcap, val) \
129
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRTO_MASK) << PMRCAP_PMRTO_SHIFT)
130
+#define NVME_PMRCAP_SET_CMSS(pmrcap, val) \
131
+ (pmrcap |= (uint64_t)(val & PMRCAP_CMSS_MASK) << PMRCAP_CMSS_SHIFT)
132
+
133
+enum NvmePmrctlShift {
134
+ PMRCTL_EN_SHIFT = 0,
135
+};
136
+
137
+enum NvmePmrctlMask {
138
+ PMRCTL_EN_MASK = 0x1,
139
+};
140
+
141
+#define NVME_PMRCTL_EN(pmrctl) ((pmrctl >> PMRCTL_EN_SHIFT) & PMRCTL_EN_MASK)
142
+
143
+#define NVME_PMRCTL_SET_EN(pmrctl, val) \
144
+ (pmrctl |= (uint64_t)(val & PMRCTL_EN_MASK) << PMRCTL_EN_SHIFT)
145
+
146
+enum NvmePmrstsShift {
147
+ PMRSTS_ERR_SHIFT = 0,
148
+ PMRSTS_NRDY_SHIFT = 8,
149
+ PMRSTS_HSTS_SHIFT = 9,
150
+ PMRSTS_CBAI_SHIFT = 12,
151
+};
152
+
153
+enum NvmePmrstsMask {
154
+ PMRSTS_ERR_MASK = 0xff,
155
+ PMRSTS_NRDY_MASK = 0x1,
156
+ PMRSTS_HSTS_MASK = 0x7,
157
+ PMRSTS_CBAI_MASK = 0x1,
158
+};
159
+
160
+#define NVME_PMRSTS_ERR(pmrsts) \
161
+ ((pmrsts >> PMRSTS_ERR_SHIFT) & PMRSTS_ERR_MASK)
162
+#define NVME_PMRSTS_NRDY(pmrsts) \
163
+ ((pmrsts >> PMRSTS_NRDY_SHIFT) & PMRSTS_NRDY_MASK)
164
+#define NVME_PMRSTS_HSTS(pmrsts) \
165
+ ((pmrsts >> PMRSTS_HSTS_SHIFT) & PMRSTS_HSTS_MASK)
166
+#define NVME_PMRSTS_CBAI(pmrsts) \
167
+ ((pmrsts >> PMRSTS_CBAI_SHIFT) & PMRSTS_CBAI_MASK)
168
+
169
+#define NVME_PMRSTS_SET_ERR(pmrsts, val) \
170
+ (pmrsts |= (uint64_t)(val & PMRSTS_ERR_MASK) << PMRSTS_ERR_SHIFT)
171
+#define NVME_PMRSTS_SET_NRDY(pmrsts, val) \
172
+ (pmrsts |= (uint64_t)(val & PMRSTS_NRDY_MASK) << PMRSTS_NRDY_SHIFT)
173
+#define NVME_PMRSTS_SET_HSTS(pmrsts, val) \
174
+ (pmrsts |= (uint64_t)(val & PMRSTS_HSTS_MASK) << PMRSTS_HSTS_SHIFT)
175
+#define NVME_PMRSTS_SET_CBAI(pmrsts, val) \
176
+ (pmrsts |= (uint64_t)(val & PMRSTS_CBAI_MASK) << PMRSTS_CBAI_SHIFT)
177
+
178
+enum NvmePmrebsShift {
179
+ PMREBS_PMRSZU_SHIFT = 0,
180
+ PMREBS_RBB_SHIFT = 4,
181
+ PMREBS_PMRWBZ_SHIFT = 8,
182
+};
183
+
184
+enum NvmePmrebsMask {
185
+ PMREBS_PMRSZU_MASK = 0xf,
186
+ PMREBS_RBB_MASK = 0x1,
187
+ PMREBS_PMRWBZ_MASK = 0xffffff,
188
+};
189
+
190
+#define NVME_PMREBS_PMRSZU(pmrebs) \
191
+ ((pmrebs >> PMREBS_PMRSZU_SHIFT) & PMREBS_PMRSZU_MASK)
192
+#define NVME_PMREBS_RBB(pmrebs) \
193
+ ((pmrebs >> PMREBS_RBB_SHIFT) & PMREBS_RBB_MASK)
194
+#define NVME_PMREBS_PMRWBZ(pmrebs) \
195
+ ((pmrebs >> PMREBS_PMRWBZ_SHIFT) & PMREBS_PMRWBZ_MASK)
196
+
197
+#define NVME_PMREBS_SET_PMRSZU(pmrebs, val) \
198
+ (pmrebs |= (uint64_t)(val & PMREBS_PMRSZU_MASK) << PMREBS_PMRSZU_SHIFT)
199
+#define NVME_PMREBS_SET_RBB(pmrebs, val) \
200
+ (pmrebs |= (uint64_t)(val & PMREBS_RBB_MASK) << PMREBS_RBB_SHIFT)
201
+#define NVME_PMREBS_SET_PMRWBZ(pmrebs, val) \
202
+ (pmrebs |= (uint64_t)(val & PMREBS_PMRWBZ_MASK) << PMREBS_PMRWBZ_SHIFT)
203
+
204
+enum NvmePmrswtpShift {
205
+ PMRSWTP_PMRSWTU_SHIFT = 0,
206
+ PMRSWTP_PMRSWTV_SHIFT = 8,
207
+};
208
+
209
+enum NvmePmrswtpMask {
210
+ PMRSWTP_PMRSWTU_MASK = 0xf,
211
+ PMRSWTP_PMRSWTV_MASK = 0xffffff,
212
+};
213
+
214
+#define NVME_PMRSWTP_PMRSWTU(pmrswtp) \
215
+ ((pmrswtp >> PMRSWTP_PMRSWTU_SHIFT) & PMRSWTP_PMRSWTU_MASK)
216
+#define NVME_PMRSWTP_PMRSWTV(pmrswtp) \
217
+ ((pmrswtp >> PMRSWTP_PMRSWTV_SHIFT) & PMRSWTP_PMRSWTV_MASK)
218
+
219
+#define NVME_PMRSWTP_SET_PMRSWTU(pmrswtp, val) \
220
+ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTU_MASK) << PMRSWTP_PMRSWTU_SHIFT)
221
+#define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \
222
+ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT)
223
+
224
+enum NvmePmrmscShift {
225
+ PMRMSC_CMSE_SHIFT = 1,
226
+ PMRMSC_CBA_SHIFT = 12,
227
+};
228
+
229
+enum NvmePmrmscMask {
230
+ PMRMSC_CMSE_MASK = 0x1,
231
+ PMRMSC_CBA_MASK = 0xfffffffffffff,
232
+};
233
+
234
+#define NVME_PMRMSC_CMSE(pmrmsc) \
235
+ ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK)
236
+#define NVME_PMRMSC_CBA(pmrmsc) \
237
+ ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK)
238
+
239
+#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \
240
+ (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT)
241
+#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \
242
+ (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
243
+
244
typedef struct NvmeCmd {
245
uint8_t opcode;
246
uint8_t fuse;
247
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
248
index XXXXXXX..XXXXXXX 100644
249
--- a/hw/block/nvme.c
250
+++ b/hw/block/nvme.c
251
@@ -XXX,XX +XXX,XX @@
252
* -drive file=<file>,if=none,id=<drive_id>
253
* -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
254
* cmb_size_mb=<cmb_size_mb[optional]>, \
255
+ * [pmrdev=<mem_backend_file_id>,] \
256
* num_queues=<N[optional]>
25
*
257
*
26
- * Poll for pending requests in @bs and its parents (except for
258
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
27
- * @ignore_parent). This is part of bdrv_drained_begin.
259
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
28
+ * Poll for pending requests in @bs, its parents (except for @ignore_parent),
29
+ * and if @recursive is true its children as well.
30
+ *
260
+ *
31
+ * This is part of bdrv_drained_begin.
261
+ * cmb_size_mb= and pmrdev= options are mutually exclusive due to limitation
262
+ * in available BAR's. cmb_size_mb= will take precedence over pmrdev= when
263
+ * both provided.
264
+ * Enabling pmr emulation can be achieved by pointing to memory-backend-file.
265
+ * For example:
266
+ * -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \
267
+ * size=<size> .... -device nvme,...,pmrdev=<mem_id>
32
*/
268
*/
33
-bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent);
269
34
+bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
270
#include "qemu/osdep.h"
35
+ BdrvChild *ignore_parent);
271
@@ -XXX,XX +XXX,XX @@
36
272
#include "sysemu/sysemu.h"
37
/**
273
#include "qapi/error.h"
38
* bdrv_drained_begin:
274
#include "qapi/visitor.h"
39
diff --git a/block.c b/block.c
275
+#include "sysemu/hostmem.h"
40
index XXXXXXX..XXXXXXX 100644
276
#include "sysemu/block-backend.h"
41
--- a/block.c
277
+#include "exec/ram_addr.h"
42
+++ b/block.c
278
43
@@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
279
#include "qemu/log.h"
44
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
280
#include "qemu/module.h"
45
{
281
@@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
46
BlockDriverState *bs = child->opaque;
282
NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
47
- return bdrv_drain_poll(bs, NULL);
283
"invalid write to read only CMBSZ, ignored");
48
+ return bdrv_drain_poll(bs, false, NULL);
284
return;
49
}
285
+ case 0xE00: /* PMRCAP */
50
286
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly,
51
static void bdrv_child_cb_drained_end(BdrvChild *child)
287
+ "invalid write to PMRCAP register, ignored");
52
diff --git a/block/io.c b/block/io.c
288
+ return;
53
index XXXXXXX..XXXXXXX 100644
289
+ case 0xE04: /* TODO PMRCTL */
54
--- a/block/io.c
290
+ break;
55
+++ b/block/io.c
291
+ case 0xE08: /* PMRSTS */
56
@@ -XXX,XX +XXX,XX @@ typedef struct {
292
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly,
57
bool done;
293
+ "invalid write to PMRSTS register, ignored");
58
bool begin;
294
+ return;
59
bool recursive;
295
+ case 0xE0C: /* PMREBS */
60
+ bool poll;
296
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly,
61
BdrvChild *parent;
297
+ "invalid write to PMREBS register, ignored");
62
} BdrvCoDrainData;
298
+ return;
63
299
+ case 0xE10: /* PMRSWTP */
64
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
300
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly,
65
}
301
+ "invalid write to PMRSWTP register, ignored");
66
302
+ return;
67
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
303
+ case 0xE14: /* TODO PMRMSC */
68
-bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent)
304
+ break;
69
+bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
305
default:
70
+ BdrvChild *ignore_parent)
306
NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
71
{
307
"invalid MMIO write,"
72
+ BdrvChild *child, *next;
308
@@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
73
+
74
if (bdrv_parent_drained_poll(bs, ignore_parent)) {
75
return true;
76
}
309
}
77
310
78
- return atomic_read(&bs->in_flight);
311
if (addr < sizeof(n->bar)) {
79
+ if (atomic_read(&bs->in_flight)) {
312
+ /*
80
+ return true;
313
+ * When PMRWBM bit 1 is set then a read
81
+ }
314
+ * from PMRSTS should ensure prior writes
82
+
315
+ * made it to persistent media
83
+ if (recursive) {
316
+ */
84
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
317
+ if (addr == 0xE08 &&
85
+ if (bdrv_drain_poll(child->bs, recursive, child)) {
318
+ (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
86
+ return true;
319
+ qemu_ram_writeback(n->pmrdev->mr.ram_block,
87
+ }
320
+ 0, n->pmrdev->size);
88
+ }
321
+ }
89
+ }
322
memcpy(&val, ptr + addr, size);
90
+
91
+ return false;
92
}
93
94
-static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
95
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
96
BdrvChild *ignore_parent)
97
{
98
/* Execute pending BHs first and check everything else only after the BHs
99
* have executed. */
100
while (aio_poll(bs->aio_context, false));
101
102
- return bdrv_drain_poll(bs, ignore_parent);
103
+ return bdrv_drain_poll(bs, recursive, ignore_parent);
104
}
105
106
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
107
- BdrvChild *parent);
108
+ BdrvChild *parent, bool poll);
109
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
110
BdrvChild *parent);
111
112
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
113
114
bdrv_dec_in_flight(bs);
115
if (data->begin) {
116
- bdrv_do_drained_begin(bs, data->recursive, data->parent);
117
+ bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll);
118
} else {
323
} else {
119
bdrv_do_drained_end(bs, data->recursive, data->parent);
324
NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
120
}
325
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
121
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
326
error_setg(errp, "serial property not set");
122
123
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
124
bool begin, bool recursive,
125
- BdrvChild *parent)
126
+ BdrvChild *parent, bool poll)
127
{
128
BdrvCoDrainData data;
129
130
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
131
.begin = begin,
132
.recursive = recursive,
133
.parent = parent,
134
+ .poll = poll,
135
};
136
bdrv_inc_in_flight(bs);
137
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
138
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
139
}
140
141
void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
142
- BdrvChild *parent)
143
+ BdrvChild *parent, bool poll)
144
{
145
BdrvChild *child, *next;
146
147
if (qemu_in_coroutine()) {
148
- bdrv_co_yield_to_drain(bs, true, recursive, parent);
149
+ bdrv_co_yield_to_drain(bs, true, recursive, parent, poll);
150
return;
327
return;
151
}
328
}
152
329
+
153
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
330
+ if (!n->cmb_size_mb && n->pmrdev) {
154
bdrv_parent_drained_begin(bs, parent);
331
+ if (host_memory_backend_is_mapped(n->pmrdev)) {
155
bdrv_drain_invoke(bs, true);
332
+ char *path = object_get_canonical_path_component(OBJECT(n->pmrdev));
156
333
+ error_setg(errp, "can't use already busy memdev: %s", path);
157
- /* Wait for drained requests to finish */
334
+ g_free(path);
158
- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
335
+ return;
159
-
336
+ }
160
if (recursive) {
337
+
161
bs->recursive_quiesce_counter++;
338
+ if (!is_power_of_2(n->pmrdev->size)) {
162
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
339
+ error_setg(errp, "pmr backend size needs to be power of 2 in size");
163
- bdrv_do_drained_begin(child->bs, true, child);
340
+ return;
164
+ bdrv_do_drained_begin(child->bs, true, child, false);
341
+ }
165
}
342
+
343
+ host_memory_backend_set_mapped(n->pmrdev, true);
344
+ }
345
+
346
blkconf_blocksizes(&n->conf);
347
if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
348
false, errp)) {
349
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
350
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
351
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
352
353
+ } else if (n->pmrdev) {
354
+ /* Controller Capabilities register */
355
+ NVME_CAP_SET_PMRS(n->bar.cap, 1);
356
+
357
+ /* PMR Capabities register */
358
+ n->bar.pmrcap = 0;
359
+ NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
360
+ NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
361
+ NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2);
362
+ NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
363
+ /* Turn on bit 1 support */
364
+ NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);
365
+ NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
366
+ NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);
367
+
368
+ /* PMR Control register */
369
+ n->bar.pmrctl = 0;
370
+ NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);
371
+
372
+ /* PMR Status register */
373
+ n->bar.pmrsts = 0;
374
+ NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
375
+ NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
376
+ NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
377
+ NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);
378
+
379
+ /* PMR Elasticity Buffer Size register */
380
+ n->bar.pmrebs = 0;
381
+ NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
382
+ NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
383
+ NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);
384
+
385
+ /* PMR Sustained Write Throughput register */
386
+ n->bar.pmrswtp = 0;
387
+ NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
388
+ NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);
389
+
390
+ /* PMR Memory Space Control register */
391
+ n->bar.pmrmsc = 0;
392
+ NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
393
+ NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);
394
+
395
+ pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
396
+ PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
397
+ PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr);
166
}
398
}
167
+
399
168
+ /*
400
for (i = 0; i < n->num_namespaces; i++) {
169
+ * Wait for drained requests to finish.
401
@@ -XXX,XX +XXX,XX @@ static void nvme_exit(PCIDevice *pci_dev)
170
+ *
402
if (n->cmb_size_mb) {
171
+ * Calling BDRV_POLL_WHILE() only once for the top-level node is okay: The
403
g_free(n->cmbuf);
172
+ * call is needed so things in this AioContext can make progress even
404
}
173
+ * though we don't return to the main AioContext loop - this automatically
405
+
174
+ * includes other nodes in the same AioContext and therefore all child
406
+ if (n->pmrdev) {
175
+ * nodes.
407
+ host_memory_backend_set_mapped(n->pmrdev, false);
176
+ */
177
+ if (poll) {
178
+ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
179
+ }
408
+ }
409
msix_uninit_exclusive_bar(pci_dev);
180
}
410
}
181
411
182
void bdrv_drained_begin(BlockDriverState *bs)
412
static Property nvme_props[] = {
183
{
413
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
184
- bdrv_do_drained_begin(bs, false, NULL);
414
+ DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND,
185
+ bdrv_do_drained_begin(bs, false, NULL, true);
415
+ HostMemoryBackend *),
186
}
416
DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
187
417
DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
188
void bdrv_subtree_drained_begin(BlockDriverState *bs)
418
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
189
{
419
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
190
- bdrv_do_drained_begin(bs, true, NULL);
420
index XXXXXXX..XXXXXXX 100644
191
+ bdrv_do_drained_begin(bs, true, NULL, true);
421
--- a/hw/block/Makefile.objs
192
}
422
+++ b/hw/block/Makefile.objs
193
423
@@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
194
void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
424
common-obj-$(CONFIG_XEN) += xen-block.o
195
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
425
common-obj-$(CONFIG_ECC) += ecc.o
196
int old_quiesce_counter;
426
common-obj-$(CONFIG_ONENAND) += onenand.o
197
427
-common-obj-$(CONFIG_NVME_PCI) += nvme.o
198
if (qemu_in_coroutine()) {
428
common-obj-$(CONFIG_SWIM) += swim.o
199
- bdrv_co_yield_to_drain(bs, false, recursive, parent);
429
200
+ bdrv_co_yield_to_drain(bs, false, recursive, parent, false);
430
common-obj-$(CONFIG_SH4) += tc58128.o
201
return;
431
202
}
432
obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o
203
assert(bs->quiesce_counter > 0);
433
obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
204
@@ -XXX,XX +XXX,XX @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
434
+obj-$(CONFIG_NVME_PCI) += nvme.o
205
int i;
435
206
436
obj-y += dataplane/
207
for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
437
diff --git a/hw/block/trace-events b/hw/block/trace-events
208
- bdrv_do_drained_begin(child->bs, true, child);
438
index XXXXXXX..XXXXXXX 100644
209
+ bdrv_do_drained_begin(child->bs, true, child, true);
439
--- a/hw/block/trace-events
210
}
440
+++ b/hw/block/trace-events
211
}
441
@@ -XXX,XX +XXX,XX @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA
212
442
nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
213
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
443
nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
214
AioContext *aio_context = bdrv_get_aio_context(bs);
444
nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
215
445
+nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored"
216
aio_context_acquire(aio_context);
446
+nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored"
217
- bdrv_do_drained_begin(bs, true, NULL);
447
+nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored"
218
+ bdrv_do_drained_begin(bs, true, NULL, true);
448
+nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored"
219
aio_context_release(aio_context);
449
nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
220
}
450
nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
221
451
nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
222
--
452
--
223
2.13.6
453
2.25.3
224
454
225
455
diff view generated by jsdifflib
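For orientation, the new bdrv_drain_poll() signature introduced by the drain
patch above can be exercised as follows; this is a minimal sketch using only
names from the patch, and the wrapper function itself is invented for
illustration:

    /* Sketch: ask whether any activity remains anywhere in bs' subtree.
     * 'true' enables the new recursive check over bs->children; passing
     * NULL means no parent BdrvChild is exempted from the poll. */
    static bool subtree_still_busy(BlockDriverState *bs)
    {
        return bdrv_drain_poll(bs, true, NULL);
    }
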
All involved nodes are already idle, we called bdrv_do_drain_begin() on
them.

The comment in the code suggested that this was not correct because the
completion of a request on one node could spawn a new request on a
different node (which might have been drained before, so we wouldn't
drain the new request). In reality, new requests to different nodes
aren't spawned out of nothing, but only in the context of a parent
request, and they aren't submitted to random nodes, but only to child
nodes. As long as we still poll for the completion of the parent request
(which we do), draining each root node separately is good enough.

Remove the additional polling code from bdrv_drain_all_begin() and
replace it with an assertion that all nodes are already idle after we
drained them separately.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 41 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drain(BlockDriverState *bs)
     bdrv_drained_end(bs);
 }

+static void bdrv_drain_assert_idle(BlockDriverState *bs)
+{
+    BdrvChild *child, *next;
+
+    assert(atomic_read(&bs->in_flight) == 0);
+    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+        bdrv_drain_assert_idle(child->bs);
+    }
+}
+
 /*
  * Wait for pending requests to complete across all BlockDriverStates
  *
@@ -XXX,XX +XXX,XX @@ void bdrv_drain(BlockDriverState *bs)
  */
 void bdrv_drain_all_begin(void)
 {
-    /* Always run first iteration so any pending completion BHs run */
-    bool waited = true;
     BlockDriverState *bs;
     BdrvNextIterator it;
-    GSList *aio_ctxs = NULL, *ctx;

     /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
      * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         bdrv_do_drained_begin(bs, true, NULL);
         aio_context_release(aio_context);
-
-        if (!g_slist_find(aio_ctxs, aio_context)) {
-            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
-        }
     }

-    /* Note that completion of an asynchronous I/O operation can trigger any
-     * number of other I/O operations on other devices---for example a
-     * coroutine can submit an I/O request to another device in response to
-     * request completion. Therefore we must keep looping until there was no
-     * more activity rather than simply draining each device independently.
-     */
-    while (waited) {
-        waited = false;
-
-        for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
-            AioContext *aio_context = ctx->data;
-
-            aio_context_acquire(aio_context);
-            for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-                if (aio_context == bdrv_get_aio_context(bs)) {
-                    waited |= bdrv_drain_recurse(bs);
-                }
-            }
-            aio_context_release(aio_context);
-        }
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        bdrv_drain_assert_idle(bs);
     }
-
-    g_slist_free(aio_ctxs);
 }

 void bdrv_drain_all_end(void)
--
2.13.6


The QMP handler qmp_object_add() and the implementation of --object in
qemu-storage-daemon can share most of the code. Currently,
qemu-storage-daemon calls qmp_object_add(), but this is not correct
because different visitors need to be used.

As a first step towards a fix, make qmp_object_add() a wrapper around a
new function user_creatable_add_dict() that can get an additional
parameter. The handling of "props" is only required for compatibility
and not required for the qemu-storage-daemon command line, so it stays
in qmp_object_add().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/qom/object_interfaces.h | 12 ++++++++++++
 qom/object_interfaces.c | 27 +++++++++++++++++++++++++++
 qom/qom-qmp-cmds.c | 24 +-----------------------
 3 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qom/object_interfaces.h
+++ b/include/qom/object_interfaces.h
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
                                 const QDict *qdict,
                                 Visitor *v, Error **errp);

+/**
+ * user_creatable_add_dict:
+ * @qdict: the object definition
+ * @errp: if an error occurs, a pointer to an area to store the error
+ *
+ * Create an instance of the user creatable object that is defined by
+ * @qdict. The object type is taken from the QDict key 'qom-type', its
+ * ID from the key 'id'. The remaining entries in @qdict are used to
+ * initialize the object properties.
+ */
+void user_creatable_add_dict(QDict *qdict, Error **errp);
+
 /**
  * user_creatable_add_opts:
  * @opts: the object definition
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
index XXXXXXX..XXXXXXX 100644
--- a/qom/object_interfaces.c
+++ b/qom/object_interfaces.c
@@ -XXX,XX +XXX,XX @@
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qstring.h"
+#include "qapi/qobject-input-visitor.h"
 #include "qom/object_interfaces.h"
 #include "qemu/help_option.h"
 #include "qemu/module.h"
@@ -XXX,XX +XXX,XX @@ out:
     return obj;
 }

+void user_creatable_add_dict(QDict *qdict, Error **errp)
+{
+    Visitor *v;
+    Object *obj;
+    g_autofree char *type = NULL;
+    g_autofree char *id = NULL;
+
+    type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
+    if (!type) {
+        error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
+        return;
+    }
+    qdict_del(qdict, "qom-type");
+
+    id = g_strdup(qdict_get_try_str(qdict, "id"));
+    if (!id) {
+        error_setg(errp, QERR_MISSING_PARAMETER, "id");
+        return;
+    }
+    qdict_del(qdict, "id");
+
+    v = qobject_input_visitor_new(QOBJECT(qdict));
+    obj = user_creatable_add_type(type, id, qdict, v, errp);
+    visit_free(v);
+    object_unref(obj);
+}
+
 Object *user_creatable_add_opts(QemuOpts *opts, Error **errp)
 {
diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
index XXXXXXX..XXXXXXX 100644
--- a/qom/qom-qmp-cmds.c
+++ b/qom/qom-qmp-cmds.c
@@ -XXX,XX +XXX,XX @@
 #include "qapi/qapi-commands-qom.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
-#include "qapi/qobject-input-visitor.h"
 #include "qemu/cutils.h"
 #include "qom/object_interfaces.h"
 #include "qom/qom-qobject.h"
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
 {
     QObject *props;
     QDict *pdict;
-    Visitor *v;
-    Object *obj;
-    g_autofree char *type = NULL;
-    g_autofree char *id = NULL;
-
-    type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
-    if (!type) {
-        error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
-        return;
-    }
-    qdict_del(qdict, "qom-type");
-
-    id = g_strdup(qdict_get_try_str(qdict, "id"));
-    if (!id) {
-        error_setg(errp, QERR_MISSING_PARAMETER, "id");
-        return;
-    }
-    qdict_del(qdict, "id");

     props = qdict_get(qdict, "props");
     if (props) {
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
         qobject_unref(pdict);
     }

-    v = qobject_input_visitor_new(QOBJECT(qdict));
-    obj = user_creatable_add_type(type, id, qdict, v, errp);
-    visit_free(v);
-    object_unref(obj);
+    user_creatable_add_dict(qdict, errp);
 }

 void qmp_object_del(const char *id, Error **errp)
--
2.25.3

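The new user_creatable_add_dict() from the patch above can also be driven with
a hand-built QDict. A minimal sketch follows; the object type and the property
are examples, not taken from the patch:

    /* Sketch: rough equivalent of QMP object-add for a throttle group. */
    QDict *qdict = qdict_new();
    qdict_put_str(qdict, "qom-type", "throttle-group");
    qdict_put_str(qdict, "id", "tg0");
    qdict_put_int(qdict, "x-iops-total", 1000);  /* already typed as a number */
    user_creatable_add_dict(qdict, &error_fatal);
    qobject_unref(qdict);

Note that at this point in the series the value types must already match what
the QOM type expects, because the helper still uses the plain QObject input
visitor; lifting that restriction is exactly what the later keyval patch does.
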
Deleted patch
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void test_detach_by_drain_subtree(void)
 }


+struct detach_by_parent_data {
+    BlockDriverState *parent_b;
+    BdrvChild *child_b;
+    BlockDriverState *c;
+    BdrvChild *child_c;
+};
+
+static void detach_by_parent_aio_cb(void *opaque, int ret)
+{
+    struct detach_by_parent_data *data = opaque;
+
+    g_assert_cmpint(ret, ==, 0);
+    bdrv_unref_child(data->parent_b, data->child_b);
+
+    bdrv_ref(data->c);
+    data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C",
+                                      &child_file, &error_abort);
+}
+
+/*
+ * Initial graph:
+ *
+ * PA     PB
+ *    \ /   \
+ *     A     B     C
+ *
+ * PA has a pending write request whose callback changes the child nodes of PB:
+ * It removes B and adds C instead. The subtree of PB is drained, which will
+ * indirectly drain the write request, too.
+ */
+static void test_detach_by_parent_cb(void)
+{
+    BlockBackend *blk;
+    BlockDriverState *parent_a, *parent_b, *a, *b, *c;
+    BdrvChild *child_a, *child_b;
+    BlockAIOCB *acb;
+    struct detach_by_parent_data data;
+
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = NULL,
+        .iov_len = 0,
+    };
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    /* Create all involved nodes */
+    parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR,
+                                    &error_abort);
+    parent_b = bdrv_new_open_driver(&bdrv_test, "parent-b", 0,
+                                    &error_abort);
+
+    a = bdrv_new_open_driver(&bdrv_test, "a", BDRV_O_RDWR, &error_abort);
+    b = bdrv_new_open_driver(&bdrv_test, "b", BDRV_O_RDWR, &error_abort);
+    c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort);
+
+    /* blk is a BB for parent-a */
+    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_insert_bs(blk, parent_a, &error_abort);
+    bdrv_unref(parent_a);
+
+    /* Set child relationships */
+    bdrv_ref(b);
+    bdrv_ref(a);
+    child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_file, &error_abort);
+    child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_backing, &error_abort);
+
+    bdrv_ref(a);
+    bdrv_attach_child(parent_a, a, "PA-A", &child_file, &error_abort);
+
+    g_assert_cmpint(parent_a->refcnt, ==, 1);
+    g_assert_cmpint(parent_b->refcnt, ==, 1);
+    g_assert_cmpint(a->refcnt, ==, 3);
+    g_assert_cmpint(b->refcnt, ==, 2);
+    g_assert_cmpint(c->refcnt, ==, 1);
+
+    g_assert(QLIST_FIRST(&parent_b->children) == child_a);
+    g_assert(QLIST_NEXT(child_a, next) == child_b);
+    g_assert(QLIST_NEXT(child_b, next) == NULL);
+
+    /* Start the evil write request */
+    data = (struct detach_by_parent_data) {
+        .parent_b = parent_b,
+        .child_b = child_b,
+        .c = c,
+    };
+    acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, &data);
+    g_assert(acb != NULL);
+
+    /* Drain and check the expected result */
+    bdrv_subtree_drained_begin(parent_b);
+
+    g_assert(data.child_c != NULL);
+
+    g_assert_cmpint(parent_a->refcnt, ==, 1);
+    g_assert_cmpint(parent_b->refcnt, ==, 1);
+    g_assert_cmpint(a->refcnt, ==, 3);
+    g_assert_cmpint(b->refcnt, ==, 1);
+    g_assert_cmpint(c->refcnt, ==, 2);
+
+    g_assert(QLIST_FIRST(&parent_b->children) == data.child_c);
+    g_assert(QLIST_NEXT(data.child_c, next) == child_a);
+    g_assert(QLIST_NEXT(child_a, next) == NULL);
+
+    g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
+    g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
+    g_assert_cmpint(a->quiesce_counter, ==, 1);
+    g_assert_cmpint(b->quiesce_counter, ==, 0);
+    g_assert_cmpint(c->quiesce_counter, ==, 1);
+
+    bdrv_subtree_drained_end(parent_b);
+
+    bdrv_unref(parent_b);
+    blk_unref(blk);
+
+    /* XXX Once bdrv_close() unref's children instead of just detaching them,
+     * this won't be necessary any more. */
+    bdrv_unref(a);
+    bdrv_unref(a);
+    bdrv_unref(c);
+
+    g_assert_cmpint(a->refcnt, ==, 1);
+    g_assert_cmpint(b->refcnt, ==, 1);
+    g_assert_cmpint(c->refcnt, ==, 1);
+    bdrv_unref(a);
+    bdrv_unref(b);
+    bdrv_unref(c);
+}
+
+
 int main(int argc, char **argv)
 {
     int ret;
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
     g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
     g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
+    g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);

     ret = g_test_run();
     qemu_event_destroy(&done_event);
--
2.13.6

Deleted patch
We cannot allow aio_poll() in bdrv_drain_invoke(begin=true) until we're
done with propagating the drain through the graph and are doing the
single final BDRV_POLL_WHILE().

Just schedule the coroutine with the callback and increase bs->in_flight
to make sure that the polling phase will wait for it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/io.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)

     /* Set data->done before reading bs->wakeup. */
     atomic_mb_set(&data->done, true);
-    bdrv_wakeup(bs);
+    bdrv_dec_in_flight(bs);
+
+    if (data->begin) {
+        g_free(data);
+    }
 }

 /* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
 static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
 {
-    BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
+    BdrvCoDrainData *data;

     if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
             (!begin && !bs->drv->bdrv_co_drain_end)) {
         return;
     }

-    data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
-    bdrv_coroutine_enter(bs, data.co);
-    BDRV_POLL_WHILE(bs, !data.done);
+    data = g_new(BdrvCoDrainData, 1);
+    *data = (BdrvCoDrainData) {
+        .bs = bs,
+        .done = false,
+        .begin = begin
+    };
+
+    /* Make sure the driver callback completes during the polling phase for
+     * drain_begin. */
+    bdrv_inc_in_flight(bs);
+    data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
+    aio_co_schedule(bdrv_get_aio_context(bs), data->co);
+
+    if (!begin) {
+        BDRV_POLL_WHILE(bs, !data->done);
+        g_free(data);
+    }
 }

 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
--
2.13.6

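The idiom the patch above relies on can be summarized in a short sketch (all
functions are the ones used in the patch; the surrounding shape is condensed
for illustration):

    /* Keep a unit of work visible to a later polling phase. */
    bdrv_inc_in_flight(bs);                 /* the poll loop waits for this */
    aio_co_schedule(bdrv_get_aio_context(bs), co);
    /* ... later, inside the scheduled coroutine ... */
    bdrv_dec_in_flight(bs);                 /* lets BDRV_POLL_WHILE() finish */
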
Deleted patch
This adds a test case that goes wrong if bdrv_drain_invoke() calls
aio_poll().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/test-bdrv-drain.c | 102 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 88 insertions(+), 14 deletions(-)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static QemuEvent done_event;
 typedef struct BDRVTestState {
     int drain_count;
     AioContext *bh_indirection_ctx;
+    bool sleep_in_drain_begin;
 } BDRVTestState;

 static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
 {
     BDRVTestState *s = bs->opaque;
     s->drain_count++;
+    if (s->sleep_in_drain_begin) {
+        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+    }
 }

 static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
     return 0;
 }

+static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                 const BdrvChildRole *role,
+                                 BlockReopenQueue *reopen_queue,
+                                 uint64_t perm, uint64_t shared,
+                                 uint64_t *nperm, uint64_t *nshared)
+{
+    /* bdrv_format_default_perms() accepts only these two, so disguise
+     * detach_by_driver_cb_role as one of them. */
+    if (role != &child_file && role != &child_backing) {
+        role = &child_file;
+    }
+
+    bdrv_format_default_perms(bs, c, role, reopen_queue, perm, shared,
+                              nperm, nshared);
+}
+
 static BlockDriver bdrv_test = {
     .format_name            = "test",
     .instance_size          = sizeof(BDRVTestState),
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = {
     .bdrv_co_drain_begin    = bdrv_test_co_drain_begin,
     .bdrv_co_drain_end      = bdrv_test_co_drain_end,

-    .bdrv_child_perm        = bdrv_format_default_perms,
+    .bdrv_child_perm        = bdrv_test_child_perm,
 };

 static void aio_ret_cb(void *opaque, int ret)
@@ -XXX,XX +XXX,XX @@ struct detach_by_parent_data {
     BdrvChild *child_b;
     BlockDriverState *c;
     BdrvChild *child_c;
+    bool by_parent_cb;
 };
+static struct detach_by_parent_data detach_by_parent_data;

-static void detach_by_parent_aio_cb(void *opaque, int ret)
+static void detach_indirect_bh(void *opaque)
 {
     struct detach_by_parent_data *data = opaque;

-    g_assert_cmpint(ret, ==, 0);
     bdrv_unref_child(data->parent_b, data->child_b);

     bdrv_ref(data->c);
@@ -XXX,XX +XXX,XX @@ static void detach_by_parent_aio_cb(void *opaque, int ret)
                                       &child_file, &error_abort);
 }

+static void detach_by_parent_aio_cb(void *opaque, int ret)
+{
+    struct detach_by_parent_data *data = &detach_by_parent_data;
+
+    g_assert_cmpint(ret, ==, 0);
+    if (data->by_parent_cb) {
+        detach_indirect_bh(data);
+    }
+}
+
+static void detach_by_driver_cb_drained_begin(BdrvChild *child)
+{
+    aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
+                            detach_indirect_bh, &detach_by_parent_data);
+    child_file.drained_begin(child);
+}
+
+static BdrvChildRole detach_by_driver_cb_role;
+
 /*
  * Initial graph:
  *
@@ -XXX,XX +XXX,XX @@ static void detach_by_parent_aio_cb(void *opaque, int ret)
  *    \ /   \
  *     A     B     C
  *
- * PA has a pending write request whose callback changes the child nodes of PB:
- * It removes B and adds C instead. The subtree of PB is drained, which will
- * indirectly drain the write request, too.
+ * by_parent_cb == true: Test that parent callbacks don't poll
+ *
+ * PA has a pending write request whose callback changes the child nodes of
+ * PB: It removes B and adds C instead. The subtree of PB is drained, which
+ * will indirectly drain the write request, too.
+ *
+ * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll
+ *
+ * PA's BdrvChildRole has a .drained_begin callback that schedules a BH
+ * that does the same graph change. If bdrv_drain_invoke() calls it, the
+ * state is messed up, but if it is only polled in the single
+ * BDRV_POLL_WHILE() at the end of the drain, this should work fine.
  */
-static void test_detach_by_parent_cb(void)
+static void test_detach_indirect(bool by_parent_cb)
 {
     BlockBackend *blk;
     BlockDriverState *parent_a, *parent_b, *a, *b, *c;
     BdrvChild *child_a, *child_b;
     BlockAIOCB *acb;
-    struct detach_by_parent_data data;

     QEMUIOVector qiov;
     struct iovec iov = {
@@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void)
     };
     qemu_iovec_init_external(&qiov, &iov, 1);

+    if (!by_parent_cb) {
+        detach_by_driver_cb_role = child_file;
+        detach_by_driver_cb_role.drained_begin =
+            detach_by_driver_cb_drained_begin;
+    }
+
     /* Create all involved nodes */
     parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR,
                                     &error_abort);
@@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void)
     blk_insert_bs(blk, parent_a, &error_abort);
     bdrv_unref(parent_a);

+    /* If we want to get bdrv_drain_invoke() to call aio_poll(), the driver
+     * callback must not return immediately. */
+    if (!by_parent_cb) {
+        BDRVTestState *s = parent_a->opaque;
+        s->sleep_in_drain_begin = true;
+    }
+
     /* Set child relationships */
     bdrv_ref(b);
     bdrv_ref(a);
@@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void)
     child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_backing, &error_abort);

     bdrv_ref(a);
-    bdrv_attach_child(parent_a, a, "PA-A", &child_file, &error_abort);
+    bdrv_attach_child(parent_a, a, "PA-A",
+                      by_parent_cb ? &child_file : &detach_by_driver_cb_role,
+                      &error_abort);

     g_assert_cmpint(parent_a->refcnt, ==, 1);
     g_assert_cmpint(parent_b->refcnt, ==, 1);
@@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void)
     g_assert(QLIST_NEXT(child_b, next) == NULL);

     /* Start the evil write request */
-    data = (struct detach_by_parent_data) {
+    detach_by_parent_data = (struct detach_by_parent_data) {
         .parent_b = parent_b,
         .child_b = child_b,
         .c = c,
+        .by_parent_cb = by_parent_cb,
     };
-    acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, &data);
+    acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL);
     g_assert(acb != NULL);

     /* Drain and check the expected result */
     bdrv_subtree_drained_begin(parent_b);

-    g_assert(data.child_c != NULL);
+    g_assert(detach_by_parent_data.child_c != NULL);

     g_assert_cmpint(parent_a->refcnt, ==, 1);
     g_assert_cmpint(parent_b->refcnt, ==, 1);
@@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void)
     g_assert_cmpint(b->refcnt, ==, 1);
     g_assert_cmpint(c->refcnt, ==, 2);

-    g_assert(QLIST_FIRST(&parent_b->children) == data.child_c);
-    g_assert(QLIST_NEXT(data.child_c, next) == child_a);
+    g_assert(QLIST_FIRST(&parent_b->children) == detach_by_parent_data.child_c);
+    g_assert(QLIST_NEXT(detach_by_parent_data.child_c, next) == child_a);
     g_assert(QLIST_NEXT(child_a, next) == NULL);

     g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
@@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void)
     bdrv_unref(c);
 }

+static void test_detach_by_parent_cb(void)
+{
+    test_detach_indirect(true);
+}
+
+static void test_detach_by_driver_cb(void)
+{
+    test_detach_indirect(false);
+}

 int main(int argc, char **argv)
 {
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
     g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
     g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
+    g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);

     ret = g_test_run();
     qemu_event_destroy(&done_event);
--
2.13.6

Deleted patch
bdrv_drain_all() wants to have a single polling loop for draining the
in-flight requests of all nodes. This means that the AIO_WAIT_WHILE()
condition relies on activity in multiple AioContexts, which is polled
from the mainloop context. We must therefore call AIO_WAIT_WHILE() from
the mainloop thread and use the AioWait notification mechanism.

Just randomly picking the AioContext of any non-mainloop thread would
work, but instead of bothering to find such a context in the caller, we
can just as well accept NULL for ctx.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/aio-wait.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
 /**
  * AIO_WAIT_WHILE:
  * @wait: the aio wait object
- * @ctx: the aio context
+ * @ctx: the aio context, or NULL if multiple aio contexts (for which the
+ *       caller does not hold a lock) are involved in the polling condition.
  * @cond: wait while this conditional expression is true
  *
  * Wait while a condition is true. Use this to implement synchronous
@@ -XXX,XX +XXX,XX @@ typedef struct {
     bool waited_ = false;                                          \
     AioWait *wait_ = (wait);                                       \
     AioContext *ctx_ = (ctx);                                      \
-    if (in_aio_context_home_thread(ctx_)) {                        \
+    if (ctx_ && in_aio_context_home_thread(ctx_)) {                \
         while ((cond)) {                                           \
             aio_poll(ctx_, true);                                  \
             waited_ = true;                                        \
@@ -XXX,XX +XXX,XX @@ typedef struct {
         /* Increment wait_->num_waiters before evaluating cond. */ \
         atomic_inc(&wait_->num_waiters);                           \
         while ((cond)) {                                           \
-            aio_context_release(ctx_);                             \
+            if (ctx_) {                                            \
+                aio_context_release(ctx_);                         \
+            }                                                      \
             aio_poll(qemu_get_aio_context(), true);                \
-            aio_context_acquire(ctx_);                             \
+            if (ctx_) {                                            \
+                aio_context_acquire(ctx_);                         \
+            }                                                      \
             waited_ = true;                                        \
         }                                                          \
         atomic_dec(&wait_->num_waiters);                           \
--
2.13.6

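A sketch of the call pattern this enables (the AioWait object name follows how
the drain_all code later in the series uses it, and the condition helper is
hypothetical):

    static AioWait drain_all_aio_wait;

    /* Polled from the main loop; the condition may depend on requests in
     * several AioContexts, so no single context lock is taken or dropped. */
    AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, any_node_still_busy());
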
Deleted patch
Before we can introduce a single polling loop for all nodes in
bdrv_drain_all_begin(), we must make sure to run it outside of coroutine
context like we already do for bdrv_do_drained_begin().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/io.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
     Coroutine *co = data->co;
     BlockDriverState *bs = data->bs;

-    bdrv_dec_in_flight(bs);
-    if (data->begin) {
-        bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll);
+    if (bs) {
+        bdrv_dec_in_flight(bs);
+        if (data->begin) {
+            bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll);
+        } else {
+            bdrv_do_drained_end(bs, data->recursive, data->parent);
+        }
     } else {
-        bdrv_do_drained_end(bs, data->recursive, data->parent);
+        assert(data->begin);
+        bdrv_drain_all_begin();
     }

     data->done = true;
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
         .parent = parent,
         .poll = poll,
     };
-    bdrv_inc_in_flight(bs);
+    if (bs) {
+        bdrv_inc_in_flight(bs);
+    }
     aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
                             bdrv_co_drain_bh_cb, &data);

@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
     BlockDriverState *bs;
     BdrvNextIterator it;

+    if (qemu_in_coroutine()) {
+        bdrv_co_yield_to_drain(NULL, true, false, NULL, true);
+        return;
+    }
+
     /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
      * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
      * nodes in several different AioContexts, so make sure we're in the main
--
2.13.6

In the future, bdrv_drained_all_begin/end() will drain all individual
nodes separately rather than whole subtrees. This means that we don't
want to propagate the drain to all parents any more: If the parent is a
BDS, it will already be drained separately. Recursing to all parents is
unnecessary work and would make it an O(n²) operation.

Prepare the drain function for the changed drain_all by adding an
ignore_bds_parents parameter to the internal implementation that
prevents the propagation of the drain to BDS parents. We still (have to)
propagate it to non-BDS parents like BlockBackends or Jobs because those
are not drained separately.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block.h | 16 ++++++---
 include/block/block_int.h | 6 ++++
 block.c | 11 +++---
 block/io.c | 88 ++++++++++++++++++++++++++++-------------------
 block/vvfat.c | 1 +
 5 files changed, 78 insertions(+), 44 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs);
 * Begin a quiesced section of all users of @bs. This is part of
 * bdrv_drained_begin.
 */
-void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
+                               bool ignore_bds_parents);

 /**
  * bdrv_parent_drained_end:
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
 * End a quiesced section of all users of @bs. This is part of
 * bdrv_drained_end.
 */
-void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
+                             bool ignore_bds_parents);

 /**
  * bdrv_drain_poll:
  *
  * Poll for pending requests in @bs, its parents (except for @ignore_parent),
- * and if @recursive is true its children as well.
+ * and if @recursive is true its children as well (used for subtree drain).
+ *
+ * If @ignore_bds_parents is true, parents that are BlockDriverStates must
+ * ignore the drain request because they will be drained separately (used for
+ * drain_all).
 *
 * This is part of bdrv_drained_begin.
 */
 bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
-                     BdrvChild *ignore_parent);
+                     BdrvChild *ignore_parent, bool ignore_bds_parents);

 /**
  * bdrv_drained_begin:
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs);
 * running requests to complete.
 */
 void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
-                                   BdrvChild *parent);
+                                   BdrvChild *parent, bool ignore_bds_parents);

 /**
  * Like bdrv_drained_begin, but recursively begins a quiesced section for
diff --git a/include/block/block_int.h b/include/block/block_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -XXX,XX +XXX,XX @@ struct BdrvChildRole {
     * points to. */
    bool stay_at_node;

+    /* If true, the parent is a BlockDriverState and bdrv_next_all_states()
+     * will return it. This information is used for drain_all, where every node
+     * will be drained separately, so the drain only needs to be propagated to
+     * non-BDS parents. */
+    bool parent_is_bds;
+
    void (*inherit_options)(int *child_flags, QDict *child_options,
                            int parent_flags, QDict *parent_options);

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
 static void bdrv_child_cb_drained_begin(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
-    bdrv_do_drained_begin_quiesce(bs, NULL);
+    bdrv_do_drained_begin_quiesce(bs, NULL, false);
 }

 static bool bdrv_child_cb_drained_poll(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
-    return bdrv_drain_poll(bs, false, NULL);
+    return bdrv_drain_poll(bs, false, NULL, false);
 }

 static void bdrv_child_cb_drained_end(BdrvChild *child)
@@ -XXX,XX +XXX,XX @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
 }

 const BdrvChildRole child_file = {
+    .parent_is_bds   = true,
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
@@ -XXX,XX +XXX,XX @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
 }

 const BdrvChildRole child_format = {
+    .parent_is_bds   = true,
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_fmt_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
@@ -XXX,XX +XXX,XX @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
 }

 const BdrvChildRole child_backing = {
+    .parent_is_bds   = true,
     .get_parent_desc = bdrv_child_get_parent_desc,
     .attach          = bdrv_backing_attach,
     .detach          = bdrv_backing_detach,
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
     AioContext *ctx = bdrv_get_aio_context(bs);

     aio_disable_external(ctx);
-    bdrv_parent_drained_begin(bs, NULL);
+    bdrv_parent_drained_begin(bs, NULL, false);
     bdrv_drain(bs); /* ensure there are no in-flight requests */

     while (aio_poll(ctx, false)) {
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
-    bdrv_parent_drained_end(bs, NULL);
+    bdrv_parent_drained_end(bs, NULL, false);
    aio_enable_external(ctx);
    aio_context_release(new_context);
 }
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int bytes, BdrvRequestFlags flags);

-void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
+                               bool ignore_bds_parents)
 {
     BdrvChild *c, *next;

     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
-        if (c == ignore) {
+        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
             continue;
         }
         if (c->role->drained_begin) {
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
     }
 }

-void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
+                             bool ignore_bds_parents)
 {
     BdrvChild *c, *next;

     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
-        if (c == ignore) {
+        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
             continue;
         }
         if (c->role->drained_end) {
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
     }
 }

-static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore)
+static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
+                                     bool ignore_bds_parents)
 {
     BdrvChild *c, *next;
     bool busy = false;

     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
-        if (c == ignore) {
+        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
             continue;
         }
         if (c->role->drained_poll) {
@@ -XXX,XX +XXX,XX @@ typedef struct {
     bool recursive;
     bool poll;
     BdrvChild *parent;
+    bool ignore_bds_parents;
 } BdrvCoDrainData;

 static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)

 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
 bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
-                     BdrvChild *ignore_parent)
+                     BdrvChild *ignore_parent, bool ignore_bds_parents)
 {
     BdrvChild *child, *next;

-    if (bdrv_parent_drained_poll(bs, ignore_parent)) {
+    if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
         return true;
     }

@@ -XXX,XX +XXX,XX @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
     }

     if (recursive) {
+        assert(!ignore_bds_parents);
         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
-            if (bdrv_drain_poll(child->bs, recursive, child)) {
+            if (bdrv_drain_poll(child->bs, recursive, child, false)) {
                 return true;
             }
         }
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
     * have executed. */
    while (aio_poll(bs->aio_context, false));

-    return bdrv_drain_poll(bs, recursive, ignore_parent);
+    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
 }

 static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
-                                  BdrvChild *parent, bool poll);
+                                  BdrvChild *parent, bool ignore_bds_parents,
+                                  bool poll);
 static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
-                                BdrvChild *parent);
+                                BdrvChild *parent, bool ignore_bds_parents);

 static void bdrv_co_drain_bh_cb(void *opaque)
 {
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
     if (bs) {
         bdrv_dec_in_flight(bs);
         if (data->begin) {
-            bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll);
+            bdrv_do_drained_begin(bs, data->recursive, data->parent,
+                                  data->ignore_bds_parents, data->poll);
         } else {
-            bdrv_do_drained_end(bs, data->recursive, data->parent);
+            bdrv_do_drained_end(bs, data->recursive, data->parent,
+                                data->ignore_bds_parents);
         }
     } else {
         assert(data->begin);
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)

 static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                 bool begin, bool recursive,
-                                                BdrvChild *parent, bool poll)
+                                                BdrvChild *parent,
+                                                bool ignore_bds_parents,
+                                                bool poll)
 {
     BdrvCoDrainData data;

@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
         .begin = begin,
         .recursive = recursive,
         .parent = parent,
+        .ignore_bds_parents = ignore_bds_parents,
         .poll = poll,
     };
     if (bs) {
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
 }

 void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
-                                   BdrvChild *parent)
+                                   BdrvChild *parent, bool ignore_bds_parents)
 {
     assert(!qemu_in_coroutine());

@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
         aio_disable_external(bdrv_get_aio_context(bs));
     }

-    bdrv_parent_drained_begin(bs, parent);
+    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
     bdrv_drain_invoke(bs, true);
 }

 static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
-                                  BdrvChild *parent, bool poll)
+                                  BdrvChild *parent, bool ignore_bds_parents,
+                                  bool poll)
 {
     BdrvChild *child, *next;

     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs, true, recursive, parent, poll);
+        bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
+                               poll);
         return;
     }

-    bdrv_do_drained_begin_quiesce(bs, parent);
+    bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);

     if (recursive) {
+        assert(!ignore_bds_parents);
         bs->recursive_quiesce_counter++;
         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
-            bdrv_do_drained_begin(child->bs, true, child, false);
+            bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
+                                  false);
         }
     }

@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
     * nodes.
     */
    if (poll) {
+        assert(!ignore_bds_parents);
        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
    }
 }

 void bdrv_drained_begin(BlockDriverState *bs)
 {
-    bdrv_do_drained_begin(bs, false, NULL, true);
+    bdrv_do_drained_begin(bs, false, NULL, false, true);
 }

 void bdrv_subtree_drained_begin(BlockDriverState *bs)
 {
-    bdrv_do_drained_begin(bs, true, NULL, true);
+    bdrv_do_drained_begin(bs, true, NULL, false, true);
 }

-void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
-                         BdrvChild *parent)
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+                                BdrvChild *parent, bool ignore_bds_parents)
 {
     BdrvChild *child, *next;
     int old_quiesce_counter;

     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs, false, recursive, parent, false);
+        bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
+                               false);
         return;
     }
     assert(bs->quiesce_counter > 0);
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,

     /* Re-enable things in child-to-parent order */
     bdrv_drain_invoke(bs, false);
-    bdrv_parent_drained_end(bs, parent);
+    bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
     if (old_quiesce_counter == 1) {
         aio_enable_external(bdrv_get_aio_context(bs));
     }

     if (recursive) {
+        assert(!ignore_bds_parents);
         bs->recursive_quiesce_counter--;
         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
-            bdrv_do_drained_end(child->bs, true, child);
+            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
         }
     }
 }

 void bdrv_drained_end(BlockDriverState *bs)
 {
-    bdrv_do_drained_end(bs, false, NULL);
+    bdrv_do_drained_end(bs, false, NULL, false);
 }

 void bdrv_subtree_drained_end(BlockDriverState *bs)
 {
-    bdrv_do_drained_end(bs, true, NULL);
+    bdrv_do_drained_end(bs, true, NULL, false);
 }

 void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
@@ -XXX,XX +XXX,XX @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
     int i;

     for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
-        bdrv_do_drained_begin(child->bs, true, child, true);
+        bdrv_do_drained_begin(child->bs, true, child, false, true);
     }
 }

@@ -XXX,XX +XXX,XX @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
     int i;

     for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
-        bdrv_do_drained_end(child->bs, true, child);
+        bdrv_do_drained_end(child->bs, true, child, false);
     }
 }

@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
     BdrvNextIterator it;

     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(NULL, true, false, NULL, true);
+        bdrv_co_yield_to_drain(NULL, true, false, NULL, false, true);
         return;
     }

@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
         AioContext *aio_context = bdrv_get_aio_context(bs);

         aio_context_acquire(aio_context);
-        bdrv_do_drained_begin(bs, true, NULL, true);
+        bdrv_do_drained_begin(bs, true, NULL, false, true);
         aio_context_release(aio_context);
     }

@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
         AioContext *aio_context = bdrv_get_aio_context(bs);

         aio_context_acquire(aio_context);
-        bdrv_do_drained_end(bs, true, NULL);
+        bdrv_do_drained_end(bs, true, NULL, false);
         aio_context_release(aio_context);
     }
 }
diff --git a/block/vvfat.c b/block/vvfat.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(int *child_flags, QDict *child_options,
 }

 static const BdrvChildRole child_vvfat_qcow = {
+    .parent_is_bds    = true,
     .inherit_options  = vvfat_qcow_options,
 };

--
2.13.6


After processing the option string with the keyval parser, we get a
QDict that contains only strings. This QDict must be fed to a keyval
visitor which converts the strings into the right data types.

qmp_object_add(), however, uses the normal QObject input visitor, which
expects a QDict where all properties already have the QType that matches
the data type required by the QOM object type.

Change the --object implementation in qemu-storage-daemon so that it
doesn't call qmp_object_add(), but calls user_creatable_add_dict()
directly instead and passes it a new 'keyval' boolean that decides which
visitor must be used.

Reported-by: Coiby Xu <coiby.xu@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/qom/object_interfaces.h | 6 +++++-
 qemu-storage-daemon.c | 4 +---
 qom/object_interfaces.c | 8 ++++++--
 qom/qom-qmp-cmds.c | 2 +-
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qom/object_interfaces.h
+++ b/include/qom/object_interfaces.h
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
 /**
  * user_creatable_add_dict:
  * @qdict: the object definition
+ * @keyval: if true, use a keyval visitor for processing @qdict (i.e.
+ *          assume that all @qdict values are strings); otherwise, use
+ *          the normal QObject visitor (i.e. assume all @qdict values
+ *          have the QType expected by the QOM object type)
 * @errp: if an error occurs, a pointer to an area to store the error
 *
 * Create an instance of the user creatable object that is defined by
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
 * ID from the key 'id'. The remaining entries in @qdict are used to
 * initialize the object properties.
 */
-void user_creatable_add_dict(QDict *qdict, Error **errp);
+void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp);

 /**
  * user_creatable_add_opts:
diff --git a/qemu-storage-daemon.c b/qemu-storage-daemon.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-storage-daemon.c
+++ b/qemu-storage-daemon.c
@@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[])
             QemuOpts *opts;
             const char *type;
             QDict *args;
-            QObject *ret_data = NULL;

             /* FIXME The keyval parser rejects 'help' arguments, so we must
              * unconditionally try QemuOpts first. */
@@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[])
             qemu_opts_del(opts);

             args = keyval_parse(optarg, "qom-type", &error_fatal);
-            qmp_object_add(args, &ret_data, &error_fatal);
+            user_creatable_add_dict(args, true, &error_fatal);
             qobject_unref(args);
-            qobject_unref(ret_data);
             break;
         }
         default:
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
index XXXXXXX..XXXXXXX 100644
--- a/qom/object_interfaces.c
+++ b/qom/object_interfaces.c
@@ -XXX,XX +XXX,XX @@ out:
     return obj;
 }

-void user_creatable_add_dict(QDict *qdict, Error **errp)
+void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp)
 {
     Visitor *v;
     Object *obj;
@@ -XXX,XX +XXX,XX @@ void user_creatable_add_dict(QDict *qdict, Error **errp)
     }
     qdict_del(qdict, "id");

-    v = qobject_input_visitor_new(QOBJECT(qdict));
+    if (keyval) {
+        v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+    } else {
+        v = qobject_input_visitor_new(QOBJECT(qdict));
+    }
     obj = user_creatable_add_type(type, id, qdict, v, errp);
     visit_free(v);
     object_unref(obj);
diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
index XXXXXXX..XXXXXXX 100644
--- a/qom/qom-qmp-cmds.c
+++ b/qom/qom-qmp-cmds.c
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
         qobject_unref(pdict);
     }

-    user_creatable_add_dict(qdict, errp);
+    user_creatable_add_dict(qdict, false, errp);
 }

 void qmp_object_del(const char *id, Error **errp)
--
2.25.3

Deleted patch
This tests both adding and removing a node between bdrv_drain_all_begin()
and bdrv_drain_all_end(), and enables the existing detach test for
drain_all.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/test-bdrv-drain.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 73 insertions(+), 2 deletions(-)
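
As a reading aid, the scenario under test has this shape (a minimal
sketch; the node/BlockBackend setup is elided, see
test_graph_change_drain_all() below for the full version):

    /* All I/O is quiesced between begin and end; creating and
     * deleting graph nodes inside the section must be safe. */
    bdrv_drain_all_begin();
    blk_unref(blk_a);       /* deletes node A inside the section */
    bdrv_drain_all_end();   /* only surviving nodes are re-enabled */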

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@ static void test_multiparent(void)
blk_unref(blk_b);
}

-static void test_graph_change(void)
+static void test_graph_change_drain_subtree(void)
{
BlockBackend *blk_a, *blk_b;
BlockDriverState *bs_a, *bs_b, *backing;
@@ -XXX,XX +XXX,XX @@ static void test_graph_change(void)
blk_unref(blk_b);
}

+static void test_graph_change_drain_all(void)
+{
+ BlockBackend *blk_a, *blk_b;
+ BlockDriverState *bs_a, *bs_b;
+ BDRVTestState *a_s, *b_s;
+
+ /* Create node A with a BlockBackend */
+ blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
+ &error_abort);
+ a_s = bs_a->opaque;
+ blk_insert_bs(blk_a, bs_a, &error_abort);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+ g_assert_cmpint(a_s->drain_count, ==, 0);
+
+ /* Call bdrv_drain_all_begin() */
+ bdrv_drain_all_begin();
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
+ g_assert_cmpint(a_s->drain_count, ==, 1);
+
+ /* Create node B with a BlockBackend */
+ blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
+ &error_abort);
+ b_s = bs_b->opaque;
+ blk_insert_bs(blk_b, bs_b, &error_abort);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
+ g_assert_cmpint(a_s->drain_count, ==, 1);
+ g_assert_cmpint(b_s->drain_count, ==, 1);
+
+ /* Unref and finally delete node A */
+ blk_unref(blk_a);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
+ g_assert_cmpint(a_s->drain_count, ==, 1);
+ g_assert_cmpint(b_s->drain_count, ==, 1);
+
+ bdrv_unref(bs_a);
+
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
+ g_assert_cmpint(b_s->drain_count, ==, 1);
+
+ /* End the drained section */
+ bdrv_drain_all_end();
+
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+ g_assert_cmpint(b_s->drain_count, ==, 0);
+
+ bdrv_unref(bs_b);
+ blk_unref(blk_b);
+}
+
struct test_iothread_data {
BlockDriverState *bs;
enum drain_type drain_type;
@@ -XXX,XX +XXX,XX @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
bdrv_subtree_drained_begin(bs);
bdrv_subtree_drained_end(bs);
break;
+ case BDRV_DRAIN_ALL:
+ bdrv_drain_all_begin();
+ bdrv_drain_all_end();
+ break;
default:
g_assert_not_reached();
}
@@ -XXX,XX +XXX,XX @@ static void test_delete_by_drain(void)
do_test_delete_by_drain(false, BDRV_DRAIN);
}

+static void test_detach_by_drain_all(void)
+{
+ do_test_delete_by_drain(true, BDRV_DRAIN_ALL);
+}
+
static void test_detach_by_drain(void)
{
do_test_delete_by_drain(true, BDRV_DRAIN);
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)

g_test_add_func("/bdrv-drain/nested", test_nested);
g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
- g_test_add_func("/bdrv-drain/graph-change", test_graph_change);
+
+ g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
+ test_graph_change_drain_subtree);
+ g_test_add_func("/bdrv-drain/graph-change/drain_all",
+ test_graph_change_drain_all);

g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
test_blockjob_drain_subtree);

g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
+ g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

When converting mirror's I/O to coroutines, we are going to need a point
where these coroutines are created. mirror_perform() is going to be
that point.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20180613181823.13618-2-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/mirror.c | 51 +++++++++++++++++++++++++++++----------------------
1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorOp {
uint64_t bytes;
} MirrorOp;

+typedef enum MirrorMethod {
+ MIRROR_METHOD_COPY,
+ MIRROR_METHOD_ZERO,
+ MIRROR_METHOD_DISCARD,
+} MirrorMethod;
+
static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
int error)
{
@@ -XXX,XX +XXX,XX @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
}
}

+static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
+ unsigned bytes, MirrorMethod mirror_method)
+{
+ switch (mirror_method) {
+ case MIRROR_METHOD_COPY:
+ return mirror_do_read(s, offset, bytes);
+ case MIRROR_METHOD_ZERO:
+ case MIRROR_METHOD_DISCARD:
+ mirror_do_zero_or_discard(s, offset, bytes,
+ mirror_method == MIRROR_METHOD_DISCARD);
+ return bytes;
+ default:
+ abort();
+ }
+}
+
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = s->source;
@@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
int ret;
int64_t io_bytes;
int64_t io_bytes_acct;
- enum MirrorMethod {
- MIRROR_METHOD_COPY,
- MIRROR_METHOD_ZERO,
- MIRROR_METHOD_DISCARD
- } mirror_method = MIRROR_METHOD_COPY;
+ MirrorMethod mirror_method = MIRROR_METHOD_COPY;

assert(!(offset % s->granularity));
ret = bdrv_block_status_above(source, NULL, offset,
@@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
}

io_bytes = mirror_clip_bytes(s, offset, io_bytes);
- switch (mirror_method) {
- case MIRROR_METHOD_COPY:
- io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes);
- break;
- case MIRROR_METHOD_ZERO:
- case MIRROR_METHOD_DISCARD:
- mirror_do_zero_or_discard(s, offset, io_bytes,
- mirror_method == MIRROR_METHOD_DISCARD);
- if (write_zeroes_ok) {
- io_bytes_acct = 0;
- } else {
- io_bytes_acct = io_bytes;
- }
- break;
- default:
- abort();
+ io_bytes = mirror_perform(s, offset, io_bytes, mirror_method);
+ if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) {
+ io_bytes_acct = 0;
+ } else {
+ io_bytes_acct = io_bytes;
}
assert(io_bytes);
offset += io_bytes;
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
continue;
}

- mirror_do_zero_or_discard(s, offset, bytes, false);
+ mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO);
offset += bytes;
}

--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

In order to talk to the source BDS (and maybe in the future to the
target BDS as well) directly, we need to convert our existing AIO
requests into coroutine I/O requests.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20180613181823.13618-3-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/mirror.c | 152 ++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 90 insertions(+), 62 deletions(-)
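
The conversion follows one pattern throughout: instead of submitting an
AIO request with a completion callback, the coroutine issues the
request with the corresponding blk_co_*() variant and then calls the
former completion callback directly. A minimal before/after sketch
(function names as in the hunks below):

    /* Before: asynchronous request, completion runs from a callback */
    blk_aio_preadv(source, op->offset, &op->qiov, 0,
                   mirror_read_complete, op);

    /* After: the coroutine waits for the request and then handles
     * completion inline */
    ret = blk_co_preadv(source, op->offset, op->bytes, &op->qiov, 0);
    mirror_read_complete(op, ret);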

diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorOp {
QEMUIOVector qiov;
int64_t offset;
uint64_t bytes;
+
+ /* The pointee is set by mirror_co_read(), mirror_co_zero(), and
+ * mirror_co_discard() before yielding for the first time */
+ int64_t *bytes_handled;
} MirrorOp;

typedef enum MirrorMethod {
@@ -XXX,XX +XXX,XX @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
}
}

-static void mirror_iteration_done(MirrorOp *op, int ret)
+static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)
{
MirrorBlockJob *s = op->s;
struct iovec *iov;
@@ -XXX,XX +XXX,XX @@ static void mirror_iteration_done(MirrorOp *op, int ret)
}
}

-static void mirror_write_complete(void *opaque, int ret)
+static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret)
{
- MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;

aio_context_acquire(blk_get_aio_context(s->common.blk));
@@ -XXX,XX +XXX,XX @@ static void mirror_write_complete(void *opaque, int ret)
aio_context_release(blk_get_aio_context(s->common.blk));
}

-static void mirror_read_complete(void *opaque, int ret)
+static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret)
{
- MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;

aio_context_acquire(blk_get_aio_context(s->common.blk));
@@ -XXX,XX +XXX,XX @@ static void mirror_read_complete(void *opaque, int ret)

mirror_iteration_done(op, ret);
} else {
- blk_aio_pwritev(s->target, op->offset, &op->qiov,
- 0, mirror_write_complete, op);
+ ret = blk_co_pwritev(s->target, op->offset,
+ op->qiov.size, &op->qiov, 0);
+ mirror_write_complete(op, ret);
}
aio_context_release(blk_get_aio_context(s->common.blk));
}
@@ -XXX,XX +XXX,XX @@ static inline void mirror_wait_for_io(MirrorBlockJob *s)
s->waiting_for_io = false;
}

-/* Submit async read while handling COW.
- * Returns: The number of bytes copied after and including offset,
- * excluding any bytes copied prior to offset due to alignment.
- * This will be @bytes if no alignment is necessary, or
- * (new_end - offset) if tail is rounded up or down due to
- * alignment or buffer limit.
+/* Perform a mirror copy operation.
+ *
+ * *op->bytes_handled is set to the number of bytes copied after and
+ * including offset, excluding any bytes copied prior to offset due
+ * to alignment. This will be op->bytes if no alignment is necessary,
+ * or (new_end - op->offset) if the tail is rounded up or down due to
+ * alignment or buffer limit.
*/
-static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
- uint64_t bytes)
+static void coroutine_fn mirror_co_read(void *opaque)
{
+ MirrorOp *op = opaque;
+ MirrorBlockJob *s = op->s;
BlockBackend *source = s->common.blk;
int nb_chunks;
uint64_t ret;
- MirrorOp *op;
uint64_t max_bytes;

max_bytes = s->granularity * s->max_iov;

/* We can only handle as much as buf_size at a time. */
- bytes = MIN(s->buf_size, MIN(max_bytes, bytes));
- assert(bytes);
- assert(bytes < BDRV_REQUEST_MAX_BYTES);
- ret = bytes;
+ op->bytes = MIN(s->buf_size, MIN(max_bytes, op->bytes));
+ assert(op->bytes);
+ assert(op->bytes < BDRV_REQUEST_MAX_BYTES);
+ *op->bytes_handled = op->bytes;

if (s->cow_bitmap) {
- ret += mirror_cow_align(s, &offset, &bytes);
+ *op->bytes_handled += mirror_cow_align(s, &op->offset, &op->bytes);
}
- assert(bytes <= s->buf_size);
+ /* Cannot exceed BDRV_REQUEST_MAX_BYTES + INT_MAX */
+ assert(*op->bytes_handled <= UINT_MAX);
+ assert(op->bytes <= s->buf_size);
/* The offset is granularity-aligned because:
* 1) Caller passes in aligned values;
* 2) mirror_cow_align is used only when target cluster is larger. */
- assert(QEMU_IS_ALIGNED(offset, s->granularity));
+ assert(QEMU_IS_ALIGNED(op->offset, s->granularity));
/* The range is sector-aligned, since bdrv_getlength() rounds up. */
- assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
- nb_chunks = DIV_ROUND_UP(bytes, s->granularity);
+ assert(QEMU_IS_ALIGNED(op->bytes, BDRV_SECTOR_SIZE));
+ nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);

while (s->buf_free_count < nb_chunks) {
- trace_mirror_yield_in_flight(s, offset, s->in_flight);
+ trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
mirror_wait_for_io(s);
}

- /* Allocate a MirrorOp that is used as an AIO callback. */
- op = g_new(MirrorOp, 1);
- op->s = s;
- op->offset = offset;
- op->bytes = bytes;
-
/* Now make a QEMUIOVector taking enough granularity-sized chunks
* from s->buf_free.
*/
qemu_iovec_init(&op->qiov, nb_chunks);
while (nb_chunks-- > 0) {
MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
- size_t remaining = bytes - op->qiov.size;
+ size_t remaining = op->bytes - op->qiov.size;

QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
s->buf_free_count--;
@@ -XXX,XX +XXX,XX @@ static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,

/* Copy the dirty cluster. */
s->in_flight++;
- s->bytes_in_flight += bytes;
- trace_mirror_one_iteration(s, offset, bytes);
+ s->bytes_in_flight += op->bytes;
+ trace_mirror_one_iteration(s, op->offset, op->bytes);

- blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op);
- return ret;
+ ret = blk_co_preadv(source, op->offset, op->bytes, &op->qiov, 0);
+ mirror_read_complete(op, ret);
}

-static void mirror_do_zero_or_discard(MirrorBlockJob *s,
- int64_t offset,
- uint64_t bytes,
- bool is_discard)
+static void coroutine_fn mirror_co_zero(void *opaque)
{
- MirrorOp *op;
+ MirrorOp *op = opaque;
+ int ret;

- /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
- * so the freeing in mirror_iteration_done is nop. */
- op = g_new0(MirrorOp, 1);
- op->s = s;
- op->offset = offset;
- op->bytes = bytes;
+ op->s->in_flight++;
+ op->s->bytes_in_flight += op->bytes;
+ *op->bytes_handled = op->bytes;

- s->in_flight++;
- s->bytes_in_flight += bytes;
- if (is_discard) {
- blk_aio_pdiscard(s->target, offset,
- op->bytes, mirror_write_complete, op);
- } else {
- blk_aio_pwrite_zeroes(s->target, offset,
- op->bytes, s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
- mirror_write_complete, op);
- }
+ ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes,
+ op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0);
+ mirror_write_complete(op, ret);
+}
+
+static void coroutine_fn mirror_co_discard(void *opaque)
+{
+ MirrorOp *op = opaque;
+ int ret;
+
+ op->s->in_flight++;
+ op->s->bytes_in_flight += op->bytes;
+ *op->bytes_handled = op->bytes;
+
+ ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes);
+ mirror_write_complete(op, ret);
}

static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
unsigned bytes, MirrorMethod mirror_method)
{
+ MirrorOp *op;
+ Coroutine *co;
+ int64_t bytes_handled = -1;
+
+ op = g_new(MirrorOp, 1);
+ *op = (MirrorOp){
+ .s = s,
+ .offset = offset,
+ .bytes = bytes,
+ .bytes_handled = &bytes_handled,
+ };
+
switch (mirror_method) {
case MIRROR_METHOD_COPY:
- return mirror_do_read(s, offset, bytes);
+ co = qemu_coroutine_create(mirror_co_read, op);
+ break;
case MIRROR_METHOD_ZERO:
+ co = qemu_coroutine_create(mirror_co_zero, op);
+ break;
case MIRROR_METHOD_DISCARD:
- mirror_do_zero_or_discard(s, offset, bytes,
- mirror_method == MIRROR_METHOD_DISCARD);
- return bytes;
+ co = qemu_coroutine_create(mirror_co_discard, op);
+ break;
default:
abort();
}
+
+ qemu_coroutine_enter(co);
+ /* At this point, ownership of op has been moved to the coroutine
+ * and the object may already be freed */
+
+ /* Assert that this value has been set */
+ assert(bytes_handled >= 0);
+
+ /* Same assertion as in mirror_co_read() (and for mirror_co_read()
+ * and mirror_co_discard(), bytes_handled == op->bytes, which
+ * is the @bytes parameter given to this function) */
+ assert(bytes_handled <= UINT_MAX);
+ return bytes_handled;
}

static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

Attach a CoQueue to each in-flight operation so that, if we need to
wait for any of them, we can use it to wait instead of just blindly
yielding and hoping for some operation to wake us.

A later patch will use this infrastructure to allow requests accessing
the same area of the virtual disk to specifically wait for each other.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20180613181823.13618-4-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/mirror.c | 34 +++++++++++++++++++++++-----------
1 file changed, 23 insertions(+), 11 deletions(-)
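
The handshake is the usual CoQueue pattern: a coroutine that needs to
wait queues itself on a specific in-flight operation, and that
operation wakes all of its waiters on completion. A minimal sketch
(field names as introduced below):

    /* Waiter: block on one specific in-flight operation */
    qemu_co_queue_wait(&op->waiting_requests, NULL);

    /* Completion path: wake every coroutine waiting on this operation */
    qemu_co_queue_restart_all(&op->waiting_requests);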

diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@

#include "qemu/osdep.h"
#include "qemu/cutils.h"
+#include "qemu/coroutine.h"
#include "trace.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorBuffer {
QSIMPLEQ_ENTRY(MirrorBuffer) next;
} MirrorBuffer;

+typedef struct MirrorOp MirrorOp;
+
typedef struct MirrorBlockJob {
BlockJob common;
BlockBackend *target;
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob {
unsigned long *in_flight_bitmap;
int in_flight;
int64_t bytes_in_flight;
+ QTAILQ_HEAD(MirrorOpList, MirrorOp) ops_in_flight;
int ret;
bool unmap;
- bool waiting_for_io;
int target_cluster_size;
int max_iov;
bool initial_zeroing_ongoing;
} MirrorBlockJob;

-typedef struct MirrorOp {
+struct MirrorOp {
MirrorBlockJob *s;
QEMUIOVector qiov;
int64_t offset;
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorOp {
/* The pointee is set by mirror_co_read(), mirror_co_zero(), and
* mirror_co_discard() before yielding for the first time */
int64_t *bytes_handled;
-} MirrorOp;
+
+ CoQueue waiting_requests;
+
+ QTAILQ_ENTRY(MirrorOp) next;
+};

typedef enum MirrorMethod {
MIRROR_METHOD_COPY,
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)

chunk_num = op->offset / s->granularity;
nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);
+
bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
+ QTAILQ_REMOVE(&s->ops_in_flight, op, next);
if (ret >= 0) {
if (s->cow_bitmap) {
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)
}
}
qemu_iovec_destroy(&op->qiov);
- g_free(op);

- if (s->waiting_for_io) {
- qemu_coroutine_enter(s->common.job.co);
- }
+ qemu_co_queue_restart_all(&op->waiting_requests);
+ g_free(op);
}

static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret)
@@ -XXX,XX +XXX,XX @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,

static inline void mirror_wait_for_io(MirrorBlockJob *s)
{
- assert(!s->waiting_for_io);
- s->waiting_for_io = true;
- qemu_coroutine_yield();
- s->waiting_for_io = false;
+ MirrorOp *op;
+
+ op = QTAILQ_FIRST(&s->ops_in_flight);
+ assert(op);
+ qemu_co_queue_wait(&op->waiting_requests, NULL);
}

/* Perform a mirror copy operation.
@@ -XXX,XX +XXX,XX @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
.bytes = bytes,
.bytes_handled = &bytes_handled,
};
+ qemu_co_queue_init(&op->waiting_requests);

switch (mirror_method) {
case MIRROR_METHOD_COPY:
@@ -XXX,XX +XXX,XX @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
abort();
}

+ QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
qemu_coroutine_enter(co);
/* At this point, ownership of op has been moved to the coroutine
* and the object may already be freed */
@@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
}
}

+ QTAILQ_INIT(&s->ops_in_flight);
+
trace_mirror_start(bs, s, opaque);
job_start(&s->common.job);
return;
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

This patch makes the mirror code differentiate between simply waiting
for any operation to complete (mirror_wait_for_free_in_flight_slot())
and specifically waiting for all operations touching a certain range of
the virtual disk to complete (mirror_wait_on_conflicts()).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20180613181823.13618-5-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/mirror.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 84 insertions(+), 18 deletions(-)
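
Conflicts are detected on granularity-sized chunks: each request maps
to a half-open chunk range, and two requests conflict exactly when
those ranges overlap. A worked example with a 64k granularity (the
arithmetic matches mirror_wait_on_conflicts() below):

    /* A request at offset 96k with length 160k ends at 256k, so it
     * covers chunks [1, 4): 96k / 64k == 1 and
     * DIV_ROUND_UP(96k + 160k, 64k) == 4. */
    uint64_t start_chunk = offset / s->granularity;
    uint64_t end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
    /* conflict iff ranges_overlap(start_chunk, end_chunk - start_chunk,
     *                             op_start_chunk, op_nb_chunks) */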

diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/coroutine.h"
+#include "qemu/range.h"
#include "trace.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
@@ -XXX,XX +XXX,XX @@ struct MirrorOp {
* mirror_co_discard() before yielding for the first time */
int64_t *bytes_handled;

+ bool is_pseudo_op;
CoQueue waiting_requests;

QTAILQ_ENTRY(MirrorOp) next;
@@ -XXX,XX +XXX,XX @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
}
}

+static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
+ MirrorBlockJob *s,
+ uint64_t offset,
+ uint64_t bytes)
+{
+ uint64_t self_start_chunk = offset / s->granularity;
+ uint64_t self_end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
+ uint64_t self_nb_chunks = self_end_chunk - self_start_chunk;
+
+ while (find_next_bit(s->in_flight_bitmap, self_end_chunk,
+ self_start_chunk) < self_end_chunk &&
+ s->ret >= 0)
+ {
+ MirrorOp *op;
+
+ QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+ uint64_t op_start_chunk = op->offset / s->granularity;
+ uint64_t op_nb_chunks = DIV_ROUND_UP(op->offset + op->bytes,
+ s->granularity) -
+ op_start_chunk;
+
+ if (op == self) {
+ continue;
+ }
+
+ if (ranges_overlap(self_start_chunk, self_nb_chunks,
+ op_start_chunk, op_nb_chunks))
+ {
+ qemu_co_queue_wait(&op->waiting_requests, NULL);
+ break;
+ }
+ }
+ }
+}
+
static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)
{
MirrorBlockJob *s = op->s;
@@ -XXX,XX +XXX,XX @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
return ret;
}

-static inline void mirror_wait_for_io(MirrorBlockJob *s)
+static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
{
MirrorOp *op;

- op = QTAILQ_FIRST(&s->ops_in_flight);
- assert(op);
- qemu_co_queue_wait(&op->waiting_requests, NULL);
+ QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+ /* Do not wait on pseudo ops, because it may in turn wait on
+ * some other operation to start, which may in fact be the
+ * caller of this function. Since there is only one pseudo op
+ * at any given time, we will always find some real operation
+ * to wait on. */
+ if (!op->is_pseudo_op) {
+ qemu_co_queue_wait(&op->waiting_requests, NULL);
+ return;
+ }
+ }
+ abort();
}

/* Perform a mirror copy operation.
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_co_read(void *opaque)

while (s->buf_free_count < nb_chunks) {
trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
- mirror_wait_for_io(s);
+ mirror_wait_for_free_in_flight_slot(s);
}

/* Now make a QEMUIOVector taking enough granularity-sized chunks
@@ -XXX,XX +XXX,XX @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = s->source;
- int64_t offset, first_chunk;
- uint64_t delay_ns = 0;
+ MirrorOp *pseudo_op;
+ int64_t offset;
+ uint64_t delay_ns = 0, ret = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
int nb_chunks = 1;
bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
@@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
}
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

- first_chunk = offset / s->granularity;
- while (test_bit(first_chunk, s->in_flight_bitmap)) {
- trace_mirror_yield_in_flight(s, offset, s->in_flight);
- mirror_wait_for_io(s);
- }
+ mirror_wait_on_conflicts(NULL, s, offset, 1);

job_pause_point(&s->common.job);

@@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks * s->granularity);
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

+ /* Before claiming an area in the in-flight bitmap, we have to
+ * create a MirrorOp for it so that conflicting requests can wait
+ * for it. mirror_perform() will create the real MirrorOps later,
+ * for now we just create a pseudo operation that will wake up all
+ * conflicting requests once all real operations have been
+ * launched. */
+ pseudo_op = g_new(MirrorOp, 1);
+ *pseudo_op = (MirrorOp){
+ .offset = offset,
+ .bytes = nb_chunks * s->granularity,
+ .is_pseudo_op = true,
+ };
+ qemu_co_queue_init(&pseudo_op->waiting_requests);
+ QTAILQ_INSERT_TAIL(&s->ops_in_flight, pseudo_op, next);
+
bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
while (nb_chunks > 0 && offset < s->bdev_length) {
int ret;
@@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)

while (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield_in_flight(s, offset, s->in_flight);
- mirror_wait_for_io(s);
+ mirror_wait_for_free_in_flight_slot(s);
}

if (s->ret < 0) {
- return 0;
+ ret = 0;
+ goto fail;
}

io_bytes = mirror_clip_bytes(s, offset, io_bytes);
@@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
}
- return delay_ns;
+
+ ret = delay_ns;
+fail:
+ QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next);
+ qemu_co_queue_restart_all(&pseudo_op->waiting_requests);
+ g_free(pseudo_op);
+
+ return ret;
}

static void mirror_free_init(MirrorBlockJob *s)
@@ -XXX,XX +XXX,XX @@ static void mirror_free_init(MirrorBlockJob *s)
static void mirror_wait_for_all_io(MirrorBlockJob *s)
{
while (s->in_flight > 0) {
- mirror_wait_for_io(s);
+ mirror_wait_for_free_in_flight_slot(s);
}
}

@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
if (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
s->in_flight);
- mirror_wait_for_io(s);
+ mirror_wait_for_free_in_flight_slot(s);
continue;
}

@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque)
if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
- mirror_wait_for_io(s);
+ mirror_wait_for_free_in_flight_slot(s);
continue;
} else if (cnt != 0) {
delay_ns = mirror_iteration(s);
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

With this, the mirror_top_bs is no longer just a technically required
node in the BDS graph but actually represents the block job operation.

Also, drop MirrorBlockJob.source, as we can reach it through
mirror_top_bs->backing.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20180613181823.13618-6-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/mirror.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob {
BlockJob common;
BlockBackend *target;
BlockDriverState *mirror_top_bs;
- BlockDriverState *source;
BlockDriverState *base;

/* The name of the graph node to replace */
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_co_read(void *opaque)
{
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
- BlockBackend *source = s->common.blk;
int nb_chunks;
uint64_t ret;
uint64_t max_bytes;
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_co_read(void *opaque)
s->bytes_in_flight += op->bytes;
trace_mirror_one_iteration(s, op->offset, op->bytes);

- ret = blk_co_preadv(source, op->offset, op->bytes, &op->qiov, 0);
+ ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
+ &op->qiov, 0);
mirror_read_complete(op, ret);
}

@@ -XXX,XX +XXX,XX @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,

static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
- BlockDriverState *source = s->source;
+ BlockDriverState *source = s->mirror_top_bs->backing->bs;
MirrorOp *pseudo_op;
int64_t offset;
uint64_t delay_ns = 0, ret = 0;
@@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job, void *opaque)
BlockJob *bjob = &s->common;
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
- BlockDriverState *src = s->source;
+ BlockDriverState *src = s->mirror_top_bs->backing->bs;
BlockDriverState *target_bs = blk_bs(s->target);
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t offset;
BlockDriverState *base = s->base;
- BlockDriverState *bs = s->source;
+ BlockDriverState *bs = s->mirror_top_bs->backing->bs;
BlockDriverState *target_bs = blk_bs(s->target);
int ret;
int64_t count;
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
- BlockDriverState *bs = s->source;
+ BlockDriverState *bs = s->mirror_top_bs->backing->bs;
BlockDriverState *target_bs = blk_bs(s->target);
bool need_drain = true;
int64_t length;
@@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
/* The block job now has a reference to this node */
bdrv_unref(mirror_top_bs);

- s->source = bs;
s->mirror_top_bs = mirror_top_bs;

/* No resize for the target either; while the mirror is still running, a
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

Currently, bdrv_replace_node() refuses to create loops from one BDS to
itself if the BDS to be replaced is the backing node of the BDS to
replace it: Say there is a node A and a node B. Replacing B by A means
making all references to B point to A. If B is a child of A (i.e. A has
a reference to B), that would mean we would have to make this reference
point to A itself -- so we'd create a loop.

bdrv_replace_node() (through should_update_child()) refuses to do so if
B is the backing node of A. There is no reason why creating loops
should be any more acceptable if B is a non-backing child of A, though.
The BDS graph should never contain loops, so we should always refuse to
create them.

If B is a child of A and B is to be replaced by A, we should simply
leave B in place there because it is the most sensible choice.

A more specific argument would be: Putting filter drivers into the BDS
graph is basically the same as appending an overlay to a backing chain.
But the main child BDS of a filter driver is not "backing" but "file",
so restricting the no-loop rule to backing nodes would fail here.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20180613181823.13618-7-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block.c | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to)
return false;
}

- if (c->role == &child_backing) {
- /* If @from is a backing file of @to, ignore the child to avoid
- * creating a loop. We only want to change the pointer of other
- * parents. */
- QLIST_FOREACH(to_c, &to->children, next) {
- if (to_c == c) {
- break;
- }
- }
- if (to_c) {
+ /* If the child @c belongs to the BDS @to, replacing the current
+ * c->bs by @to would mean to create a loop.
+ *
+ * Such a case occurs when appending a BDS to a backing chain.
+ * For instance, imagine the following chain:
+ *
+ * guest device -> node A -> further backing chain...
+ *
+ * Now we create a new BDS B which we want to put on top of this
+ * chain, so we first attach A as its backing node:
+ *
+ * node B
+ * |
+ * v
+ * guest device -> node A -> further backing chain...
+ *
+ * Finally we want to replace A by B. When doing that, we want to
+ * replace all pointers to A by pointers to B -- except for the
+ * pointer from B because (1) that would create a loop, and (2)
+ * that pointer should simply stay intact:
+ *
+ * guest device -> node B
+ * |
+ * v
+ * node A -> further backing chain...
+ *
+ * In general, when replacing a node A (c->bs) by a node B (@to),
+ * if A is a child of B, that means we cannot replace A by B there
+ * because that would create a loop. Silently detaching A from B
+ * is also not really an option. So overall just leaving A in
+ * place there is the most sensible choice. */
+ QLIST_FOREACH(to_c, &to->children, next) {
+ if (to_c == c) {
return false;
}
}
@@ -XXX,XX +XXX,XX @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,

/* Put all parents into @list and calculate their cumulative permissions */
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+ assert(c->bs == from);
if (!should_update_child(c, to)) {
continue;
}
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

This new parameter allows the caller to just query the next dirty
position without moving the iterator.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20180613181823.13618-8-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
include/qemu/hbitmap.h | 5 ++++-
block/backup.c | 2 +-
block/dirty-bitmap.c | 2 +-
tests/test-hbitmap.c | 26 +++++++++++++-------------
util/hbitmap.c | 10 +++++++---
5 files changed, 26 insertions(+), 19 deletions(-)
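
In other words, hbitmap_iter_next(hbi, false) becomes a peek
operation. A minimal usage sketch (hbi is an initialized HBitmapIter):

    int64_t next;

    /* Peek without consuming: the iterator does not move... */
    next = hbitmap_iter_next(&hbi, false);
    if (next >= 0) {
        /* ...so the advancing call returns the same position */
        assert(hbitmap_iter_next(&hbi, true) == next);
    }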

diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -XXX,XX +XXX,XX @@ void hbitmap_free_meta(HBitmap *hb);
/**
* hbitmap_iter_next:
* @hbi: HBitmapIter to operate on.
+ * @advance: If true, advance the iterator. Otherwise, the next call
+ * of this function will return the same result (if that
+ * position is still dirty).
*
* Return the next bit that is set in @hbi's associated HBitmap,
* or -1 if all remaining bits are zero.
*/
-int64_t hbitmap_iter_next(HBitmapIter *hbi);
+int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance);

/**
* hbitmap_iter_next_word:
diff --git a/block/backup.c b/block/backup.c
index XXXXXXX..XXXXXXX 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
HBitmapIter hbi;

hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
- while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
+ while ((cluster = hbitmap_iter_next(&hbi, true)) != -1) {
do {
if (yield_and_check(job)) {
return 0;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index XXXXXXX..XXXXXXX 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -XXX,XX +XXX,XX @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter)

int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
{
- return hbitmap_iter_next(&iter->hbi);
+ return hbitmap_iter_next(&iter->hbi, true);
}

/* Called within bdrv_dirty_bitmap_lock..unlock */
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -XXX,XX +XXX,XX @@ static void hbitmap_test_check(TestHBitmapData *data,

i = first;
for (;;) {
- next = hbitmap_iter_next(&hbi);
+ next = hbitmap_iter_next(&hbi, true);
if (next < 0) {
next = data->size;
}
@@ -XXX,XX +XXX,XX @@ static void test_hbitmap_iter_granularity(TestHBitmapData *data,
/* Note that hbitmap_test_check has to be invoked manually in this test. */
hbitmap_test_init(data, 131072 << 7, 7);
hbitmap_iter_init(&hbi, data->hb, 0);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);

hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
hbitmap_iter_init(&hbi, data->hb, 0);
- g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);

hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);

hbitmap_test_set(data, (131072 << 7) - 8, 8);
hbitmap_iter_init(&hbi, data->hb, 0);
- g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);

hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
}

static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff)
@@ -XXX,XX +XXX,XX @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData *data,
for (i = 0; i < num_positions; i++) {
hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true);
hbitmap_iter_init(&iter, data->hb, 0);
- next = hbitmap_iter_next(&iter);
+ next = hbitmap_iter_next(&iter, true);
if (i == num_positions - 1) {
g_assert_cmpint(next, ==, -1);
} else {
@@ -XXX,XX +XXX,XX @@ static void test_hbitmap_iter_and_reset(TestHBitmapData *data,

hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1);

- hbitmap_iter_next(&hbi);
+ hbitmap_iter_next(&hbi, true);

hbitmap_reset_all(data->hb);
- hbitmap_iter_next(&hbi);
+ hbitmap_iter_next(&hbi, true);
}

static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start)
diff --git a/util/hbitmap.c b/util/hbitmap.c
index XXXXXXX..XXXXXXX 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -XXX,XX +XXX,XX @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
return cur;
}

-int64_t hbitmap_iter_next(HBitmapIter *hbi)
+int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance)
{
unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] &
hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos];
@@ -XXX,XX +XXX,XX @@ int64_t hbitmap_iter_next(HBitmapIter *hbi)
}
}

- /* The next call will resume work from the next bit. */
- hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+ if (advance) {
+ /* The next call will resume work from the next bit. */
+ hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+ } else {
+ hbi->cur[HBITMAP_LEVELS - 1] = cur;
+ }
item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);

return item << hbi->granularity;
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

Add a function that wraps hbitmap_iter_next() and always calls it in
non-advancing mode first, and in advancing mode next. The result should
always be the same.

By using this function everywhere we called hbitmap_iter_next() before,
we should get good test coverage for non-advancing hbitmap_iter_next().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20180613181823.13618-9-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
tests/test-hbitmap.c | 36 ++++++++++++++++++++++++------------
1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -XXX,XX +XXX,XX @@ typedef struct TestHBitmapData {
} TestHBitmapData;


+static int64_t check_hbitmap_iter_next(HBitmapIter *hbi)
+{
+ int next0, next1;
+
+ next0 = hbitmap_iter_next(hbi, false);
+ next1 = hbitmap_iter_next(hbi, true);
+
+ g_assert_cmpint(next0, ==, next1);
+
+ return next0;
+}
+
/* Check that the HBitmap and the shadow bitmap contain the same data,
* ignoring the same "first" bits.
*/
@@ -XXX,XX +XXX,XX @@ static void hbitmap_test_check(TestHBitmapData *data,

i = first;
for (;;) {
- next = hbitmap_iter_next(&hbi, true);
+ next = check_hbitmap_iter_next(&hbi);
if (next < 0) {
next = data->size;
}
@@ -XXX,XX +XXX,XX @@ static void test_hbitmap_iter_granularity(TestHBitmapData *data,
/* Note that hbitmap_test_check has to be invoked manually in this test. */
hbitmap_test_init(data, 131072 << 7, 7);
hbitmap_iter_init(&hbi, data->hb, 0);
- g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);

hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
hbitmap_iter_init(&hbi, data->hb, 0);
- g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);

hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);

hbitmap_test_set(data, (131072 << 7) - 8, 8);
hbitmap_iter_init(&hbi, data->hb, 0);
- g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);

hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);
}

static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff)
@@ -XXX,XX +XXX,XX @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData *data,
for (i = 0; i < num_positions; i++) {
hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true);
hbitmap_iter_init(&iter, data->hb, 0);
- next = hbitmap_iter_next(&iter, true);
+ next = check_hbitmap_iter_next(&iter);
if (i == num_positions - 1) {
g_assert_cmpint(next, ==, -1);
} else {
@@ -XXX,XX +XXX,XX @@ static void test_hbitmap_iter_and_reset(TestHBitmapData *data,

hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1);

- hbitmap_iter_next(&hbi, true);
+ check_hbitmap_iter_next(&hbi);

hbitmap_reset_all(data->hb);
- hbitmap_iter_next(&hbi, true);
+ check_hbitmap_iter_next(&hbi);
}

static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start)
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

This new function allows the caller to look for a consecutively dirty
area in a dirty bitmap.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20180613181823.13618-10-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
include/block/dirty-bitmap.h | 2 ++
block/dirty-bitmap.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+)
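
A minimal sketch of how a caller can walk all dirty areas with the new
function (process_dirty_area() is a hypothetical handler, not part of
this patch):

    uint64_t offset;
    int bytes;

    /* Each iteration yields one consecutively dirty area below
     * max_offset and advances the iterator past it. */
    while (bdrv_dirty_iter_next_area(iter, max_offset, &offset, &bytes)) {
        process_dirty_area(offset, bytes);
    }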

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -XXX,XX +XXX,XX @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes);
int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
+bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset,
+ uint64_t *offset, int *bytes);
void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset);
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index XXXXXXX..XXXXXXX 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -XXX,XX +XXX,XX @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
return hbitmap_iter_next(&iter->hbi, true);
}

+/**
+ * Return the next consecutively dirty area in the dirty bitmap
+ * belonging to the given iterator @iter.
+ *
+ * @max_offset: Maximum value that may be returned for
+ * *offset + *bytes
+ * @offset: Will contain the start offset of the next dirty area
+ * @bytes: Will contain the length of the next dirty area
+ *
+ * Returns: True if a dirty area could be found before max_offset
+ * (which means that *offset and *bytes then contain valid
+ * values), false otherwise.
+ *
+ * Note that @iter is never advanced if false is returned. If an area
+ * is found (which means that true is returned), it will be advanced
+ * past that area.
+ */
+bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset,
+ uint64_t *offset, int *bytes)
+{
+ uint32_t granularity = bdrv_dirty_bitmap_granularity(iter->bitmap);
+ uint64_t gran_max_offset;
+ int64_t ret;
+ int size;
+
+ if (max_offset == iter->bitmap->size) {
+ /* If max_offset points to the image end, round it up by the
+ * bitmap granularity */
+ gran_max_offset = ROUND_UP(max_offset, granularity);
+ } else {
+ gran_max_offset = max_offset;
+ }
+
+ ret = hbitmap_iter_next(&iter->hbi, false);
+ if (ret < 0 || ret + granularity > gran_max_offset) {
+ return false;
+ }
+
+ *offset = ret;
+ size = 0;
+
+ assert(granularity <= INT_MAX);
+
+ do {
+ /* Advance iterator */
+ ret = hbitmap_iter_next(&iter->hbi, true);
+ size += granularity;
+ } while (ret + granularity <= gran_max_offset &&
+ hbitmap_iter_next(&iter->hbi, false) == ret + granularity &&
+ size <= INT_MAX - granularity);
+
+ *bytes = MIN(size, max_offset - *offset);
+ return true;
+}
+
/* Called within bdrv_dirty_bitmap_lock..unlock */
void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes)
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

This will allow us to access the block job data when the mirror block
driver becomes more complex.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20180613181823.13618-11-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/mirror.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob {
bool initial_zeroing_ongoing;
} MirrorBlockJob;

+typedef struct MirrorBDSOpaque {
+ MirrorBlockJob *job;
+} MirrorBDSOpaque;
+
struct MirrorOp {
MirrorBlockJob *s;
QEMUIOVector qiov;
@@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
BlockJob *bjob = &s->common;
MirrorExitData *data = opaque;
+ MirrorBDSOpaque *bs_opaque = s->mirror_top_bs->opaque;
AioContext *replace_aio_context = NULL;
BlockDriverState *src = s->mirror_top_bs->backing->bs;
BlockDriverState *target_bs = blk_bs(s->target);
@@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job, void *opaque)
blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);

+ bs_opaque->job = NULL;
job_completed(job, data->ret, NULL);

g_free(data);
@@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
Error **errp)
{
MirrorBlockJob *s;
+ MirrorBDSOpaque *bs_opaque;
BlockDriverState *mirror_top_bs;
bool target_graph_mod;
bool target_is_backing;
@@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
mirror_top_bs->total_sectors = bs->total_sectors;
mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+ bs_opaque = g_new0(MirrorBDSOpaque, 1);
+ mirror_top_bs->opaque = bs_opaque;
bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));

/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
@@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
if (!s) {
goto fail;
}
+ bs_opaque->job = s;
+
/* The block job now has a reference to this node */
bdrv_unref(mirror_top_bs);

@@ -XXX,XX +XXX,XX @@ fail:

g_free(s->replaces);
blk_unref(s->target);
+ bs_opaque->job = NULL;
job_early_fail(&s->common.job);
}

--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180613181823.13618-12-mreitz@redhat.com
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
include/qemu/job.h | 15 +++++++++++++++
job.c | 5 +++++
2 files changed, 20 insertions(+)
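
A minimal sketch of the "parenthesis" usage described by the new doc
comment (extra_bytes and do_extra_copy() are placeholders for a
conditional high-priority operation, not part of this patch):

    /* Widen the expected end of the progress counter first... */
    job_progress_increase_remaining(job, extra_bytes);
    /* ...perform the high-priority operation... */
    do_extra_copy(extra_bytes);
    /* ...then account for the work that was done. */
    job_progress_update(job, extra_bytes);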

diff --git a/include/qemu/job.h b/include/qemu/job.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -XXX,XX +XXX,XX @@ void job_progress_update(Job *job, uint64_t done);
*/
void job_progress_set_remaining(Job *job, uint64_t remaining);

+/**
+ * @job: The job whose expected progress end value is updated
+ * @delta: Value which is to be added to the current expected end
+ * value
+ *
+ * Increases the expected end value of the progress counter of a job.
+ * This is useful for parenthesis operations: If a job has to
+ * conditionally perform a high-priority operation as part of its
+ * progress, it calls this function with the expected operation's
+ * length before, and job_progress_update() afterwards.
+ * (So the operation acts as a parenthesis in regards to the main job
+ * operation running in background.)
+ */
+void job_progress_increase_remaining(Job *job, uint64_t delta);
+
/** To be called when a cancelled job is finalised. */
void job_event_cancelled(Job *job);

diff --git a/job.c b/job.c
index XXXXXXX..XXXXXXX 100644
--- a/job.c
+++ b/job.c
@@ -XXX,XX +XXX,XX @@ void job_progress_set_remaining(Job *job, uint64_t remaining)
job->progress_total = job->progress_current + remaining;
}

+void job_progress_increase_remaining(Job *job, uint64_t delta)
+{
+ job->progress_total += delta;
+}
+
void job_event_cancelled(Job *job)
{
notifier_list_notify(&job->on_finalize_cancelled, job);
--
2.13.6

Deleted patch
From: Max Reitz <mreitz@redhat.com>

This patch allows the user to specify whether to use active or only
background mode for mirror block jobs. Currently, this setting will
remain constant for the duration of the entire block job.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20180613181823.13618-14-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
qapi/block-core.json | 11 +++++++++--
include/block/block_int.h | 4 +++-
block/mirror.c | 12 +++++++-----
blockdev.c | 9 ++++++++-
4 files changed, 27 insertions(+), 9 deletions(-)
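
An omitted copy-mode keeps the old behaviour. A sketch of how the
default would be applied before calling into the block layer (assuming
the usual has_copy_mode/copy_mode pair that QAPI generates for an
optional member; the blockdev.c hunk itself is not shown below):

    if (!has_copy_mode) {
        copy_mode = MIRROR_COPY_MODE_BACKGROUND;
    }
    /* copy_mode is then threaded through mirror_start() into
     * mirror_start_job(), as the hunks below show */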

diff --git a/qapi/block-core.json b/qapi/block-core.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -XXX,XX +XXX,XX @@
# written. Both will result in identical contents.
# Default is true. (Since 2.4)
#
+# @copy-mode: when to copy data to the destination; defaults to 'background'
+# (Since: 3.0)
+#
# Since: 1.3
##
{ 'struct': 'DriveMirror',
@@ -XXX,XX +XXX,XX @@
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
- '*unmap': 'bool' } }
+ '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } }

##
# @BlockDirtyBitmap:
@@ -XXX,XX +XXX,XX @@
# above @device. If this option is not given, a node name is
# autogenerated. (Since: 2.9)
#
+# @copy-mode: when to copy data to the destination; defaults to 'background'
+# (Since: 3.0)
+#
# Returns: nothing on success.
#
# Since: 2.6
@@ -XXX,XX +XXX,XX @@
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
- '*filter-node-name': 'str' } }
+ '*filter-node-name': 'str',
+ '*copy-mode': 'MirrorCopyMode' } }

##
# @block_set_io_throttle:
diff --git a/include/block/block_int.h b/include/block/block_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -XXX,XX +XXX,XX @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
* @filter_node_name: The node name that should be assigned to the filter
* driver that the mirror job inserts into the graph above @bs. NULL means that
* a node name should be autogenerated.
+ * @copy_mode: When to trigger writes to the target.
* @errp: Error object.
*
* Start a mirroring operation on @bs. Clusters that are allocated
@@ -XXX,XX +XXX,XX @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
- bool unmap, const char *filter_node_name, Error **errp);
+ bool unmap, const char *filter_node_name,
+ MirrorCopyMode copy_mode, Error **errp);

/*
* backup_job_create:
diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
const BlockJobDriver *driver,
bool is_none_mode, BlockDriverState *base,
bool auto_complete, const char *filter_node_name,
- bool is_mirror,
+ bool is_mirror, MirrorCopyMode copy_mode,
Error **errp)
{
MirrorBlockJob *s;
@@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
s->on_target_error = on_target_error;
s->is_none_mode = is_none_mode;
s->backing_mode = backing_mode;
- s->copy_mode = MIRROR_COPY_MODE_BACKGROUND;
+ s->copy_mode = copy_mode;
s->base = base;
s->granularity = granularity;
s->buf_size = ROUND_UP(buf_size, granularity);
@@ -XXX,XX +XXX,XX @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
- bool unmap, const char *filter_node_name, Error **errp)
+ bool unmap, const char *filter_node_name,
+ MirrorCopyMode copy_mode, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
@@ -XXX,XX +XXX,XX @@ void mirror_start(const char *job_id, BlockDriverState *bs,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, NULL, NULL,
&mirror_job_driver, is_none_mode, base, false,
- filter_node_name, true, errp);
+ filter_node_name, true, copy_mode, errp);
}

void commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -XXX,XX +XXX,XX @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, true, cb, opaque,
&commit_active_job_driver, false, base, auto_complete,
128
- filter_node_name, false, &local_err);
129
+ filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
130
+ &local_err);
131
if (local_err) {
132
error_propagate(errp, local_err);
133
goto error_restore_flags;
134
diff --git a/blockdev.c b/blockdev.c
135
index XXXXXXX..XXXXXXX 100644
136
--- a/blockdev.c
137
+++ b/blockdev.c
138
@@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
139
bool has_unmap, bool unmap,
140
bool has_filter_node_name,
141
const char *filter_node_name,
142
+ bool has_copy_mode, MirrorCopyMode copy_mode,
143
Error **errp)
144
{
145
146
@@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
147
if (!has_filter_node_name) {
148
filter_node_name = NULL;
149
}
150
+ if (!has_copy_mode) {
151
+ copy_mode = MIRROR_COPY_MODE_BACKGROUND;
152
+ }
153
154
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
155
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
156
@@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
157
has_replaces ? replaces : NULL,
158
speed, granularity, buf_size, sync, backing_mode,
159
on_source_error, on_target_error, unmap, filter_node_name,
160
- errp);
161
+ copy_mode, errp);
162
}
163
164
void qmp_drive_mirror(DriveMirror *arg, Error **errp)
165
@@ -XXX,XX +XXX,XX @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
166
arg->has_on_target_error, arg->on_target_error,
167
arg->has_unmap, arg->unmap,
168
false, NULL,
169
+ arg->has_copy_mode, arg->copy_mode,
170
&local_err);
171
bdrv_unref(target_bs);
172
error_propagate(errp, local_err);
173
@@ -XXX,XX +XXX,XX @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
174
BlockdevOnError on_target_error,
175
bool has_filter_node_name,
176
const char *filter_node_name,
177
+ bool has_copy_mode, MirrorCopyMode copy_mode,
178
Error **errp)
179
{
180
BlockDriverState *bs;
181
@@ -XXX,XX +XXX,XX @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
182
has_on_target_error, on_target_error,
183
true, true,
184
has_filter_node_name, filter_node_name,
185
+ has_copy_mode, copy_mode,
186
&local_err);
187
error_propagate(errp, local_err);
188
189
--
190
2.13.6
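With this in place, the copy mode can be selected when the mirror job is
created. A hypothetical QMP invocation (the job ID and node names are made
up for illustration) might look as follows; omitting @copy-mode keeps the
default 'background' behaviour:

    { "execute": "blockdev-mirror",
      "arguments": { "job-id": "mirror0",
                     "device": "source-node",
                     "target": "target-node",
                     "sync": "full",
                     "copy-mode": "write-blocking" } }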