The following changes since commit 2ef2f16781af9dee6ba6517755e9073ba5799fa2:

  Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20180615a' into staging (2018-06-15 18:13:35 +0100)

are available in the git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 4c790afe2503eab12874508acab5b388d7babfd2:

  Merge remote-tracking branch 'mreitz/tags/pull-block-2018-06-18' into queue-block (2018-06-18 17:20:42 +0200)

----------------------------------------------------------------
Block layer patches:

- Active mirror (blockdev-mirror copy-mode=write-blocking)
- bdrv_drain_*() fixes and test cases
- Fix crash with scsi-hd and drive_del

----------------------------------------------------------------
Greg Kurz (1):
      block: fix QEMU crash with scsi-hd and drive_del

Kevin Wolf (20):
      test-bdrv-drain: bdrv_drain() works with cross-AioContext events
      block: Use bdrv_do_drain_begin/end in bdrv_drain_all()
      block: Remove 'recursive' parameter from bdrv_drain_invoke()
      block: Don't manually poll in bdrv_drain_all()
      tests/test-bdrv-drain: bdrv_drain_all() works in coroutines now
      block: Avoid unnecessary aio_poll() in AIO_WAIT_WHILE()
      block: Really pause block jobs on drain
      block: Remove bdrv_drain_recurse()
      block: Drain recursively with a single BDRV_POLL_WHILE()
      test-bdrv-drain: Test node deletion in subtree recursion
      block: Don't poll in parent drain callbacks
      test-bdrv-drain: Graph change through parent callback
      block: Defer .bdrv_drain_begin callback to polling phase
      test-bdrv-drain: Test that bdrv_drain_invoke() doesn't poll
      block: Allow AIO_WAIT_WHILE with NULL ctx
      block: Move bdrv_drain_all_begin() out of coroutine context
      block: ignore_bds_parents parameter for drain functions
      block: Allow graph changes in bdrv_drain_all_begin/end sections
      test-bdrv-drain: Test graph changes in drain_all section
      Merge remote-tracking branch 'mreitz/tags/pull-block-2018-06-18' into queue-block

Max Reitz (15):
      test-bdrv-drain: Add test for node deletion
      block/mirror: Pull out mirror_perform()
      block/mirror: Convert to coroutines
      block/mirror: Use CoQueue to wait on in-flight ops
      block/mirror: Wait for in-flight op conflicts
      block/mirror: Use source as a BdrvChild
      block: Generalize should_update_child() rule
      hbitmap: Add @advance param to hbitmap_iter_next()
      test-hbitmap: Add non-advancing iter_next tests
      block/dirty-bitmap: Add bdrv_dirty_iter_next_area
      block/mirror: Add MirrorBDSOpaque
      job: Add job_progress_increase_remaining()
      block/mirror: Add active mirroring
      block/mirror: Add copy mode QAPI interface
      iotests: Add test for active mirroring

 qapi/block-core.json         |  29 +-
 include/block/aio-wait.h     |  25 +-
 include/block/block.h        |  31 +-
 include/block/block_int.h    |  18 +-
 include/block/blockjob_int.h |   8 +
 include/block/dirty-bitmap.h |   2 +
 include/qemu/hbitmap.h       |   5 +-
 include/qemu/job.h           |  15 +
 block.c                      |  96 +++++-
 block/backup.c               |   2 +-
 block/block-backend.c        |   5 +
 block/dirty-bitmap.c         |  57 +++-
 block/io.c                   | 332 ++++++++++++--------
 block/mirror.c               | 613 +++++++++++++++++++++++++++++--------
 block/vvfat.c                |   1 +
 blockdev.c                   |   9 +-
 blockjob.c                   |  23 ++
 job.c                        |   5 +
 tests/test-bdrv-drain.c      | 705 +++++++++++++++++++++++++++++++++++++++++--
 tests/test-hbitmap.c         |  38 ++-
 util/hbitmap.c               |  10 +-
 tests/qemu-iotests/151       | 120 ++++++++
 tests/qemu-iotests/151.out   |   5 +
 tests/qemu-iotests/group     |   1 +
 24 files changed, 1836 insertions(+), 319 deletions(-)
 create mode 100755 tests/qemu-iotests/151
 create mode 100644 tests/qemu-iotests/151.out

bdrv_drain_all() wants to have a single polling loop for draining the
in-flight requests of all nodes. This means that the AIO_WAIT_WHILE()
condition relies on activity in multiple AioContexts, which is polled
from the mainloop context. We must therefore call AIO_WAIT_WHILE() from
the mainloop thread and use the AioWait notification mechanism.

Just randomly picking the AioContext of any non-mainloop thread would
work, but instead of bothering to find such a context in the caller, we
can just as well accept NULL for ctx.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/aio-wait.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
 /**
  * AIO_WAIT_WHILE:
  * @wait: the aio wait object
- * @ctx: the aio context
+ * @ctx: the aio context, or NULL if multiple aio contexts (for which the
+ *       caller does not hold a lock) are involved in the polling condition.
  * @cond: wait while this conditional expression is true
  *
  * Wait while a condition is true. Use this to implement synchronous
@@ -XXX,XX +XXX,XX @@ typedef struct {
     bool waited_ = false;                                          \
     AioWait *wait_ = (wait);                                       \
     AioContext *ctx_ = (ctx);                                      \
-    if (in_aio_context_home_thread(ctx_)) {                        \
+    if (ctx_ && in_aio_context_home_thread(ctx_)) {                \
         while ((cond)) {                                           \
             aio_poll(ctx_, true);                                  \
             waited_ = true;                                        \
@@ -XXX,XX +XXX,XX @@ typedef struct {
         /* Increment wait_->num_waiters before evaluating cond. */ \
         atomic_inc(&wait_->num_waiters);                           \
         while ((cond)) {                                           \
-            aio_context_release(ctx_);                             \
+            if (ctx_) {                                            \
+                aio_context_release(ctx_);                         \
+            }                                                      \
             aio_poll(qemu_get_aio_context(), true);                \
-            aio_context_acquire(ctx_);                             \
+            if (ctx_) {                                            \
+                aio_context_acquire(ctx_);                         \
+            }                                                      \
             waited_ = true;                                        \
         }                                                          \
         atomic_dec(&wait_->num_waiters);                           \
-- 
2.13.6

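As a minimal usage sketch (not part of the patch above): with NULL
accepted for @ctx, a main-loop caller whose condition depends on
requests in several AioContexts no longer has to pick one of them. The
AioWait object and the in-flight counter below are hypothetical
placeholders, not names from this series.

#include "qemu/osdep.h"
#include "block/aio-wait.h"

static AioWait example_wait;            /* hypothetical wait object */
static unsigned example_in_flight;      /* hypothetical request counter */

static void example_wait_for_quiescence(void)
{
    /* Must run in the main loop thread: with ctx == NULL the macro
     * polls qemu_get_aio_context() and relies on the AioWait
     * notification mechanism to be woken up from other threads. */
    AIO_WAIT_WHILE(&example_wait, NULL, atomic_read(&example_in_flight) > 0);
}

This is the shape of caller that bdrv_drain_all()'s single polling loop
(see the commit message) needs.
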
From: Max Reitz <mreitz@redhat.com>

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180613181823.13618-12-mreitz@redhat.com
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 include/qemu/job.h | 15 +++++++++++++++
 job.c              |  5 +++++
 2 files changed, 20 insertions(+)

diff --git a/include/qemu/job.h b/include/qemu/job.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -XXX,XX +XXX,XX @@ void job_progress_update(Job *job, uint64_t done);
  */
 void job_progress_set_remaining(Job *job, uint64_t remaining);
 
+/**
+ * @job: The job whose expected progress end value is updated
+ * @delta: Value which is to be added to the current expected end
+ *         value
+ *
+ * Increases the expected end value of the progress counter of a job.
+ * This is useful for parenthesis operations: If a job has to
+ * conditionally perform a high-priority operation as part of its
+ * progress, it calls this function with the expected operation's
+ * length before, and job_progress_update() afterwards.
+ * (So the operation acts as a parenthesis in regards to the main job
+ * operation running in background.)
+ */
+void job_progress_increase_remaining(Job *job, uint64_t delta);
+
 /** To be called when a cancelled job is finalised. */
 void job_event_cancelled(Job *job);
 
diff --git a/job.c b/job.c
index XXXXXXX..XXXXXXX 100644
--- a/job.c
+++ b/job.c
@@ -XXX,XX +XXX,XX @@ void job_progress_set_remaining(Job *job, uint64_t remaining)
     job->progress_total = job->progress_current + remaining;
 }
 
+void job_progress_increase_remaining(Job *job, uint64_t delta)
+{
+    job->progress_total += delta;
+}
+
 void job_event_cancelled(Job *job)
 {
     notifier_list_notify(&job->on_finalize_cancelled, job);
-- 
2.13.6

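A minimal sketch (not part of the patch above) of the "parenthesis"
pattern the new doc comment describes; do_fast_copy() and the len
argument are hypothetical placeholders:

#include "qemu/osdep.h"
#include "qemu/job.h"

static void do_fast_copy(Job *job, uint64_t len);   /* hypothetical op */

static void example_copy_parenthesis(Job *job, uint64_t len)
{
    /* Widen the expected end before starting the extra operation... */
    job_progress_increase_remaining(job, len);

    do_fast_copy(job, len);     /* hypothetical high-priority operation */

    /* ...and account for it once done, closing the parenthesis. */
    job_progress_update(job, len);
}

The first user of this pattern is the active mirroring code added later
in this series, which accounts for guest writes that are mirrored
synchronously in the middle of the background copy.
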
All callers pass false for the 'recursive' parameter now. Remove it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
 }
 
 /* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive)
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
 {
-    BdrvChild *child, *tmp;
     BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
 
     if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive)
     data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
     bdrv_coroutine_enter(bs, data.co);
     BDRV_POLL_WHILE(bs, !data.done);
-
-    if (recursive) {
-        QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
-            bdrv_drain_invoke(child->bs, begin, true);
-        }
-    }
 }
 
 static bool bdrv_drain_recurse(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
     }
 
     bdrv_parent_drained_begin(bs, parent);
-    bdrv_drain_invoke(bs, true, false);
+    bdrv_drain_invoke(bs, true);
     bdrv_drain_recurse(bs);
 
     if (recursive) {
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
     old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
 
     /* Re-enable things in child-to-parent order */
-    bdrv_drain_invoke(bs, false, false);
+    bdrv_drain_invoke(bs, false);
     bdrv_parent_drained_end(bs, parent);
     if (old_quiesce_counter == 1) {
         aio_enable_external(bdrv_get_aio_context(bs));
-- 
2.13.6

58 | 95 | diff view generated by jsdifflib |
1 | Commit 91af091f923 added an additional aio_poll() to BDRV_POLL_WHILE() | 1 | bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver |
---|---|---|---|
2 | in order to make sure that all pending BHs are executed on drain. This | 2 | callback inside its polling loop. This means that how many times it got |
3 | was the wrong place to make the fix, as it is useless overhead for all | 3 | called for each node depended on long it had to poll the event loop. |
4 | other users of the macro and unnecessarily complicates the mechanism. | ||
5 | 4 | ||
6 | This patch effectively reverts said commit (the context has changed a | 5 | This is obviously not right and results in nodes that stay drained even |
7 | bit and the code has moved to AIO_WAIT_WHILE()) and instead polls in the | 6 | after bdrv_drain_all_end(), which calls .bdrv_co_drain_begin() once per |
8 | loop condition for drain. | 7 | node. |
9 | 8 | ||
10 | The effect is probably hard to measure in any real-world use case | 9 | Fix bdrv_drain_all_begin() to call the callback only once, too. |
11 | because actual I/O will dominate, but if I run only the initialisation | ||
12 | part of 'qemu-img convert' where it calls bdrv_block_status() for the | ||
13 | whole image to find out how much data there is copy, this phase actually | ||
14 | needs only roughly half the time after this patch. | ||
15 | 10 | ||
11 | Cc: qemu-stable@nongnu.org | ||
16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
17 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
18 | --- | 14 | --- |
19 | include/block/aio-wait.h | 22 ++++++++-------------- | 15 | block/io.c | 3 +-- |
20 | block/io.c | 11 ++++++++++- | 16 | 1 file changed, 1 insertion(+), 2 deletions(-) |
21 | 2 files changed, 18 insertions(+), 15 deletions(-) | ||
22 | 17 | ||
23 | diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/include/block/aio-wait.h | ||
26 | +++ b/include/block/aio-wait.h | ||
27 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
28 | */ | ||
29 | #define AIO_WAIT_WHILE(wait, ctx, cond) ({ \ | ||
30 | bool waited_ = false; \ | ||
31 | - bool busy_ = true; \ | ||
32 | AioWait *wait_ = (wait); \ | ||
33 | AioContext *ctx_ = (ctx); \ | ||
34 | if (in_aio_context_home_thread(ctx_)) { \ | ||
35 | - while ((cond) || busy_) { \ | ||
36 | - busy_ = aio_poll(ctx_, (cond)); \ | ||
37 | - waited_ |= !!(cond) | busy_; \ | ||
38 | + while ((cond)) { \ | ||
39 | + aio_poll(ctx_, true); \ | ||
40 | + waited_ = true; \ | ||
41 | } \ | ||
42 | } else { \ | ||
43 | assert(qemu_get_current_aio_context() == \ | ||
44 | qemu_get_aio_context()); \ | ||
45 | /* Increment wait_->num_waiters before evaluating cond. */ \ | ||
46 | atomic_inc(&wait_->num_waiters); \ | ||
47 | - while (busy_) { \ | ||
48 | - if ((cond)) { \ | ||
49 | - waited_ = busy_ = true; \ | ||
50 | - aio_context_release(ctx_); \ | ||
51 | - aio_poll(qemu_get_aio_context(), true); \ | ||
52 | - aio_context_acquire(ctx_); \ | ||
53 | - } else { \ | ||
54 | - busy_ = aio_poll(ctx_, false); \ | ||
55 | - waited_ |= busy_; \ | ||
56 | - } \ | ||
57 | + while ((cond)) { \ | ||
58 | + aio_context_release(ctx_); \ | ||
59 | + aio_poll(qemu_get_aio_context(), true); \ | ||
60 | + aio_context_acquire(ctx_); \ | ||
61 | + waited_ = true; \ | ||
62 | } \ | ||
63 | atomic_dec(&wait_->num_waiters); \ | ||
64 | } \ | ||
65 | diff --git a/block/io.c b/block/io.c | 18 | diff --git a/block/io.c b/block/io.c |
66 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
67 | --- a/block/io.c | 20 | --- a/block/io.c |
68 | +++ b/block/io.c | 21 | +++ b/block/io.c |
69 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | 22 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
70 | BDRV_POLL_WHILE(bs, !data.done); | 23 | aio_context_acquire(aio_context); |
71 | } | 24 | bdrv_parent_drained_begin(bs); |
72 | 25 | aio_disable_external(aio_context); | |
73 | +/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ | 26 | + bdrv_drain_invoke(bs, true); |
74 | +static bool bdrv_drain_poll(BlockDriverState *bs) | 27 | aio_context_release(aio_context); |
75 | +{ | 28 | |
76 | + /* Execute pending BHs first and check everything else only after the BHs | 29 | if (!g_slist_find(aio_ctxs, aio_context)) { |
77 | + * have executed. */ | 30 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
78 | + while (aio_poll(bs->aio_context, false)); | 31 | aio_context_acquire(aio_context); |
79 | + return atomic_read(&bs->in_flight); | 32 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
80 | +} | 33 | if (aio_context == bdrv_get_aio_context(bs)) { |
81 | + | 34 | - /* FIXME Calling this multiple times is wrong */ |
82 | static bool bdrv_drain_recurse(BlockDriverState *bs) | 35 | - bdrv_drain_invoke(bs, true); |
83 | { | 36 | waited |= bdrv_drain_recurse(bs, true); |
84 | BdrvChild *child, *tmp; | 37 | } |
85 | bool waited; | 38 | } |
86 | |||
87 | /* Wait for drained requests to finish */ | ||
88 | - waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0); | ||
89 | + waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll(bs)); | ||
90 | |||
91 | QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { | ||
92 | BlockDriverState *bs = child->bs; | ||
93 | -- | 39 | -- |
94 | 2.13.6 | 40 | 2.13.6 |
95 | 41 | ||
96 | 42 | diff view generated by jsdifflib |
As long as nobody keeps the other I/O thread from working, there is no
reason why bdrv_drain() wouldn't work with cross-AioContext events. The
key is that the root request we're waiting for is in the AioContext
we're polling (which it always is for bdrv_drain()) so that aio_poll()
is woken up in the end.

Add a test case that shows that it works. Remove the comment in
bdrv_drain() that claims otherwise.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/io.c              |   4 --
 tests/test-bdrv-drain.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 186 insertions(+), 5 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
  *
  * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
  * AioContext.
- *
- * Only this BlockDriverState's AioContext is run, so in-flight requests must
- * not depend on events in other AioContexts. In that case, use
- * bdrv_drain_all() instead.
  */
 void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
 {
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -XXX,XX +XXX,XX @@
 #include "block/blockjob_int.h"
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
+#include "iothread.h"
+
+static QemuEvent done_event;
 
 typedef struct BDRVTestState {
     int drain_count;
+    AioContext *bh_indirection_ctx;
 } BDRVTestState;
 
 static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static void bdrv_test_close(BlockDriverState *bs)
     g_assert_cmpint(s->drain_count, >, 0);
 }
 
+static void co_reenter_bh(void *opaque)
+{
+    aio_co_wake(opaque);
+}
+
 static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
                                             uint64_t offset, uint64_t bytes,
                                             QEMUIOVector *qiov, int flags)
 {
+    BDRVTestState *s = bs->opaque;
+
     /* We want this request to stay until the polling loop in drain waits for
      * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
      * first and polls its result, too, but it shouldn't accidentally complete
      * this request yet. */
     qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
 
+    if (s->bh_indirection_ctx) {
+        aio_bh_schedule_oneshot(s->bh_indirection_ctx, co_reenter_bh,
+                                qemu_coroutine_self());
+        qemu_coroutine_yield();
+    }
+
     return 0;
 }
 
@@ -XXX,XX +XXX,XX @@ static void test_graph_change(void)
     blk_unref(blk_b);
 }
 
+struct test_iothread_data {
+    BlockDriverState *bs;
+    enum drain_type drain_type;
+    int *aio_ret;
+};
+
+static void test_iothread_drain_entry(void *opaque)
+{
+    struct test_iothread_data *data = opaque;
+
+    aio_context_acquire(bdrv_get_aio_context(data->bs));
+    do_drain_begin(data->drain_type, data->bs);
+    g_assert_cmpint(*data->aio_ret, ==, 0);
+    do_drain_end(data->drain_type, data->bs);
+    aio_context_release(bdrv_get_aio_context(data->bs));
+
+    qemu_event_set(&done_event);
+}
+
+static void test_iothread_aio_cb(void *opaque, int ret)
+{
+    int *aio_ret = opaque;
+    *aio_ret = ret;
+    qemu_event_set(&done_event);
+}
+
+/*
+ * Starts an AIO request on a BDS that runs in the AioContext of iothread 1.
+ * The request involves a BH on iothread 2 before it can complete.
+ *
+ * @drain_thread = 0 means that do_drain_begin/end are called from the main
+ * thread, @drain_thread = 1 means that they are called from iothread 1. Drain
+ * for this BDS cannot be called from iothread 2 because only the main thread
+ * may do cross-AioContext polling.
+ */
+static void test_iothread_common(enum drain_type drain_type, int drain_thread)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    BDRVTestState *s;
+    BlockAIOCB *acb;
+    int aio_ret;
+    struct test_iothread_data data;
+
+    IOThread *a = iothread_new();
+    IOThread *b = iothread_new();
+    AioContext *ctx_a = iothread_get_aio_context(a);
+    AioContext *ctx_b = iothread_get_aio_context(b);
+
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = NULL,
+        .iov_len = 0,
+    };
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    /* bdrv_drain_all() may only be called from the main loop thread */
+    if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) {
+        goto out;
+    }
+
+    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+                              &error_abort);
+    s = bs->opaque;
+    blk_insert_bs(blk, bs, &error_abort);
+
+    blk_set_aio_context(blk, ctx_a);
+    aio_context_acquire(ctx_a);
+
+    s->bh_indirection_ctx = ctx_b;
+
+    aio_ret = -EINPROGRESS;
+    if (drain_thread == 0) {
+        acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret);
+    } else {
+        acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
+    }
+    g_assert(acb != NULL);
+    g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
+
+    aio_context_release(ctx_a);
+
+    data = (struct test_iothread_data) {
+        .bs         = bs,
+        .drain_type = drain_type,
+        .aio_ret    = &aio_ret,
+    };
+
+    switch (drain_thread) {
+    case 0:
+        if (drain_type != BDRV_DRAIN_ALL) {
+            aio_context_acquire(ctx_a);
+        }
+
+        /* The request is running on the IOThread a. Draining its block device
+         * will make sure that it has completed as far as the BDS is concerned,
+         * but the drain in this thread can continue immediately after
+         * bdrv_dec_in_flight() and aio_ret might be assigned only slightly
+         * later. */
+        qemu_event_reset(&done_event);
+        do_drain_begin(drain_type, bs);
+        g_assert_cmpint(bs->in_flight, ==, 0);
+
+        if (drain_type != BDRV_DRAIN_ALL) {
+            aio_context_release(ctx_a);
+        }
+        qemu_event_wait(&done_event);
+        if (drain_type != BDRV_DRAIN_ALL) {
+            aio_context_acquire(ctx_a);
+        }
+
+        g_assert_cmpint(aio_ret, ==, 0);
+        do_drain_end(drain_type, bs);
+
+        if (drain_type != BDRV_DRAIN_ALL) {
+            aio_context_release(ctx_a);
+        }
+        break;
+    case 1:
+        qemu_event_reset(&done_event);
+        aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data);
+        qemu_event_wait(&done_event);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    aio_context_acquire(ctx_a);
+    blk_set_aio_context(blk, qemu_get_aio_context());
+    aio_context_release(ctx_a);
+
+    bdrv_unref(bs);
+    blk_unref(blk);
+
+out:
+    iothread_join(a);
+    iothread_join(b);
+}
+
+static void test_iothread_drain_all(void)
+{
+    test_iothread_common(BDRV_DRAIN_ALL, 0);
+    test_iothread_common(BDRV_DRAIN_ALL, 1);
+}
+
+static void test_iothread_drain(void)
+{
+    test_iothread_common(BDRV_DRAIN, 0);
+    test_iothread_common(BDRV_DRAIN, 1);
+}
+
+static void test_iothread_drain_subtree(void)
+{
+    test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
+    test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
+}
+
 
 typedef struct TestBlockJob {
     BlockJob common;
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain_subtree(void)
 
 int main(int argc, char **argv)
 {
+    int ret;
+
     bdrv_init();
     qemu_init_main_loop(&error_abort);
 
     g_test_init(&argc, &argv, NULL);
+    qemu_event_init(&done_event, false);
 
     g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
     g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
     g_test_add_func("/bdrv-drain/graph-change", test_graph_change);
 
+    g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
+    g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
+    g_test_add_func("/bdrv-drain/iothread/drain_subtree",
+                    test_iothread_drain_subtree);
+
     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
     g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
                     test_blockjob_drain_subtree);
 
-    return g_test_run();
+    ret = g_test_run();
+    qemu_event_destroy(&done_event);
+    return ret;
 }
-- 
2.13.6

We already requested that block jobs be paused in .bdrv_drained_begin,
but no guarantee was made that the job was actually inactive at the
point where bdrv_drained_begin() returned.

This introduces a new callback BdrvChildRole.bdrv_drained_poll() and
uses it to make bdrv_drain_poll() consider block jobs using the node to
be drained.

For the test case to work as expected, we have to switch from
block_job_sleep_ns() to qemu_co_sleep_ns() so that the test job is even
considered active and must be waited for when draining the node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block.h        |  8 ++++++++
 include/block/block_int.h    |  7 +++++++
 include/block/blockjob_int.h |  8 ++++++++
 block.c                      |  9 +++++++++
 block/io.c                   | 40 ++++++++++++++++++++++++++++++++++------
 block/mirror.c               |  8 ++++++++
 blockjob.c                   | 23 +++++++++++++++++++++++
 tests/test-bdrv-drain.c      | 18 ++++++++++--------
 8 files changed, 107 insertions(+), 14 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
 void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
 
 /**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs and its parents (except for
+ * @ignore_parent). This is part of bdrv_drained_begin.
+ */
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent);
+
+/**
  * bdrv_drained_begin:
  *
  * Begin a quiesced section for exclusive access to the BDS, by disabling
diff --git a/include/block/block_int.h b/include/block/block_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -XXX,XX +XXX,XX @@ struct BdrvChildRole {
     void (*drained_begin)(BdrvChild *child);
     void (*drained_end)(BdrvChild *child);
 
+    /*
+     * Returns whether the parent has pending requests for the child. This
+     * callback is polled after .drained_begin() has been called until all
+     * activity on the child has stopped.
+     */
+    bool (*drained_poll)(BdrvChild *child);
+
     /* Notifies the parent that the child has been activated/inactivated (e.g.
      * when migration is completing) and it can start/stop requesting
      * permissions and doing I/O on it. */
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -XXX,XX +XXX,XX @@ struct BlockJobDriver {
     JobDriver job_driver;
 
     /*
+     * Returns whether the job has pending requests for the child or will
+     * submit new requests before the next pause point. This callback is polled
+     * in the context of draining a job node after requesting that the job be
+     * paused, until all activity on the child has stopped.
+     */
+    bool (*drained_poll)(BlockJob *job);
+
+    /*
      * If the callback is not NULL, it will be invoked before the job is
      * resumed in a new AioContext. This is the place to move any resources
      * besides job->blk to the new AioContext.
diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
     bdrv_drained_begin(bs);
 }
 
+static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    return bdrv_drain_poll(bs, NULL);
+}
+
 static void bdrv_child_cb_drained_end(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = {
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_options,
     .drained_begin = bdrv_child_cb_drained_begin,
+    .drained_poll = bdrv_child_cb_drained_poll,
     .drained_end = bdrv_child_cb_drained_end,
     .attach = bdrv_child_cb_attach,
     .detach = bdrv_child_cb_detach,
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = {
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_fmt_options,
     .drained_begin = bdrv_child_cb_drained_begin,
+    .drained_poll = bdrv_child_cb_drained_poll,
     .drained_end = bdrv_child_cb_drained_end,
     .attach = bdrv_child_cb_attach,
     .detach = bdrv_child_cb_detach,
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_backing = {
     .detach = bdrv_backing_detach,
     .inherit_options = bdrv_backing_options,
     .drained_begin = bdrv_child_cb_drained_begin,
+    .drained_poll = bdrv_child_cb_drained_poll,
     .drained_end = bdrv_child_cb_drained_end,
     .inactivate = bdrv_child_cb_inactivate,
     .update_filename = bdrv_backing_update_filename,
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
     }
 }
 
+static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore)
+{
+    BdrvChild *c, *next;
+    bool busy = false;
+
+    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
+        if (c == ignore) {
+            continue;
+        }
+        if (c->role->drained_poll) {
+            busy |= c->role->drained_poll(c);
+        }
+    }
+
+    return busy;
+}
+
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
 {
     dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
 }
 
 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
-static bool bdrv_drain_poll(BlockDriverState *bs)
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent)
+{
+    if (bdrv_parent_drained_poll(bs, ignore_parent)) {
+        return true;
+    }
+
+    return atomic_read(&bs->in_flight);
+}
+
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
+                                      BdrvChild *ignore_parent)
 {
     /* Execute pending BHs first and check everything else only after the BHs
      * have executed. */
     while (aio_poll(bs->aio_context, false));
-    return atomic_read(&bs->in_flight);
+
+    return bdrv_drain_poll(bs, ignore_parent);
 }
 
-static bool bdrv_drain_recurse(BlockDriverState *bs)
+static bool bdrv_drain_recurse(BlockDriverState *bs, BdrvChild *parent)
 {
     BdrvChild *child, *tmp;
     bool waited;
 
     /* Wait for drained requests to finish */
-    waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll(bs));
+    waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
 
     QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
         BlockDriverState *bs = child->bs;
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
          */
         bdrv_ref(bs);
     }
-    waited |= bdrv_drain_recurse(bs);
+    waited |= bdrv_drain_recurse(bs, child);
     if (in_main_loop) {
         bdrv_unref(bs);
     }
@@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
     }
 
     bdrv_parent_drained_begin(bs, parent);
     bdrv_drain_invoke(bs, true);
-    bdrv_drain_recurse(bs);
+    bdrv_drain_recurse(bs, parent);
 
     if (recursive) {
         bs->recursive_quiesce_counter++;
diff --git a/block/mirror.c b/block/mirror.c
index XXXXXXX..XXXXXXX 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -XXX,XX +XXX,XX @@ static void mirror_pause(Job *job)
     mirror_wait_for_all_io(s);
 }
 
+static bool mirror_drained_poll(BlockJob *job)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    return !!s->in_flight;
+}
+
 static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver mirror_job_driver = {
         .pause = mirror_pause,
         .complete = mirror_complete,
     },
+    .drained_poll = mirror_drained_poll,
     .attached_aio_context = mirror_attached_aio_context,
     .drain = mirror_drain,
 };
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver commit_active_job_driver = {
         .pause = mirror_pause,
         .complete = mirror_complete,
     },
+    .drained_poll = mirror_drained_poll,
     .attached_aio_context = mirror_attached_aio_context,
     .drain = mirror_drain,
 };
diff --git a/blockjob.c b/blockjob.c
index XXXXXXX..XXXXXXX 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -XXX,XX +XXX,XX @@ static void child_job_drained_begin(BdrvChild *c)
     job_pause(&job->job);
 }
 
+static bool child_job_drained_poll(BdrvChild *c)
+{
+    BlockJob *bjob = c->opaque;
+    Job *job = &bjob->job;
+    const BlockJobDriver *drv = block_job_driver(bjob);
+
+    /* An inactive or completed job doesn't have any pending requests. Jobs
+     * with !job->busy are either already paused or have a pause point after
+     * being reentered, so no job driver code will run before they pause. */
+    if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) {
+        return false;
+    }
+
+    /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
+     * override this assumption. */
+    if (drv->drained_poll) {
+        return drv->drained_poll(bjob);
+    } else {
+        return true;
+    }
+}
+
 static void child_job_drained_end(BdrvChild *c)
 {
     BlockJob *job = c->opaque;
@@ -XXX,XX +XXX,XX @@ static void child_job_drained_end(BdrvChild *c)
 static const BdrvChildRole child_job = {
     .get_parent_desc = child_job_get_parent_desc,
     .drained_begin = child_job_drained_begin,
+    .drained_poll = child_job_drained_poll,
     .drained_end = child_job_drained_end,
     .stay_at_node = true,
278 | }; | ||
279 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | ||
280 | index XXXXXXX..XXXXXXX 100644 | ||
281 | --- a/tests/test-bdrv-drain.c | ||
282 | +++ b/tests/test-bdrv-drain.c | ||
283 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn test_job_start(void *opaque) | ||
284 | |||
285 | job_transition_to_ready(&s->common.job); | ||
286 | while (!s->should_complete) { | ||
287 | - job_sleep_ns(&s->common.job, 100000); | ||
288 | + /* Avoid job_sleep_ns() because it marks the job as !busy. We | ||
289 | + * want to emulate some actual activity (probably some I/O) here so | ||
290 | + * that drain has to wait for this activity to stop. */ | ||
291 | + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); | ||
292 | + job_pause_point(&s->common.job); | ||
293 | } | 66 | } |
294 | 67 | ||
295 | job_defer_to_main_loop(&s->common.job, test_job_completed, NULL); | ||
296 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) | ||
297 | |||
298 | g_assert_cmpint(job->job.pause_count, ==, 0); | ||
299 | g_assert_false(job->job.paused); | ||
300 | - g_assert_false(job->job.busy); /* We're in job_sleep_ns() */ | ||
301 | + g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ | ||
302 | |||
303 | do_drain_begin(drain_type, src); | ||
304 | |||
305 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) | ||
306 | } else { | ||
307 | g_assert_cmpint(job->job.pause_count, ==, 1); | ||
308 | } | ||
309 | - /* XXX We don't wait until the job is actually paused. Is this okay? */ | ||
310 | - /* g_assert_true(job->job.paused); */ | ||
311 | + g_assert_true(job->job.paused); | ||
312 | g_assert_false(job->job.busy); /* The job is paused */ | ||
313 | |||
314 | do_drain_end(drain_type, src); | ||
315 | |||
316 | g_assert_cmpint(job->job.pause_count, ==, 0); | ||
317 | g_assert_false(job->job.paused); | ||
318 | - g_assert_false(job->job.busy); /* We're in job_sleep_ns() */ | ||
319 | + g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ | ||
320 | |||
321 | do_drain_begin(drain_type, target); | ||
322 | |||
323 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) | ||
324 | } else { | ||
325 | g_assert_cmpint(job->job.pause_count, ==, 1); | ||
326 | } | ||
327 | - /* XXX We don't wait until the job is actually paused. Is this okay? */ | ||
328 | - /* g_assert_true(job->job.paused); */ | ||
329 | + g_assert_true(job->job.paused); | ||
330 | g_assert_false(job->job.busy); /* The job is paused */ | ||
331 | |||
332 | do_drain_end(drain_type, target); | ||
333 | |||
334 | g_assert_cmpint(job->job.pause_count, ==, 0); | ||
335 | g_assert_false(job->job.paused); | ||
336 | - g_assert_false(job->job.busy); /* We're in job_sleep_ns() */ | ||
337 | + g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ | ||
338 | |||
339 | ret = job_complete_sync(&job->job, &error_abort); | ||
340 | g_assert_cmpint(ret, ==, 0); | ||
341 | -- | 68 | -- |
342 | 2.13.6 | 69 | 2.13.6 |
343 | 70 | ||
344 | 71 | diff view generated by jsdifflib |
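The drained_poll plumbing above boils down to one question asked in a loop: is anything, either a parent of the node (such as a block job) or the node itself, still active? The following standalone C sketch models that poll loop; every type and name in it is invented for illustration and is not the real QEMU API.

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct Parent {
        /* NULL means "this parent never has activity to wait for" */
        bool (*drained_poll)(struct Parent *p);
        int pending;                /* fake per-parent activity counter */
        struct Parent *next;
    } Parent;

    typedef struct {
        Parent *parents;
        int in_flight;
    } Node;

    /* A job-like parent: reports activity until its fake requests drain. */
    static bool job_drained_poll(Parent *p)
    {
        if (p->pending > 0) {
            p->pending--;           /* one "request" completes per poll */
            return true;
        }
        return false;
    }

    /* Ask every parent except @ignore, then check the node itself. */
    static bool drain_poll(Node *bs, Parent *ignore)
    {
        for (Parent *p = bs->parents; p; p = p->next) {
            if (p != ignore && p->drained_poll && p->drained_poll(p)) {
                return true;
            }
        }
        return bs->in_flight > 0;
    }

    int main(void)
    {
        Parent job = { .drained_poll = job_drained_poll, .pending = 3 };
        Node bs = { .parents = &job, .in_flight = 0 };
        int iterations = 0;

        /* stand-in for BDRV_POLL_WHILE(): poll until nothing is busy */
        while (drain_poll(&bs, NULL)) {
            iterations++;
        }
        printf("drained after %d poll iterations\n", iterations);
        return 0;
    }

The same split as in the patches: parents are asked first, the node's own in-flight count only afterwards, and draining only finishes once every callback reports quiet.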
1 | All involved nodes are already idle; we called bdrv_do_drain_begin() on | 1 | The device is drained, so there is no point in waiting for requests at |
---|---|---|---|
2 | them. | 2 | the end of the drained section. Remove the bdrv_drain_recurse() calls |
3 | there. | ||
3 | 4 | ||
4 | The comment in the code suggested that this was not correct because the | 5 | The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e |
5 | completion of a request on one node could spawn a new request on a | 6 | in order to call the .bdrv_co_drain_end() driver callback. This is now |
6 | different node (which might have been drained before, so we wouldn't | 7 | done by a separate bdrv_drain_invoke() call. |
7 | drain the new request). In reality, new requests to different nodes | ||
8 | aren't spawned out of nothing, but only in the context of a parent | ||
9 | request, and they aren't submitted to random nodes, but only to child | ||
10 | nodes. As long as we still poll for the completion of the parent request | ||
11 | (which we do), draining each root node separately is good enough. | ||
12 | |||
13 | Remove the additional polling code from bdrv_drain_all_begin() and | ||
14 | replace it with an assertion that all nodes are already idle after we | ||
15 | drained them separately. | ||
16 | 8 | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
18 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
19 | --- | 12 | --- |
20 | block/io.c | 41 ++++++++++++----------------------------- | 13 | block/io.c | 2 -- |
21 | 1 file changed, 12 insertions(+), 29 deletions(-) | 14 | 1 file changed, 2 deletions(-) |
22 | 15 | ||
23 | diff --git a/block/io.c b/block/io.c | 16 | diff --git a/block/io.c b/block/io.c |
24 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/block/io.c | 18 | --- a/block/io.c |
26 | +++ b/block/io.c | 19 | +++ b/block/io.c |
27 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain(BlockDriverState *bs) | 20 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
28 | bdrv_drained_end(bs); | 21 | |
22 | bdrv_parent_drained_end(bs); | ||
23 | bdrv_drain_invoke(bs, false); | ||
24 | - bdrv_drain_recurse(bs); | ||
25 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
29 | } | 26 | } |
30 | 27 | ||
31 | +static void bdrv_drain_assert_idle(BlockDriverState *bs) | 28 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
32 | +{ | 29 | aio_enable_external(aio_context); |
33 | + BdrvChild *child, *next; | 30 | bdrv_parent_drained_end(bs); |
34 | + | 31 | bdrv_drain_invoke(bs, false); |
35 | + assert(atomic_read(&bs->in_flight) == 0); | 32 | - bdrv_drain_recurse(bs); |
36 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
37 | + bdrv_drain_assert_idle(child->bs); | ||
38 | + } | ||
39 | +} | ||
40 | + | ||
41 | /* | ||
42 | * Wait for pending requests to complete across all BlockDriverStates | ||
43 | * | ||
44 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain(BlockDriverState *bs) | ||
45 | */ | ||
46 | void bdrv_drain_all_begin(void) | ||
47 | { | ||
48 | - /* Always run first iteration so any pending completion BHs run */ | ||
49 | - bool waited = true; | ||
50 | BlockDriverState *bs; | ||
51 | BdrvNextIterator it; | ||
52 | - GSList *aio_ctxs = NULL, *ctx; | ||
53 | |||
54 | /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread | ||
55 | * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on | ||
56 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
57 | aio_context_acquire(aio_context); | ||
58 | bdrv_do_drained_begin(bs, true, NULL); | ||
59 | aio_context_release(aio_context); | 33 | aio_context_release(aio_context); |
60 | - | ||
61 | - if (!g_slist_find(aio_ctxs, aio_context)) { | ||
62 | - aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); | ||
63 | - } | ||
64 | } | 34 | } |
65 | 35 | ||
66 | - /* Note that completion of an asynchronous I/O operation can trigger any | ||
67 | - * number of other I/O operations on other devices---for example a | ||
68 | - * coroutine can submit an I/O request to another device in response to | ||
69 | - * request completion. Therefore we must keep looping until there was no | ||
70 | - * more activity rather than simply draining each device independently. | ||
71 | - */ | ||
72 | - while (waited) { | ||
73 | - waited = false; | ||
74 | - | ||
75 | - for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { | ||
76 | - AioContext *aio_context = ctx->data; | ||
77 | - | ||
78 | - aio_context_acquire(aio_context); | ||
79 | - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
80 | - if (aio_context == bdrv_get_aio_context(bs)) { | ||
81 | - waited |= bdrv_drain_recurse(bs); | ||
82 | - } | ||
83 | - } | ||
84 | - aio_context_release(aio_context); | ||
85 | - } | ||
86 | + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
87 | + bdrv_drain_assert_idle(bs); | ||
88 | } | ||
89 | - | ||
90 | - g_slist_free(aio_ctxs); | ||
91 | } | ||
92 | |||
93 | void bdrv_drain_all_end(void) | ||
94 | -- | 36 | -- |
95 | 2.13.6 | 37 | 2.13.6 |
96 | 38 | ||
97 | 39 | diff view generated by jsdifflib |
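After each root node has been drained separately, the patch on the left replaces the old retry loop with a recursive assertion that the whole graph really is idle. A toy version of that check, with a made-up Node type instead of the real BlockDriverState, could look like this:

    #include <assert.h>
    #include <stdio.h>

    typedef struct Node {
        int in_flight;
        struct Node *children[4];
        int nb_children;
    } Node;

    /* Recursively verify that no node in the subtree has requests left. */
    static void drain_assert_idle(Node *bs)
    {
        assert(bs->in_flight == 0);
        for (int i = 0; i < bs->nb_children; i++) {
            drain_assert_idle(bs->children[i]);
        }
    }

    int main(void)
    {
        Node leaf = { .in_flight = 0 };
        Node root = { .in_flight = 0, .children = { &leaf }, .nb_children = 1 };

        drain_assert_idle(&root);   /* aborts if draining missed a request */
        printf("all nodes idle\n");
        return 0;
    }

The assertion encodes the argument from the commit message: new requests only appear in the context of a parent request on a child node, so once every root is drained and its parent requests have completed, nothing in the subtree can still be in flight.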
1 | bdrv_do_drain_begin/end() already implement everything that | 1 | Drain requests are propagated to child nodes, parent nodes and directly |
---|---|---|---|
2 | bdrv_drain_all_begin/end() need and currently still do manually: Disable | 2 | to the AioContext. The order in which this happened was different |
3 | external events, call parent drain callbacks, call block driver | 3 | between all combinations of drain/drain_all and begin/end. |
4 | callbacks. | ||
5 | 4 | ||
6 | It also does two more things: | 5 | The correct order is to keep children only drained when their parents |
6 | are also drained. This means that at the start of a drained section, the | ||
7 | AioContext needs to be drained first, the parents second and only then | ||
8 | the children. The correct order for the end of a drained section is the | ||
9 | opposite. | ||
7 | 10 | ||
8 | The first is incrementing bs->quiesce_counter. bdrv_drain_all() already | 11 | This patch changes the three other functions to follow the example of |
9 | stood out in the test case by behaving different from the other drain | 12 | bdrv_drained_begin(), which is the only one that got it right. |
10 | variants. Adding this is not only safe, but in fact a bug fix. | ||
11 | |||
12 | The second is calling bdrv_drain_recurse(). We already do that later in | ||
13 | the same function in a loop, so basically doing an early first iteration | ||
14 | doesn't hurt. | ||
15 | 13 | ||
16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
17 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 15 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
18 | --- | 16 | --- |
19 | block/io.c | 10 ++-------- | 17 | block/io.c | 12 ++++++++---- |
20 | tests/test-bdrv-drain.c | 14 ++++---------- | 18 | 1 file changed, 8 insertions(+), 4 deletions(-) |
21 | 2 files changed, 6 insertions(+), 18 deletions(-) | ||
22 | 19 | ||
23 | diff --git a/block/io.c b/block/io.c | 20 | diff --git a/block/io.c b/block/io.c |
24 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/block/io.c | 22 | --- a/block/io.c |
26 | +++ b/block/io.c | 23 | +++ b/block/io.c |
24 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) | ||
25 | return; | ||
26 | } | ||
27 | |||
28 | + /* Stop things in parent-to-child order */ | ||
29 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { | ||
30 | aio_disable_external(bdrv_get_aio_context(bs)); | ||
31 | bdrv_parent_drained_begin(bs); | ||
32 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | ||
33 | return; | ||
34 | } | ||
35 | |||
36 | - bdrv_parent_drained_end(bs); | ||
37 | + /* Re-enable things in child-to-parent order */ | ||
38 | bdrv_drain_invoke(bs, false); | ||
39 | + bdrv_parent_drained_end(bs); | ||
40 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
41 | } | ||
42 | |||
27 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | 43 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
28 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | 44 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
29 | AioContext *aio_context = bdrv_get_aio_context(bs); | 45 | AioContext *aio_context = bdrv_get_aio_context(bs); |
30 | 46 | ||
31 | - /* Stop things in parent-to-child order */ | 47 | + /* Stop things in parent-to-child order */ |
32 | aio_context_acquire(aio_context); | 48 | aio_context_acquire(aio_context); |
33 | - aio_disable_external(aio_context); | 49 | - bdrv_parent_drained_begin(bs); |
34 | - bdrv_parent_drained_begin(bs, NULL); | 50 | aio_disable_external(aio_context); |
35 | - bdrv_drain_invoke(bs, true, true); | 51 | + bdrv_parent_drained_begin(bs); |
36 | + bdrv_do_drained_begin(bs, true, NULL); | 52 | bdrv_drain_invoke(bs, true); |
37 | aio_context_release(aio_context); | 53 | aio_context_release(aio_context); |
38 | 54 | ||
39 | if (!g_slist_find(aio_ctxs, aio_context)) { | ||
40 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | 55 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
41 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | 56 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
42 | AioContext *aio_context = bdrv_get_aio_context(bs); | 57 | AioContext *aio_context = bdrv_get_aio_context(bs); |
43 | 58 | ||
44 | - /* Re-enable things in child-to-parent order */ | 59 | + /* Re-enable things in child-to-parent order */ |
45 | aio_context_acquire(aio_context); | 60 | aio_context_acquire(aio_context); |
46 | - bdrv_drain_invoke(bs, false, true); | ||
47 | - bdrv_parent_drained_end(bs, NULL); | ||
48 | - aio_enable_external(aio_context); | 61 | - aio_enable_external(aio_context); |
49 | + bdrv_do_drained_end(bs, true, NULL); | 62 | - bdrv_parent_drained_end(bs); |
63 | bdrv_drain_invoke(bs, false); | ||
64 | + bdrv_parent_drained_end(bs); | ||
65 | + aio_enable_external(aio_context); | ||
50 | aio_context_release(aio_context); | 66 | aio_context_release(aio_context); |
51 | } | 67 | } |
52 | } | 68 | |
53 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/tests/test-bdrv-drain.c | ||
56 | +++ b/tests/test-bdrv-drain.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) | ||
58 | |||
59 | static void test_quiesce_drain_all(void) | ||
60 | { | ||
61 | - // XXX drain_all doesn't quiesce | ||
62 | - //test_quiesce_common(BDRV_DRAIN_ALL, true); | ||
63 | + test_quiesce_common(BDRV_DRAIN_ALL, true); | ||
64 | } | ||
65 | |||
66 | static void test_quiesce_drain(void) | ||
67 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) | ||
68 | |||
69 | for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { | ||
70 | for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { | ||
71 | - /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ | ||
72 | - int bs_quiesce = (outer != BDRV_DRAIN_ALL) + | ||
73 | - (inner != BDRV_DRAIN_ALL); | ||
74 | - int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) + | ||
75 | - (inner == BDRV_SUBTREE_DRAIN); | ||
76 | - int backing_cb_cnt = (outer != BDRV_DRAIN) + | ||
77 | + int backing_quiesce = (outer != BDRV_DRAIN) + | ||
78 | (inner != BDRV_DRAIN); | ||
79 | |||
80 | g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
81 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) | ||
82 | do_drain_begin(outer, bs); | ||
83 | do_drain_begin(inner, bs); | ||
84 | |||
85 | - g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce); | ||
86 | + g_assert_cmpint(bs->quiesce_counter, ==, 2); | ||
87 | g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); | ||
88 | g_assert_cmpint(s->drain_count, ==, 2); | ||
89 | - g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt); | ||
90 | + g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); | ||
91 | |||
92 | do_drain_end(inner, bs); | ||
93 | do_drain_end(outer, bs); | ||
94 | -- | 69 | -- |
95 | 2.13.6 | 70 | 2.13.6 |
96 | 71 | ||
97 | 72 | diff view generated by jsdifflib |
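The ordering rule from the patch on the right is easiest to see side by side: drained_begin quiesces from the outside in (AioContext, then parents, then the driver callback), and drained_end is its exact mirror image. A small self-contained sketch, with printf stubs standing in for the real callbacks rather than the actual QEMU functions:

    #include <stdio.h>

    static void aio_disable_external(void) { printf("aio: external disabled\n"); }
    static void aio_enable_external(void)  { printf("aio: external enabled\n"); }
    static void parents_begin(void)        { printf("parents: drained_begin\n"); }
    static void parents_end(void)          { printf("parents: drained_end\n"); }
    static void driver_begin(void)         { printf("driver: drain_begin\n"); }
    static void driver_end(void)           { printf("driver: drain_end\n"); }

    /* Quiesce from the outside in: context, then parents, then driver. */
    static void drained_begin(void)
    {
        aio_disable_external();
        parents_begin();
        driver_begin();
    }

    /* Resume in the exact opposite, child-to-parent order. */
    static void drained_end(void)
    {
        driver_end();
        parents_end();
        aio_enable_external();
    }

    int main(void)
    {
        drained_begin();
        printf("-- drained section --\n");
        drained_end();
        return 0;
    }

Keeping end as the strict reverse of begin is what guarantees the invariant named in the commit message: a child is never active while one of its parents is still drained.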
1 | From: Max Reitz <mreitz@redhat.com> | 1 | Commit 15afd94a047 added code to acquire and release the AioContext in |
---|---|---|---|
2 | qemuio_command(). This means that the lock is taken twice now in the | ||
3 | call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for | ||
4 | any requests issued to nodes in a non-mainloop AioContext. | ||
2 | 5 | ||
3 | This new parameter allows the caller to just query the next dirty | 6 | Dropping the first locking from hmp_qemu_io() fixes the problem. |
4 | position without moving the iterator. | ||
5 | 7 | ||
6 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
7 | Reviewed-by: Fam Zheng <famz@redhat.com> | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
8 | Reviewed-by: John Snow <jsnow@redhat.com> | ||
9 | Message-id: 20180613181823.13618-8-mreitz@redhat.com | ||
10 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
11 | --- | 10 | --- |
12 | include/qemu/hbitmap.h | 5 ++++- | 11 | hmp.c | 6 ------ |
13 | block/backup.c | 2 +- | 12 | 1 file changed, 6 deletions(-) |
14 | block/dirty-bitmap.c | 2 +- | ||
15 | tests/test-hbitmap.c | 26 +++++++++++++------------- | ||
16 | util/hbitmap.c | 10 +++++++--- | ||
17 | 5 files changed, 26 insertions(+), 19 deletions(-) | ||
18 | 13 | ||
19 | diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h | 14 | diff --git a/hmp.c b/hmp.c |
20 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/include/qemu/hbitmap.h | 16 | --- a/hmp.c |
22 | +++ b/include/qemu/hbitmap.h | 17 | +++ b/hmp.c |
23 | @@ -XXX,XX +XXX,XX @@ void hbitmap_free_meta(HBitmap *hb); | 18 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
24 | /** | ||
25 | * hbitmap_iter_next: | ||
26 | * @hbi: HBitmapIter to operate on. | ||
27 | + * @advance: If true, advance the iterator. Otherwise, the next call | ||
28 | + * of this function will return the same result (if that | ||
29 | + * position is still dirty). | ||
30 | * | ||
31 | * Return the next bit that is set in @hbi's associated HBitmap, | ||
32 | * or -1 if all remaining bits are zero. | ||
33 | */ | ||
34 | -int64_t hbitmap_iter_next(HBitmapIter *hbi); | ||
35 | +int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance); | ||
36 | |||
37 | /** | ||
38 | * hbitmap_iter_next_word: | ||
39 | diff --git a/block/backup.c b/block/backup.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/block/backup.c | ||
42 | +++ b/block/backup.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) | ||
44 | HBitmapIter hbi; | ||
45 | |||
46 | hbitmap_iter_init(&hbi, job->copy_bitmap, 0); | ||
47 | - while ((cluster = hbitmap_iter_next(&hbi)) != -1) { | ||
48 | + while ((cluster = hbitmap_iter_next(&hbi, true)) != -1) { | ||
49 | do { | ||
50 | if (yield_and_check(job)) { | ||
51 | return 0; | ||
52 | diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/block/dirty-bitmap.c | ||
55 | +++ b/block/dirty-bitmap.c | ||
56 | @@ -XXX,XX +XXX,XX @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) | ||
57 | |||
58 | int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) | ||
59 | { | 19 | { |
60 | - return hbitmap_iter_next(&iter->hbi); | 20 | BlockBackend *blk; |
61 | + return hbitmap_iter_next(&iter->hbi, true); | 21 | BlockBackend *local_blk = NULL; |
62 | } | 22 | - AioContext *aio_context; |
63 | 23 | const char* device = qdict_get_str(qdict, "device"); | |
64 | /* Called within bdrv_dirty_bitmap_lock..unlock */ | 24 | const char* command = qdict_get_str(qdict, "command"); |
65 | diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c | 25 | Error *err = NULL; |
66 | index XXXXXXX..XXXXXXX 100644 | 26 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
67 | --- a/tests/test-hbitmap.c | ||
68 | +++ b/tests/test-hbitmap.c | ||
69 | @@ -XXX,XX +XXX,XX @@ static void hbitmap_test_check(TestHBitmapData *data, | ||
70 | |||
71 | i = first; | ||
72 | for (;;) { | ||
73 | - next = hbitmap_iter_next(&hbi); | ||
74 | + next = hbitmap_iter_next(&hbi, true); | ||
75 | if (next < 0) { | ||
76 | next = data->size; | ||
77 | } | ||
78 | @@ -XXX,XX +XXX,XX @@ static void test_hbitmap_iter_granularity(TestHBitmapData *data, | ||
79 | /* Note that hbitmap_test_check has to be invoked manually in this test. */ | ||
80 | hbitmap_test_init(data, 131072 << 7, 7); | ||
81 | hbitmap_iter_init(&hbi, data->hb, 0); | ||
82 | - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); | ||
83 | + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
84 | |||
85 | hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8); | ||
86 | hbitmap_iter_init(&hbi, data->hb, 0); | ||
87 | - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); | ||
88 | - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); | ||
89 | + g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7); | ||
90 | + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
91 | |||
92 | hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); | ||
93 | - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); | ||
94 | + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
95 | |||
96 | hbitmap_test_set(data, (131072 << 7) - 8, 8); | ||
97 | hbitmap_iter_init(&hbi, data->hb, 0); | ||
98 | - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); | ||
99 | - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); | ||
100 | - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); | ||
101 | + g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7); | ||
102 | + g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7); | ||
103 | + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
104 | |||
105 | hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); | ||
106 | - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); | ||
107 | - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); | ||
108 | + g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7); | ||
109 | + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
110 | } | ||
111 | |||
112 | static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff) | ||
113 | @@ -XXX,XX +XXX,XX @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData *data, | ||
114 | for (i = 0; i < num_positions; i++) { | ||
115 | hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true); | ||
116 | hbitmap_iter_init(&iter, data->hb, 0); | ||
117 | - next = hbitmap_iter_next(&iter); | ||
118 | + next = hbitmap_iter_next(&iter, true); | ||
119 | if (i == num_positions - 1) { | ||
120 | g_assert_cmpint(next, ==, -1); | ||
121 | } else { | ||
122 | @@ -XXX,XX +XXX,XX @@ static void test_hbitmap_iter_and_reset(TestHBitmapData *data, | ||
123 | |||
124 | hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1); | ||
125 | |||
126 | - hbitmap_iter_next(&hbi); | ||
127 | + hbitmap_iter_next(&hbi, true); | ||
128 | |||
129 | hbitmap_reset_all(data->hb); | ||
130 | - hbitmap_iter_next(&hbi); | ||
131 | + hbitmap_iter_next(&hbi, true); | ||
132 | } | ||
133 | |||
134 | static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start) | ||
135 | diff --git a/util/hbitmap.c b/util/hbitmap.c | ||
136 | index XXXXXXX..XXXXXXX 100644 | ||
137 | --- a/util/hbitmap.c | ||
138 | +++ b/util/hbitmap.c | ||
139 | @@ -XXX,XX +XXX,XX @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi) | ||
140 | return cur; | ||
141 | } | ||
142 | |||
143 | -int64_t hbitmap_iter_next(HBitmapIter *hbi) | ||
144 | +int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance) | ||
145 | { | ||
146 | unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] & | ||
147 | hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos]; | ||
148 | @@ -XXX,XX +XXX,XX @@ int64_t hbitmap_iter_next(HBitmapIter *hbi) | ||
149 | } | 27 | } |
150 | } | 28 | } |
151 | 29 | ||
152 | - /* The next call will resume work from the next bit. */ | 30 | - aio_context = blk_get_aio_context(blk); |
153 | - hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1); | 31 | - aio_context_acquire(aio_context); |
154 | + if (advance) { | 32 | - |
155 | + /* The next call will resume work from the next bit. */ | 33 | /* |
156 | + hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1); | 34 | * Notably absent: Proper permission management. This is sad, but it seems |
157 | + } else { | 35 | * almost impossible to achieve without changing the semantics and thereby |
158 | + hbi->cur[HBITMAP_LEVELS - 1] = cur; | 36 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
159 | + } | 37 | */ |
160 | item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur); | 38 | qemuio_command(blk, command); |
161 | 39 | ||
162 | return item << hbi->granularity; | 40 | - aio_context_release(aio_context); |
41 | - | ||
42 | fail: | ||
43 | blk_unref(local_blk); | ||
44 | hmp_handle_error(mon, &err); | ||
163 | -- | 45 | -- |
164 | 2.13.6 | 46 | 2.13.6 |
165 | 47 | ||
166 | 48 | diff view generated by jsdifflib |
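The new advance flag turns hbitmap_iter_next() into either a peek or a step. Below is a minimal single-word model of that behaviour, not the real multi-level HBitmap, and it relies on the GCC/Clang __builtin_ctzll intrinsic:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint64_t cur;   /* remaining set bits; the lowest one is "next" */
    } BitIter;

    static int64_t iter_next(BitIter *it, bool advance)
    {
        if (it->cur == 0) {
            return -1;
        }
        int64_t item = __builtin_ctzll(it->cur);   /* lowest set bit */
        if (advance) {
            it->cur &= it->cur - 1;   /* drop it: the next call moves on */
        }
        return item;
    }

    int main(void)
    {
        BitIter it = { .cur = 0x19 };   /* bits 0, 3 and 4 are set */

        printf("%lld\n", (long long)iter_next(&it, false));  /* 0 (peek) */
        printf("%lld\n", (long long)iter_next(&it, true));   /* 0 again  */
        printf("%lld\n", (long long)iter_next(&it, true));   /* 3        */
        printf("%lld\n", (long long)iter_next(&it, true));   /* 4        */
        printf("%lld\n", (long long)iter_next(&it, true));   /* -1       */
        return 0;
    }

As in the patch, the non-advancing call leaves the iterator state untouched, so a subsequent advancing call must return the same position.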
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | Add a function that wraps hbitmap_iter_next() and always calls it in | 3 | Since bdrv_co_preadv does all neccessary checks including |
4 | non-advancing mode first, and in advancing mode next. The result should | 4 | reading after the end of the backing file, avoid duplication |
5 | always be the same. | 5 | of verification before bdrv_co_preadv call. |
6 | 6 | ||
7 | By using this function everywhere we called hbitmap_iter_next() before, | 7 | Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> |
8 | we should get good test coverage for non-advancing hbitmap_iter_next(). | 8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | --- | ||
12 | block/qcow2.h | 3 --- | ||
13 | block/qcow2.c | 51 ++++++++------------------------------------------- | ||
14 | 2 files changed, 8 insertions(+), 46 deletions(-) | ||
9 | 15 | ||
10 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 16 | diff --git a/block/qcow2.h b/block/qcow2.h |
11 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
12 | Reviewed-by: John Snow <jsnow@redhat.com> | ||
13 | Message-id: 20180613181823.13618-9-mreitz@redhat.com | ||
14 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
15 | --- | ||
16 | tests/test-hbitmap.c | 36 ++++++++++++++++++++++++------------ | ||
17 | 1 file changed, 24 insertions(+), 12 deletions(-) | ||
18 | |||
19 | diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/tests/test-hbitmap.c | 18 | --- a/block/qcow2.h |
22 | +++ b/tests/test-hbitmap.c | 19 | +++ b/block/qcow2.h |
23 | @@ -XXX,XX +XXX,XX @@ typedef struct TestHBitmapData { | 20 | @@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) |
24 | } TestHBitmapData; | ||
25 | |||
26 | |||
27 | +static int64_t check_hbitmap_iter_next(HBitmapIter *hbi) | ||
28 | +{ | ||
29 | + int next0, next1; | ||
30 | + | ||
31 | + next0 = hbitmap_iter_next(hbi, false); | ||
32 | + next1 = hbitmap_iter_next(hbi, true); | ||
33 | + | ||
34 | + g_assert_cmpint(next0, ==, next1); | ||
35 | + | ||
36 | + return next0; | ||
37 | +} | ||
38 | + | ||
39 | /* Check that the HBitmap and the shadow bitmap contain the same data, | ||
40 | * ignoring the same "first" bits. | ||
41 | */ | ||
42 | @@ -XXX,XX +XXX,XX @@ static void hbitmap_test_check(TestHBitmapData *data, | ||
43 | |||
44 | i = first; | ||
45 | for (;;) { | ||
46 | - next = hbitmap_iter_next(&hbi, true); | ||
47 | + next = check_hbitmap_iter_next(&hbi); | ||
48 | if (next < 0) { | ||
49 | next = data->size; | ||
50 | } | ||
51 | @@ -XXX,XX +XXX,XX @@ static void test_hbitmap_iter_granularity(TestHBitmapData *data, | ||
52 | /* Note that hbitmap_test_check has to be invoked manually in this test. */ | ||
53 | hbitmap_test_init(data, 131072 << 7, 7); | ||
54 | hbitmap_iter_init(&hbi, data->hb, 0); | ||
55 | - g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
56 | + g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0); | ||
57 | |||
58 | hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8); | ||
59 | hbitmap_iter_init(&hbi, data->hb, 0); | ||
60 | - g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7); | ||
61 | - g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
62 | + g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); | ||
63 | + g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0); | ||
64 | |||
65 | hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); | ||
66 | g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
67 | |||
68 | hbitmap_test_set(data, (131072 << 7) - 8, 8); | ||
69 | hbitmap_iter_init(&hbi, data->hb, 0); | ||
70 | - g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7); | ||
71 | - g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7); | ||
72 | - g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
73 | + g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); | ||
74 | + g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7); | ||
75 | + g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0); | ||
76 | |||
77 | hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); | ||
78 | - g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7); | ||
79 | - g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); | ||
80 | + g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7); | ||
81 | + g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0); | ||
82 | } | 21 | } |
83 | 22 | ||
84 | static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff) | 23 | /* qcow2.c functions */ |
85 | @@ -XXX,XX +XXX,XX @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData *data, | 24 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, |
86 | for (i = 0; i < num_positions; i++) { | 25 | - int64_t sector_num, int nb_sectors); |
87 | hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true); | 26 | - |
88 | hbitmap_iter_init(&iter, data->hb, 0); | 27 | int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, |
89 | - next = hbitmap_iter_next(&iter, true); | 28 | int refcount_order, bool generous_increase, |
90 | + next = check_hbitmap_iter_next(&iter); | 29 | uint64_t *refblock_count); |
91 | if (i == num_positions - 1) { | 30 | diff --git a/block/qcow2.c b/block/qcow2.c |
92 | g_assert_cmpint(next, ==, -1); | 31 | index XXXXXXX..XXXXXXX 100644 |
93 | } else { | 32 | --- a/block/qcow2.c |
94 | @@ -XXX,XX +XXX,XX @@ static void test_hbitmap_iter_and_reset(TestHBitmapData *data, | 33 | +++ b/block/qcow2.c |
95 | 34 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, | |
96 | hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1); | 35 | return status; |
97 | |||
98 | - hbitmap_iter_next(&hbi, true); | ||
99 | + check_hbitmap_iter_next(&hbi); | ||
100 | |||
101 | hbitmap_reset_all(data->hb); | ||
102 | - hbitmap_iter_next(&hbi, true); | ||
103 | + check_hbitmap_iter_next(&hbi); | ||
104 | } | 36 | } |
105 | 37 | ||
106 | static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start) | 38 | -/* handle reading after the end of the backing file */ |
39 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, | ||
40 | - int64_t offset, int bytes) | ||
41 | -{ | ||
42 | - uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE; | ||
43 | - int n1; | ||
44 | - | ||
45 | - if ((offset + bytes) <= bs_size) { | ||
46 | - return bytes; | ||
47 | - } | ||
48 | - | ||
49 | - if (offset >= bs_size) { | ||
50 | - n1 = 0; | ||
51 | - } else { | ||
52 | - n1 = bs_size - offset; | ||
53 | - } | ||
54 | - | ||
55 | - qemu_iovec_memset(qiov, n1, 0, bytes - n1); | ||
56 | - | ||
57 | - return n1; | ||
58 | -} | ||
59 | - | ||
60 | static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
61 | uint64_t bytes, QEMUIOVector *qiov, | ||
62 | int flags) | ||
63 | { | ||
64 | BDRVQcow2State *s = bs->opaque; | ||
65 | - int offset_in_cluster, n1; | ||
66 | + int offset_in_cluster; | ||
67 | int ret; | ||
68 | unsigned int cur_bytes; /* number of bytes in current iteration */ | ||
69 | uint64_t cluster_offset = 0; | ||
70 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
71 | case QCOW2_CLUSTER_UNALLOCATED: | ||
72 | |||
73 | if (bs->backing) { | ||
74 | - /* read from the base image */ | ||
75 | - n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov, | ||
76 | - offset, cur_bytes); | ||
77 | - if (n1 > 0) { | ||
78 | - QEMUIOVector local_qiov; | ||
79 | - | ||
80 | - qemu_iovec_init(&local_qiov, hd_qiov.niov); | ||
81 | - qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1); | ||
82 | - | ||
83 | - BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | ||
84 | - qemu_co_mutex_unlock(&s->lock); | ||
85 | - ret = bdrv_co_preadv(bs->backing, offset, n1, | ||
86 | - &local_qiov, 0); | ||
87 | - qemu_co_mutex_lock(&s->lock); | ||
88 | - | ||
89 | - qemu_iovec_destroy(&local_qiov); | ||
90 | - | ||
91 | - if (ret < 0) { | ||
92 | - goto fail; | ||
93 | - } | ||
94 | + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | ||
95 | + qemu_co_mutex_unlock(&s->lock); | ||
96 | + ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, | ||
97 | + &hd_qiov, 0); | ||
98 | + qemu_co_mutex_lock(&s->lock); | ||
99 | + if (ret < 0) { | ||
100 | + goto fail; | ||
101 | } | ||
102 | } else { | ||
103 | /* Note: in this case, no need to wait */ | ||
107 | -- | 104 | -- |
108 | 2.13.6 | 105 | 2.13.6 |
109 | 106 | ||
110 | 107 | diff view generated by jsdifflib |
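The qcow2 simplification on the right rests on one property of bdrv_co_preadv() that the commit message states: a read that extends past the end of the backing file comes back zero-filled, so no qcow2_backing_read1()-style pre-check is needed. A toy flat-array version of that contract, purely for illustration:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define BACKING_SIZE 8
    static const char backing[] = "old data";   /* 8 bytes of backing file */

    /* Reads are satisfied from the file up to EOF; the rest reads as 0. */
    static void backing_preadv(uint64_t offset, uint64_t bytes, uint8_t *buf)
    {
        uint64_t n = 0;

        if (offset < BACKING_SIZE) {
            n = BACKING_SIZE - offset;
            if (n > bytes) {
                n = bytes;
            }
            memcpy(buf, backing + offset, n);
        }
        memset(buf + n, 0, bytes - n);
    }

    int main(void)
    {
        uint8_t buf[16];

        backing_preadv(4, sizeof(buf), buf);    /* crosses the EOF boundary */
        for (unsigned i = 0; i < sizeof(buf); i++) {
            printf("%02x ", buf[i]);
        }
        printf("\n");
        return 0;
    }

Once the zero-fill handling lives in one place like this, the caller can issue a single full-length read, which is exactly what the patched qcow2_co_preadv() does.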
1 | From: Max Reitz <mreitz@redhat.com> | 1 | Removing a quorum child node with x-blockdev-change results in a quorum |
---|---|---|---|
2 | driver state that cannot be recreated with create options because it | ||
3 | would require a list with gaps. This causes trouble in at least | ||
4 | .bdrv_refresh_filename(). | ||
2 | 5 | ||
3 | This patch implements active synchronous mirroring. In active mode, the | 6 | Document this problem so that we won't accidentally mark the command |
4 | passive mechanism will still be in place and is used to copy all | 7 | stable without having addressed it. |
5 | initially dirty clusters off the source disk; but every write request | ||
6 | will write data both to the source and the target disk, so the source | ||
7 | cannot be dirtied faster than data is mirrored to the target. Also, | ||
8 | once the block job has converged (BLOCK_JOB_READY sent), source and | ||
9 | target are guaranteed to stay in sync (unless an error occurs). | ||
10 | 8 | ||
11 | Active mode is completely optional and currently disabled at runtime. A | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | later patch will add a way for users to enable it. | 10 | Reviewed-by: Alberto Garcia <berto@igalia.com> |
13 | |||
14 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
15 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
16 | Message-id: 20180613181823.13618-13-mreitz@redhat.com | ||
17 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
18 | --- | 11 | --- |
19 | qapi/block-core.json | 18 ++++ | 12 | qapi/block-core.json | 4 ++++ |
20 | block/mirror.c | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++- | 13 | 1 file changed, 4 insertions(+) |
21 | 2 files changed, 265 insertions(+), 5 deletions(-) | ||
22 | 14 | ||
23 | diff --git a/qapi/block-core.json b/qapi/block-core.json | 15 | diff --git a/qapi/block-core.json b/qapi/block-core.json |
24 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/qapi/block-core.json | 17 | --- a/qapi/block-core.json |
26 | +++ b/qapi/block-core.json | 18 | +++ b/qapi/block-core.json |
27 | @@ -XXX,XX +XXX,XX @@ | 19 | @@ -XXX,XX +XXX,XX @@ |
28 | 'data': ['top', 'full', 'none', 'incremental'] } | 20 | # does not support all kinds of operations, all kinds of children, nor |
29 | 21 | # all block drivers. | |
30 | ## | 22 | # |
31 | +# @MirrorCopyMode: | 23 | +# FIXME Removing children from a quorum node means introducing gaps in the |
24 | +# child indices. This cannot be represented in the 'children' list of | ||
25 | +# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename(). | ||
32 | +# | 26 | +# |
33 | +# An enumeration whose values tell the mirror block job when to | 27 | # Warning: The data in a new quorum child MUST be consistent with that of |
34 | +# trigger writes to the target. | 28 | # the rest of the array. |
35 | +# | ||
36 | +# @background: copy data in background only. | ||
37 | +# | ||
38 | +# @write-blocking: when data is written to the source, write it | ||
39 | +# (synchronously) to the target as well. In | ||
40 | +# addition, data is copied in background just like in | ||
41 | +# @background mode. | ||
42 | +# | ||
43 | +# Since: 3.0 | ||
44 | +## | ||
45 | +{ 'enum': 'MirrorCopyMode', | ||
46 | + 'data': ['background', 'write-blocking'] } | ||
47 | + | ||
48 | +## | ||
49 | # @BlockJobInfo: | ||
50 | # | 29 | # |
51 | # Information about a long-running block device operation. | ||
52 | diff --git a/block/mirror.c b/block/mirror.c | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/block/mirror.c | ||
55 | +++ b/block/mirror.c | ||
56 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob { | ||
57 | Error *replace_blocker; | ||
58 | bool is_none_mode; | ||
59 | BlockMirrorBackingMode backing_mode; | ||
60 | + MirrorCopyMode copy_mode; | ||
61 | BlockdevOnError on_source_error, on_target_error; | ||
62 | bool synced; | ||
63 | + /* Set when the target is synced (dirty bitmap is clean, nothing | ||
64 | + * in flight) and the job is running in active mode */ | ||
65 | + bool actively_synced; | ||
66 | bool should_complete; | ||
67 | int64_t granularity; | ||
68 | size_t buf_size; | ||
69 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob { | ||
70 | int target_cluster_size; | ||
71 | int max_iov; | ||
72 | bool initial_zeroing_ongoing; | ||
73 | + int in_active_write_counter; | ||
74 | } MirrorBlockJob; | ||
75 | |||
76 | typedef struct MirrorBDSOpaque { | ||
77 | @@ -XXX,XX +XXX,XX @@ struct MirrorOp { | ||
78 | int64_t *bytes_handled; | ||
79 | |||
80 | bool is_pseudo_op; | ||
81 | + bool is_active_write; | ||
82 | CoQueue waiting_requests; | ||
83 | |||
84 | QTAILQ_ENTRY(MirrorOp) next; | ||
85 | @@ -XXX,XX +XXX,XX @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, | ||
86 | int error) | ||
87 | { | ||
88 | s->synced = false; | ||
89 | + s->actively_synced = false; | ||
90 | if (read) { | ||
91 | return block_job_error_action(&s->common, s->on_source_error, | ||
92 | true, error); | ||
93 | @@ -XXX,XX +XXX,XX @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, | ||
94 | return ret; | ||
95 | } | ||
96 | |||
97 | -static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) | ||
98 | +static inline void mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) | ||
99 | { | ||
100 | MirrorOp *op; | ||
101 | |||
102 | @@ -XXX,XX +XXX,XX @@ static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) | ||
103 | * caller of this function. Since there is only one pseudo op | ||
104 | * at any given time, we will always find some real operation | ||
105 | * to wait on. */ | ||
106 | - if (!op->is_pseudo_op) { | ||
107 | + if (!op->is_pseudo_op && op->is_active_write == active) { | ||
108 | qemu_co_queue_wait(&op->waiting_requests, NULL); | ||
109 | return; | ||
110 | } | ||
111 | @@ -XXX,XX +XXX,XX @@ static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) | ||
112 | abort(); | ||
113 | } | ||
114 | |||
115 | +static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) | ||
116 | +{ | ||
117 | + /* Only non-active operations use up in-flight slots */ | ||
118 | + mirror_wait_for_any_operation(s, false); | ||
119 | +} | ||
120 | + | ||
121 | /* Perform a mirror copy operation. | ||
122 | * | ||
123 | * *op->bytes_handled is set to the number of bytes copied after and | ||
124 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque) | ||
125 | /* Transition to the READY state and wait for complete. */ | ||
126 | job_transition_to_ready(&s->common.job); | ||
127 | s->synced = true; | ||
128 | + s->actively_synced = true; | ||
129 | while (!job_is_cancelled(&s->common.job) && !s->should_complete) { | ||
130 | job_yield(&s->common.job); | ||
131 | } | ||
132 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque) | ||
133 | int64_t cnt, delta; | ||
134 | bool should_complete; | ||
135 | |||
136 | + /* Do not start passive operations while there are active | ||
137 | + * writes in progress */ | ||
138 | + while (s->in_active_write_counter) { | ||
139 | + mirror_wait_for_any_operation(s, true); | ||
140 | + } | ||
141 | + | ||
142 | if (s->ret < 0) { | ||
143 | ret = s->ret; | ||
144 | goto immediate_exit; | ||
145 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque) | ||
146 | */ | ||
147 | job_transition_to_ready(&s->common.job); | ||
148 | s->synced = true; | ||
149 | + if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) { | ||
150 | + s->actively_synced = true; | ||
151 | + } | ||
152 | } | ||
153 | |||
154 | should_complete = s->should_complete || | ||
155 | @@ -XXX,XX +XXX,XX @@ static const BlockJobDriver commit_active_job_driver = { | ||
156 | .drain = mirror_drain, | ||
157 | }; | ||
158 | |||
159 | +static void do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, | ||
160 | + uint64_t offset, uint64_t bytes, | ||
161 | + QEMUIOVector *qiov, int flags) | ||
162 | +{ | ||
163 | + BdrvDirtyBitmapIter *iter; | ||
164 | + QEMUIOVector target_qiov; | ||
165 | + uint64_t dirty_offset; | ||
166 | + int dirty_bytes; | ||
167 | + | ||
168 | + if (qiov) { | ||
169 | + qemu_iovec_init(&target_qiov, qiov->niov); | ||
170 | + } | ||
171 | + | ||
172 | + iter = bdrv_dirty_iter_new(job->dirty_bitmap); | ||
173 | + bdrv_set_dirty_iter(iter, offset); | ||
174 | + | ||
175 | + while (true) { | ||
176 | + bool valid_area; | ||
177 | + int ret; | ||
178 | + | ||
179 | + bdrv_dirty_bitmap_lock(job->dirty_bitmap); | ||
180 | + valid_area = bdrv_dirty_iter_next_area(iter, offset + bytes, | ||
181 | + &dirty_offset, &dirty_bytes); | ||
182 | + if (!valid_area) { | ||
183 | + bdrv_dirty_bitmap_unlock(job->dirty_bitmap); | ||
184 | + break; | ||
185 | + } | ||
186 | + | ||
187 | + bdrv_reset_dirty_bitmap_locked(job->dirty_bitmap, | ||
188 | + dirty_offset, dirty_bytes); | ||
189 | + bdrv_dirty_bitmap_unlock(job->dirty_bitmap); | ||
190 | + | ||
191 | + job_progress_increase_remaining(&job->common.job, dirty_bytes); | ||
192 | + | ||
193 | + assert(dirty_offset - offset <= SIZE_MAX); | ||
194 | + if (qiov) { | ||
195 | + qemu_iovec_reset(&target_qiov); | ||
196 | + qemu_iovec_concat(&target_qiov, qiov, | ||
197 | + dirty_offset - offset, dirty_bytes); | ||
198 | + } | ||
199 | + | ||
200 | + switch (method) { | ||
201 | + case MIRROR_METHOD_COPY: | ||
202 | + ret = blk_co_pwritev(job->target, dirty_offset, dirty_bytes, | ||
203 | + qiov ? &target_qiov : NULL, flags); | ||
204 | + break; | ||
205 | + | ||
206 | + case MIRROR_METHOD_ZERO: | ||
207 | + assert(!qiov); | ||
208 | + ret = blk_co_pwrite_zeroes(job->target, dirty_offset, dirty_bytes, | ||
209 | + flags); | ||
210 | + break; | ||
211 | + | ||
212 | + case MIRROR_METHOD_DISCARD: | ||
213 | + assert(!qiov); | ||
214 | + ret = blk_co_pdiscard(job->target, dirty_offset, dirty_bytes); | ||
215 | + break; | ||
216 | + | ||
217 | + default: | ||
218 | + abort(); | ||
219 | + } | ||
220 | + | ||
221 | + if (ret >= 0) { | ||
222 | + job_progress_update(&job->common.job, dirty_bytes); | ||
223 | + } else { | ||
224 | + BlockErrorAction action; | ||
225 | + | ||
226 | + bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_offset, dirty_bytes); | ||
227 | + job->actively_synced = false; | ||
228 | + | ||
229 | + action = mirror_error_action(job, false, -ret); | ||
230 | + if (action == BLOCK_ERROR_ACTION_REPORT) { | ||
231 | + if (!job->ret) { | ||
232 | + job->ret = ret; | ||
233 | + } | ||
234 | + break; | ||
235 | + } | ||
236 | + } | ||
237 | + } | ||
238 | + | ||
239 | + bdrv_dirty_iter_free(iter); | ||
240 | + if (qiov) { | ||
241 | + qemu_iovec_destroy(&target_qiov); | ||
242 | + } | ||
243 | +} | ||
244 | + | ||
245 | +static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, | ||
246 | + uint64_t offset, | ||
247 | + uint64_t bytes) | ||
248 | +{ | ||
249 | + MirrorOp *op; | ||
250 | + uint64_t start_chunk = offset / s->granularity; | ||
251 | + uint64_t end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity); | ||
252 | + | ||
253 | + op = g_new(MirrorOp, 1); | ||
254 | + *op = (MirrorOp){ | ||
255 | + .s = s, | ||
256 | + .offset = offset, | ||
257 | + .bytes = bytes, | ||
258 | + .is_active_write = true, | ||
259 | + }; | ||
260 | + qemu_co_queue_init(&op->waiting_requests); | ||
261 | + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); | ||
262 | + | ||
263 | + s->in_active_write_counter++; | ||
264 | + | ||
265 | + mirror_wait_on_conflicts(op, s, offset, bytes); | ||
266 | + | ||
267 | + bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk); | ||
268 | + | ||
269 | + return op; | ||
270 | +} | ||
271 | + | ||
272 | +static void coroutine_fn active_write_settle(MirrorOp *op) | ||
273 | +{ | ||
274 | + uint64_t start_chunk = op->offset / op->s->granularity; | ||
275 | + uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes, | ||
276 | + op->s->granularity); | ||
277 | + | ||
278 | + if (!--op->s->in_active_write_counter && op->s->actively_synced) { | ||
279 | + BdrvChild *source = op->s->mirror_top_bs->backing; | ||
280 | + | ||
281 | + if (QLIST_FIRST(&source->bs->parents) == source && | ||
282 | + QLIST_NEXT(source, next_parent) == NULL) | ||
283 | + { | ||
284 | + /* Assert that we are back in sync once all active write | ||
285 | + * operations are settled. | ||
286 | + * Note that we can only assert this if the mirror node | ||
287 | + * is the source node's only parent. */ | ||
288 | + assert(!bdrv_get_dirty_count(op->s->dirty_bitmap)); | ||
289 | + } | ||
290 | + } | ||
291 | + bitmap_clear(op->s->in_flight_bitmap, start_chunk, end_chunk - start_chunk); | ||
292 | + QTAILQ_REMOVE(&op->s->ops_in_flight, op, next); | ||
293 | + qemu_co_queue_restart_all(&op->waiting_requests); | ||
294 | + g_free(op); | ||
295 | +} | ||
296 | + | ||
297 | static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, | ||
298 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) | ||
299 | { | ||
300 | return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); | ||
301 | } | ||
302 | |||
303 | +static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, | ||
304 | + MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, | ||
305 | + int flags) | ||
306 | +{ | ||
307 | + MirrorOp *op = NULL; | ||
308 | + MirrorBDSOpaque *s = bs->opaque; | ||
309 | + int ret = 0; | ||
310 | + bool copy_to_target; | ||
311 | + | ||
312 | + copy_to_target = s->job->ret >= 0 && | ||
313 | + s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; | ||
314 | + | ||
315 | + if (copy_to_target) { | ||
316 | + op = active_write_prepare(s->job, offset, bytes); | ||
317 | + } | ||
318 | + | ||
319 | + switch (method) { | ||
320 | + case MIRROR_METHOD_COPY: | ||
321 | + ret = bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); | ||
322 | + break; | ||
323 | + | ||
324 | + case MIRROR_METHOD_ZERO: | ||
325 | + ret = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); | ||
326 | + break; | ||
327 | + | ||
328 | + case MIRROR_METHOD_DISCARD: | ||
329 | + ret = bdrv_co_pdiscard(bs->backing->bs, offset, bytes); | ||
330 | + break; | ||
331 | + | ||
332 | + default: | ||
333 | + abort(); | ||
334 | + } | ||
335 | + | ||
336 | + if (ret < 0) { | ||
337 | + goto out; | ||
338 | + } | ||
339 | + | ||
340 | + if (copy_to_target) { | ||
341 | + do_sync_target_write(s->job, method, offset, bytes, qiov, flags); | ||
342 | + } | ||
343 | + | ||
344 | +out: | ||
345 | + if (copy_to_target) { | ||
346 | + active_write_settle(op); | ||
347 | + } | ||
348 | + return ret; | ||
349 | +} | ||
350 | + | ||
351 | static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, | ||
352 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) | ||
353 | { | ||
354 | - return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); | ||
355 | + MirrorBDSOpaque *s = bs->opaque; | ||
356 | + QEMUIOVector bounce_qiov; | ||
357 | + void *bounce_buf; | ||
358 | + int ret = 0; | ||
359 | + bool copy_to_target; | ||
360 | + | ||
361 | + copy_to_target = s->job->ret >= 0 && | ||
362 | + s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; | ||
363 | + | ||
364 | + if (copy_to_target) { | ||
365 | + /* The guest might concurrently modify the data to write; but | ||
366 | + * the data on source and destination must match, so we have | ||
367 | + * to use a bounce buffer if we are going to write to the | ||
368 | + * target now. */ | ||
369 | + bounce_buf = qemu_blockalign(bs, bytes); | ||
370 | + iov_to_buf_full(qiov->iov, qiov->niov, 0, bounce_buf, bytes); | ||
371 | + | ||
372 | + qemu_iovec_init(&bounce_qiov, 1); | ||
373 | + qemu_iovec_add(&bounce_qiov, bounce_buf, bytes); | ||
374 | + qiov = &bounce_qiov; | ||
375 | + } | ||
376 | + | ||
377 | + ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov, | ||
378 | + flags); | ||
379 | + | ||
380 | + if (copy_to_target) { | ||
381 | + qemu_iovec_destroy(&bounce_qiov); | ||
382 | + qemu_vfree(bounce_buf); | ||
383 | + } | ||
384 | + | ||
385 | + return ret; | ||
386 | } | ||
387 | |||
388 | static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) | ||
389 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) | ||
390 | static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, | ||
391 | int64_t offset, int bytes, BdrvRequestFlags flags) | ||
392 | { | ||
393 | - return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); | ||
394 | + return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL, | ||
395 | + flags); | ||
396 | } | ||
397 | |||
398 | static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, | ||
399 | int64_t offset, int bytes) | ||
400 | { | ||
401 | - return bdrv_co_pdiscard(bs->backing->bs, offset, bytes); | ||
402 | + return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes, | ||
403 | + NULL, 0); | ||
404 | } | ||
405 | |||
406 | static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts) | ||
407 | @@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, | ||
408 | s->on_target_error = on_target_error; | ||
409 | s->is_none_mode = is_none_mode; | ||
410 | s->backing_mode = backing_mode; | ||
411 | + s->copy_mode = MIRROR_COPY_MODE_BACKGROUND; | ||
412 | s->base = base; | ||
413 | s->granularity = granularity; | ||
414 | s->buf_size = ROUND_UP(buf_size, granularity); | ||
415 | -- | 30 | -- |
416 | 2.13.6 | 31 | 2.13.6 |
417 | 32 | ||
418 | 33 | diff view generated by jsdifflib |
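Behind all of the machinery in the active-mirror patch on the left sits a simple invariant: in write-blocking mode a guest write only completes once the same bytes are on the target, so a synced dirty bitmap stays synced. Stripped of coroutines, op tracking and error handling, the core idea looks like this (toy structures, not QEMU code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define DISK_SIZE 16

    typedef struct {
        uint8_t source[DISK_SIZE];
        uint8_t target[DISK_SIZE];
        bool dirty[DISK_SIZE];      /* bytes the target does not have yet */
    } Mirror;

    /* A guest write returns only after the target has the same bytes. */
    static void guest_write(Mirror *m, unsigned off, const uint8_t *data,
                            unsigned len)
    {
        memcpy(m->source + off, data, len);
        memcpy(m->target + off, data, len);     /* the "blocking" copy */
        for (unsigned i = 0; i < len; i++) {
            m->dirty[off + i] = false;          /* range is in sync again */
        }
    }

    int main(void)
    {
        Mirror m = {0};

        /* pretend the background copy has not visited these bytes yet */
        for (unsigned i = 0; i < DISK_SIZE; i++) {
            m.dirty[i] = true;
        }

        guest_write(&m, 4, (const uint8_t *)"abcd", 4);

        printf("byte 5 dirty: %d, range [4,8) matches: %d\n",
               m.dirty[5], memcmp(m.source + 4, m.target + 4, 4) == 0);
        return 0;
    }

The real patch additionally serializes these synchronous writes against background copy operations and uses a bounce buffer so the guest cannot change the data between the two copies, but the write-then-clear ordering above is the heart of it.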
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Doug Gale <doug16k@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 3 | Add trace output for commands, errors, and undefined behavior. |
4 | Reviewed-by: Fam Zheng <famz@redhat.com> | 4 | Add guest error log output for undefined behavior. |
5 | Reviewed-by: Alberto Garcia <berto@igalia.com> | 5 | Report invalid undefined accesses to MMIO. |
6 | Message-id: 20180613181823.13618-15-mreitz@redhat.com | 6 | Annotate unlikely error checks with unlikely. |
7 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 7 | |
8 | Signed-off-by: Doug Gale <doug16k@gmail.com> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
8 | --- | 12 | --- |
9 | tests/qemu-iotests/151 | 120 +++++++++++++++++++++++++++++++++++++++++++++ | 13 | hw/block/nvme.c | 349 ++++++++++++++++++++++++++++++++++++++++++-------- |
10 | tests/qemu-iotests/151.out | 5 ++ | 14 | hw/block/trace-events | 93 ++++++++++++++ |
11 | tests/qemu-iotests/group | 1 + | 15 | 2 files changed, 390 insertions(+), 52 deletions(-) |
12 | 3 files changed, 126 insertions(+) | ||
13 | create mode 100755 tests/qemu-iotests/151 | ||
14 | create mode 100644 tests/qemu-iotests/151.out | ||
15 | 16 | ||
16 | diff --git a/tests/qemu-iotests/151 b/tests/qemu-iotests/151 | 17 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c |
17 | new file mode 100755 | 18 | index XXXXXXX..XXXXXXX 100644 |
18 | index XXXXXXX..XXXXXXX | 19 | --- a/hw/block/nvme.c |
19 | --- /dev/null | 20 | +++ b/hw/block/nvme.c |
20 | +++ b/tests/qemu-iotests/151 | ||
21 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ |
22 | +#!/usr/bin/env python | 22 | #include "qapi/visitor.h" |
23 | +# | 23 | #include "sysemu/block-backend.h" |
24 | +# Tests for active mirroring | 24 | |
25 | +# | 25 | +#include "qemu/log.h" |
26 | +# Copyright (C) 2018 Red Hat, Inc. | 26 | +#include "trace.h" |
27 | +# | 27 | #include "nvme.h" |
28 | +# This program is free software; you can redistribute it and/or modify | 28 | |
29 | +# it under the terms of the GNU General Public License as published by | 29 | +#define NVME_GUEST_ERR(trace, fmt, ...) \ |
30 | +# the Free Software Foundation; either version 2 of the License, or | 30 | + do { \ |
31 | +# (at your option) any later version. | 31 | + (trace_##trace)(__VA_ARGS__); \ |
32 | +# | 32 | + qemu_log_mask(LOG_GUEST_ERROR, #trace \ |
33 | +# This program is distributed in the hope that it will be useful, | 33 | + " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ |
34 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | 34 | + } while (0) |
35 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 35 | + |
36 | +# GNU General Public License for more details. | 36 | static void nvme_process_sq(void *opaque); |
37 | +# | 37 | |
38 | +# You should have received a copy of the GNU General Public License | 38 | static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) |
39 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | 39 | @@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) |
40 | +# | 40 | { |
41 | + | 41 | if (cq->irq_enabled) { |
42 | +import os | 42 | if (msix_enabled(&(n->parent_obj))) { |
43 | +import iotests | 43 | + trace_nvme_irq_msix(cq->vector); |
44 | +from iotests import qemu_img | 44 | msix_notify(&(n->parent_obj), cq->vector); |
45 | + | 45 | } else { |
46 | +source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt) | 46 | + trace_nvme_irq_pin(); |
47 | +target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) | 47 | pci_irq_pulse(&n->parent_obj); |
48 | + | 48 | } |
49 | +class TestActiveMirror(iotests.QMPTestCase): | 49 | + } else { |
50 | + image_len = 128 * 1024 * 1024 # 128 MB | 50 | + trace_nvme_irq_masked(); |
51 | + potential_writes_in_flight = True | 51 | } |
52 | + | 52 | } |
53 | + def setUp(self): | 53 | |
54 | + qemu_img('create', '-f', iotests.imgfmt, source_img, '128M') | 54 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, |
55 | + qemu_img('create', '-f', iotests.imgfmt, target_img, '128M') | 55 | trans_len = MIN(len, trans_len); |
56 | + | 56 | int num_prps = (len >> n->page_bits) + 1; |
57 | + blk_source = {'id': 'source', | 57 | |
58 | + 'if': 'none', | 58 | - if (!prp1) { |
59 | + 'node-name': 'source-node', | 59 | + if (unlikely(!prp1)) { |
60 | + 'driver': iotests.imgfmt, | 60 | + trace_nvme_err_invalid_prp(); |
61 | + 'file': {'driver': 'file', | 61 | return NVME_INVALID_FIELD | NVME_DNR; |
62 | + 'filename': source_img}} | 62 | } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && |
63 | + | 63 | prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { |
64 | + blk_target = {'node-name': 'target-node', | 64 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, |
65 | + 'driver': iotests.imgfmt, | 65 | } |
66 | + 'file': {'driver': 'file', | 66 | len -= trans_len; |
67 | + 'filename': target_img}} | 67 | if (len) { |
68 | + | 68 | - if (!prp2) { |
69 | + self.vm = iotests.VM() | 69 | + if (unlikely(!prp2)) { |
70 | + self.vm.add_drive_raw(self.vm.qmp_to_opts(blk_source)) | 70 | + trace_nvme_err_invalid_prp2_missing(); |
71 | + self.vm.add_blockdev(self.vm.qmp_to_opts(blk_target)) | 71 | goto unmap; |
72 | + self.vm.add_device('virtio-blk,drive=source') | 72 | } |
73 | + self.vm.launch() | 73 | if (len > n->page_size) { |
74 | + | 74 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, |
75 | + def tearDown(self): | 75 | uint64_t prp_ent = le64_to_cpu(prp_list[i]); |
76 | + self.vm.shutdown() | 76 | |
77 | + | 77 | if (i == n->max_prp_ents - 1 && len > n->page_size) { |
78 | + if not self.potential_writes_in_flight: | 78 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { |
79 | + self.assertTrue(iotests.compare_images(source_img, target_img), | 79 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { |
80 | + 'mirror target does not match source') | 80 | + trace_nvme_err_invalid_prplist_ent(prp_ent); |
81 | + | 81 | goto unmap; |
82 | + os.remove(source_img) | 82 | } |
83 | + os.remove(target_img) | 83 | |
84 | + | 84 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, |
85 | + def doActiveIO(self, sync_source_and_target): | 85 | prp_ent = le64_to_cpu(prp_list[i]); |
86 | + # Fill the source image | 86 | } |
87 | + self.vm.hmp_qemu_io('source', | 87 | |
88 | + 'write -P 1 0 %i' % self.image_len) | 88 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { |
89 | + | 89 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { |
90 | + # Start some background requests | 90 | + trace_nvme_err_invalid_prplist_ent(prp_ent); |
91 | + for offset in range(1 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024): | 91 | goto unmap; |
92 | + self.vm.hmp_qemu_io('source', 'aio_write -P 2 %i 1M' % offset) | 92 | } |
93 | + for offset in range(2 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024): | 93 | |
94 | + self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset) | 94 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, |
95 | + | 95 | i++; |
96 | + # Start the block job | 96 | } |
97 | + result = self.vm.qmp('blockdev-mirror', | 97 | } else { |
98 | + job_id='mirror', | 98 | - if (prp2 & (n->page_size - 1)) { |
99 | + filter_node_name='mirror-node', | 99 | + if (unlikely(prp2 & (n->page_size - 1))) { |
100 | + device='source-node', | 100 | + trace_nvme_err_invalid_prp2_align(prp2); |
101 | + target='target-node', | 101 | goto unmap; |
102 | + sync='full', | 102 | } |
103 | + copy_mode='write-blocking') | 103 | if (qsg->nsg) { |
104 | + self.assert_qmp(result, 'return', {}) | 104 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, |
105 | + | 105 | QEMUIOVector iov; |
106 | + # Start some more requests | 106 | uint16_t status = NVME_SUCCESS; |
107 | + for offset in range(3 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024): | 107 | |
108 | + self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset) | 108 | + trace_nvme_dma_read(prp1, prp2); |
109 | + for offset in range(4 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024): | 109 | + |
110 | + self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset) | 110 | if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { |
111 | + | 111 | return NVME_INVALID_FIELD | NVME_DNR; |
112 | + # Wait for the READY event | 112 | } |
113 | + self.wait_ready(drive='mirror') | 113 | if (qsg.nsg > 0) { |
114 | + | 114 | - if (dma_buf_read(ptr, len, &qsg)) { |
115 | + # Now start some final requests; all of these (which land on | 115 | + if (unlikely(dma_buf_read(ptr, len, &qsg))) { |
116 | + # the source) should be settled using the active mechanism. | 116 | + trace_nvme_err_invalid_dma(); |
117 | + # The mirror code itself asserts that the source BDS's dirty | 117 | status = NVME_INVALID_FIELD | NVME_DNR; |
118 | + # bitmap will stay clean between READY and COMPLETED. | 118 | } |
119 | + for offset in range(5 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024): | 119 | qemu_sglist_destroy(&qsg); |
120 | + self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset) | 120 | } else { |
121 | + for offset in range(6 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024): | 121 | - if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { |
122 | + self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset) | 122 | + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { |
123 | + | 123 | + trace_nvme_err_invalid_dma(); |
124 | + if sync_source_and_target: | 124 | status = NVME_INVALID_FIELD | NVME_DNR; |
125 | + # If source and target should be in sync after the mirror, | 125 | } |
126 | + # we have to flush before completion | 126 | qemu_iovec_destroy(&iov); |
127 | + self.vm.hmp_qemu_io('source', 'aio_flush') | 127 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, |
128 | + self.potential_writes_in_flight = False | 128 | uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); |
129 | + | 129 | uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS); |
130 | + self.complete_and_wait(drive='mirror', wait_ready=False) | 130 | |
131 | + | 131 | - if (slba + nlb > ns->id_ns.nsze) { |
132 | + def testActiveIO(self): | 132 | + if (unlikely(slba + nlb > ns->id_ns.nsze)) { |
133 | + self.doActiveIO(False) | 133 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); |
134 | + | 134 | return NVME_LBA_RANGE | NVME_DNR; |
135 | + def testActiveIOFlushed(self): | 135 | } |
136 | + self.doActiveIO(True) | 136 | |
137 | + | 137 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, |
138 | + | 138 | int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; |
139 | + | 139 | enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; |
140 | +if __name__ == '__main__': | 140 | |
141 | + iotests.main(supported_fmts=['qcow2', 'raw']) | 141 | - if ((slba + nlb) > ns->id_ns.nsze) { |
142 | diff --git a/tests/qemu-iotests/151.out b/tests/qemu-iotests/151.out | 142 | + trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); |
143 | new file mode 100644 | 143 | + |
144 | index XXXXXXX..XXXXXXX | 144 | + if (unlikely((slba + nlb) > ns->id_ns.nsze)) { |
145 | --- /dev/null | 145 | block_acct_invalid(blk_get_stats(n->conf.blk), acct); |
146 | +++ b/tests/qemu-iotests/151.out | 146 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); |
147 | @@ -XXX,XX +XXX,XX @@ | 147 | return NVME_LBA_RANGE | NVME_DNR; |
148 | +.. | 148 | } |
149 | +---------------------------------------------------------------------- | 149 | |
150 | +Ran 2 tests | 150 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) |
151 | + | 151 | NvmeNamespace *ns; |
152 | +OK | 152 | uint32_t nsid = le32_to_cpu(cmd->nsid); |
153 | diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group | 153 | |
154 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
155 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
156 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
157 | return NVME_INVALID_NSID | NVME_DNR; | ||
158 | } | ||
159 | |||
160 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
161 | case NVME_CMD_READ: | ||
162 | return nvme_rw(n, ns, cmd, req); | ||
163 | default: | ||
164 | + trace_nvme_err_invalid_opc(cmd->opcode); | ||
165 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
166 | } | ||
167 | } | ||
168 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
169 | NvmeCQueue *cq; | ||
170 | uint16_t qid = le16_to_cpu(c->qid); | ||
171 | |||
172 | - if (!qid || nvme_check_sqid(n, qid)) { | ||
173 | + if (unlikely(!qid || nvme_check_sqid(n, qid))) { | ||
174 | + trace_nvme_err_invalid_del_sq(qid); | ||
175 | return NVME_INVALID_QID | NVME_DNR; | ||
176 | } | ||
177 | |||
178 | + trace_nvme_del_sq(qid); | ||
179 | + | ||
180 | sq = n->sq[qid]; | ||
181 | while (!QTAILQ_EMPTY(&sq->out_req_list)) { | ||
182 | req = QTAILQ_FIRST(&sq->out_req_list); | ||
183 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
184 | uint16_t qflags = le16_to_cpu(c->sq_flags); | ||
185 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
186 | |||
187 | - if (!cqid || nvme_check_cqid(n, cqid)) { | ||
188 | + trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); | ||
189 | + | ||
190 | + if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { | ||
191 | + trace_nvme_err_invalid_create_sq_cqid(cqid); | ||
192 | return NVME_INVALID_CQID | NVME_DNR; | ||
193 | } | ||
194 | - if (!sqid || !nvme_check_sqid(n, sqid)) { | ||
195 | + if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) { | ||
196 | + trace_nvme_err_invalid_create_sq_sqid(sqid); | ||
197 | return NVME_INVALID_QID | NVME_DNR; | ||
198 | } | ||
199 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
200 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
201 | + trace_nvme_err_invalid_create_sq_size(qsize); | ||
202 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
203 | } | ||
204 | - if (!prp1 || prp1 & (n->page_size - 1)) { | ||
205 | + if (unlikely(!prp1 || prp1 & (n->page_size - 1))) { | ||
206 | + trace_nvme_err_invalid_create_sq_addr(prp1); | ||
207 | return NVME_INVALID_FIELD | NVME_DNR; | ||
208 | } | ||
209 | - if (!(NVME_SQ_FLAGS_PC(qflags))) { | ||
210 | + if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { | ||
211 | + trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); | ||
212 | return NVME_INVALID_FIELD | NVME_DNR; | ||
213 | } | ||
214 | sq = g_malloc0(sizeof(*sq)); | ||
215 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
216 | NvmeCQueue *cq; | ||
217 | uint16_t qid = le16_to_cpu(c->qid); | ||
218 | |||
219 | - if (!qid || nvme_check_cqid(n, qid)) { | ||
220 | + if (unlikely(!qid || nvme_check_cqid(n, qid))) { | ||
221 | + trace_nvme_err_invalid_del_cq_cqid(qid); | ||
222 | return NVME_INVALID_CQID | NVME_DNR; | ||
223 | } | ||
224 | |||
225 | cq = n->cq[qid]; | ||
226 | - if (!QTAILQ_EMPTY(&cq->sq_list)) { | ||
227 | + if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { | ||
228 | + trace_nvme_err_invalid_del_cq_notempty(qid); | ||
229 | return NVME_INVALID_QUEUE_DEL; | ||
230 | } | ||
231 | + trace_nvme_del_cq(qid); | ||
232 | nvme_free_cq(cq, n); | ||
233 | return NVME_SUCCESS; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
236 | uint16_t qflags = le16_to_cpu(c->cq_flags); | ||
237 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
238 | |||
239 | - if (!cqid || !nvme_check_cqid(n, cqid)) { | ||
240 | + trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags, | ||
241 | + NVME_CQ_FLAGS_IEN(qflags) != 0); | ||
242 | + | ||
243 | + if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) { | ||
244 | + trace_nvme_err_invalid_create_cq_cqid(cqid); | ||
245 | return NVME_INVALID_CQID | NVME_DNR; | ||
246 | } | ||
247 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
248 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
249 | + trace_nvme_err_invalid_create_cq_size(qsize); | ||
250 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
251 | } | ||
252 | - if (!prp1) { | ||
253 | + if (unlikely(!prp1)) { | ||
254 | + trace_nvme_err_invalid_create_cq_addr(prp1); | ||
255 | return NVME_INVALID_FIELD | NVME_DNR; | ||
256 | } | ||
257 | - if (vector > n->num_queues) { | ||
258 | + if (unlikely(vector > n->num_queues)) { | ||
259 | + trace_nvme_err_invalid_create_cq_vector(vector); | ||
260 | return NVME_INVALID_IRQ_VECTOR | NVME_DNR; | ||
261 | } | ||
262 | - if (!(NVME_CQ_FLAGS_PC(qflags))) { | ||
263 | + if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { | ||
264 | + trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); | ||
265 | return NVME_INVALID_FIELD | NVME_DNR; | ||
266 | } | ||
267 | |||
268 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) | ||
269 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
270 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
271 | |||
272 | + trace_nvme_identify_ctrl(); | ||
273 | + | ||
274 | return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), | ||
275 | prp1, prp2); | ||
276 | } | ||
277 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) | ||
278 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
279 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
280 | |||
281 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
282 | + trace_nvme_identify_ns(nsid); | ||
283 | + | ||
284 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
285 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
286 | return NVME_INVALID_NSID | NVME_DNR; | ||
287 | } | ||
288 | |||
289 | ns = &n->namespaces[nsid - 1]; | ||
290 | + | ||
291 | return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), | ||
292 | prp1, prp2); | ||
293 | } | ||
294 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) | ||
295 | uint16_t ret; | ||
296 | int i, j = 0; | ||
297 | |||
298 | + trace_nvme_identify_nslist(min_nsid); | ||
299 | + | ||
300 | list = g_malloc0(data_len); | ||
301 | for (i = 0; i < n->num_namespaces; i++) { | ||
302 | if (i < min_nsid) { | ||
303 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) | ||
304 | case 0x02: | ||
305 | return nvme_identify_nslist(n, c); | ||
306 | default: | ||
307 | + trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); | ||
308 | return NVME_INVALID_FIELD | NVME_DNR; | ||
309 | } | ||
310 | } | ||
311 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
312 | switch (dw10) { | ||
313 | case NVME_VOLATILE_WRITE_CACHE: | ||
314 | result = blk_enable_write_cache(n->conf.blk); | ||
315 | + trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); | ||
316 | break; | ||
317 | case NVME_NUMBER_OF_QUEUES: | ||
318 | result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
319 | + trace_nvme_getfeat_numq(result); | ||
320 | break; | ||
321 | default: | ||
322 | + trace_nvme_err_invalid_getfeat(dw10); | ||
323 | return NVME_INVALID_FIELD | NVME_DNR; | ||
324 | } | ||
325 | |||
326 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
327 | blk_set_enable_write_cache(n->conf.blk, dw11 & 1); | ||
328 | break; | ||
329 | case NVME_NUMBER_OF_QUEUES: | ||
330 | + trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, | ||
331 | + ((dw11 >> 16) & 0xFFFF) + 1, | ||
332 | + n->num_queues - 1, n->num_queues - 1); | ||
333 | req->cqe.result = | ||
334 | cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
335 | break; | ||
336 | default: | ||
337 | + trace_nvme_err_invalid_setfeat(dw10); | ||
338 | return NVME_INVALID_FIELD | NVME_DNR; | ||
339 | } | ||
340 | return NVME_SUCCESS; | ||
341 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
342 | case NVME_ADM_CMD_GET_FEATURES: | ||
343 | return nvme_get_feature(n, cmd, req); | ||
344 | default: | ||
345 | + trace_nvme_err_invalid_admin_opc(cmd->opcode); | ||
346 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
347 | } | ||
348 | } | ||
349 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
350 | uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; | ||
351 | uint32_t page_size = 1 << page_bits; | ||
352 | |||
353 | - if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq || | ||
354 | - n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) || | ||
355 | - NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) || | ||
356 | - NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) || | ||
357 | - NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) || | ||
358 | - NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) || | ||
359 | - NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) || | ||
360 | - NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) || | ||
361 | - !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) { | ||
362 | + if (unlikely(n->cq[0])) { | ||
363 | + trace_nvme_err_startfail_cq(); | ||
364 | + return -1; | ||
365 | + } | ||
366 | + if (unlikely(n->sq[0])) { | ||
367 | + trace_nvme_err_startfail_sq(); | ||
368 | + return -1; | ||
369 | + } | ||
370 | + if (unlikely(!n->bar.asq)) { | ||
371 | + trace_nvme_err_startfail_nbarasq(); | ||
372 | + return -1; | ||
373 | + } | ||
374 | + if (unlikely(!n->bar.acq)) { | ||
375 | + trace_nvme_err_startfail_nbaracq(); | ||
376 | + return -1; | ||
377 | + } | ||
378 | + if (unlikely(n->bar.asq & (page_size - 1))) { | ||
379 | + trace_nvme_err_startfail_asq_misaligned(n->bar.asq); | ||
380 | + return -1; | ||
381 | + } | ||
382 | + if (unlikely(n->bar.acq & (page_size - 1))) { | ||
383 | + trace_nvme_err_startfail_acq_misaligned(n->bar.acq); | ||
384 | + return -1; | ||
385 | + } | ||
386 | + if (unlikely(NVME_CC_MPS(n->bar.cc) < | ||
387 | + NVME_CAP_MPSMIN(n->bar.cap))) { | ||
388 | + trace_nvme_err_startfail_page_too_small( | ||
389 | + NVME_CC_MPS(n->bar.cc), | ||
390 | + NVME_CAP_MPSMIN(n->bar.cap)); | ||
391 | + return -1; | ||
392 | + } | ||
393 | + if (unlikely(NVME_CC_MPS(n->bar.cc) > | ||
394 | + NVME_CAP_MPSMAX(n->bar.cap))) { | ||
395 | + trace_nvme_err_startfail_page_too_large( | ||
396 | + NVME_CC_MPS(n->bar.cc), | ||
397 | + NVME_CAP_MPSMAX(n->bar.cap)); | ||
398 | + return -1; | ||
399 | + } | ||
400 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) < | ||
401 | + NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { | ||
402 | + trace_nvme_err_startfail_cqent_too_small( | ||
403 | + NVME_CC_IOCQES(n->bar.cc), | ||
404 | + NVME_CTRL_CQES_MIN(n->id_ctrl.cqes)); | ||
405 | + return -1; | ||
406 | + } | ||
407 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) > | ||
408 | + NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { | ||
409 | + trace_nvme_err_startfail_cqent_too_large( | ||
410 | + NVME_CC_IOCQES(n->bar.cc), | ||
411 | + NVME_CTRL_CQES_MAX(n->id_ctrl.cqes)); | ||
412 | + return -1; | ||
413 | + } | ||
414 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) < | ||
415 | + NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { | ||
416 | + trace_nvme_err_startfail_sqent_too_small( | ||
417 | + NVME_CC_IOSQES(n->bar.cc), | ||
418 | + NVME_CTRL_SQES_MIN(n->id_ctrl.sqes)); | ||
419 | + return -1; | ||
420 | + } | ||
421 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) > | ||
422 | + NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { | ||
423 | + trace_nvme_err_startfail_sqent_too_large( | ||
424 | + NVME_CC_IOSQES(n->bar.cc), | ||
425 | + NVME_CTRL_SQES_MAX(n->id_ctrl.sqes)); | ||
426 | + return -1; | ||
427 | + } | ||
428 | + if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { | ||
429 | + trace_nvme_err_startfail_asqent_sz_zero(); | ||
430 | + return -1; | ||
431 | + } | ||
432 | + if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { | ||
433 | + trace_nvme_err_startfail_acqent_sz_zero(); | ||
434 | return -1; | ||
435 | } | ||
436 | |||
437 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
438 | static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
439 | unsigned size) | ||
440 | { | ||
441 | + if (unlikely(offset & (sizeof(uint32_t) - 1))) { | ||
442 | + NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32, | ||
443 | + "MMIO write not 32-bit aligned," | ||
444 | + " offset=0x%"PRIx64"", offset); | ||
445 | + /* should be ignored, fall through for now */ | ||
446 | + } | ||
447 | + | ||
448 | + if (unlikely(size < sizeof(uint32_t))) { | ||
449 | + NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall, | ||
450 | + "MMIO write smaller than 32-bits," | ||
451 | + " offset=0x%"PRIx64", size=%u", | ||
452 | + offset, size); | ||
453 | + /* should be ignored, fall through for now */ | ||
454 | + } | ||
455 | + | ||
456 | switch (offset) { | ||
457 | - case 0xc: | ||
458 | + case 0xc: /* INTMS */ | ||
459 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
460 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
461 | + "undefined access to interrupt mask set" | ||
462 | + " when MSI-X is enabled"); | ||
463 | + /* should be ignored, fall through for now */ | ||
464 | + } | ||
465 | n->bar.intms |= data & 0xffffffff; | ||
466 | n->bar.intmc = n->bar.intms; | ||
467 | + trace_nvme_mmio_intm_set(data & 0xffffffff, | ||
468 | + n->bar.intmc); | ||
469 | break; | ||
470 | - case 0x10: | ||
471 | + case 0x10: /* INTMC */ | ||
472 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
473 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
474 | + "undefined access to interrupt mask clr" | ||
475 | + " when MSI-X is enabled"); | ||
476 | + /* should be ignored, fall through for now */ | ||
477 | + } | ||
478 | n->bar.intms &= ~(data & 0xffffffff); | ||
479 | n->bar.intmc = n->bar.intms; | ||
480 | + trace_nvme_mmio_intm_clr(data & 0xffffffff, | ||
481 | + n->bar.intmc); | ||
482 | break; | ||
483 | - case 0x14: | ||
484 | + case 0x14: /* CC */ | ||
485 | + trace_nvme_mmio_cfg(data & 0xffffffff); | ||
486 | /* Windows first sends data, then sends enable bit */ | ||
487 | if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && | ||
488 | !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) | ||
489 | @@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
490 | |||
491 | if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { | ||
492 | n->bar.cc = data; | ||
493 | - if (nvme_start_ctrl(n)) { | ||
494 | + if (unlikely(nvme_start_ctrl(n))) { | ||
495 | + trace_nvme_err_startfail(); | ||
496 | n->bar.csts = NVME_CSTS_FAILED; | ||
497 | } else { | ||
498 | + trace_nvme_mmio_start_success(); | ||
499 | n->bar.csts = NVME_CSTS_READY; | ||
500 | } | ||
501 | } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { | ||
502 | + trace_nvme_mmio_stopped(); | ||
503 | nvme_clear_ctrl(n); | ||
504 | n->bar.csts &= ~NVME_CSTS_READY; | ||
505 | } | ||
506 | if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { | ||
507 | - nvme_clear_ctrl(n); | ||
508 | - n->bar.cc = data; | ||
509 | - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
510 | + trace_nvme_mmio_shutdown_set(); | ||
511 | + nvme_clear_ctrl(n); | ||
512 | + n->bar.cc = data; | ||
513 | + n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
514 | } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { | ||
515 | - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
516 | - n->bar.cc = data; | ||
517 | + trace_nvme_mmio_shutdown_cleared(); | ||
518 | + n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
519 | + n->bar.cc = data; | ||
520 | + } | ||
521 | + break; | ||
522 | + case 0x1C: /* CSTS */ | ||
523 | + if (data & (1 << 4)) { | ||
524 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported, | ||
525 | + "attempted to W1C CSTS.NSSRO" | ||
526 | + " but CAP.NSSRS is zero (not supported)"); | ||
527 | + } else if (data != 0) { | ||
528 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts, | ||
529 | + "attempted to set a read only bit" | ||
530 | + " of controller status"); | ||
531 | + } | ||
532 | + break; | ||
533 | + case 0x20: /* NSSR */ | ||
534 | + if (data == 0x4E564D65) { | ||
535 | + trace_nvme_ub_mmiowr_ssreset_unsupported(); | ||
536 | + } else { | ||
537 | + /* The spec says that writes of other values have no effect */ | ||
538 | + return; | ||
539 | } | ||
540 | break; | ||
541 | - case 0x24: | ||
542 | + case 0x24: /* AQA */ | ||
543 | n->bar.aqa = data & 0xffffffff; | ||
544 | + trace_nvme_mmio_aqattr(data & 0xffffffff); | ||
545 | break; | ||
546 | - case 0x28: | ||
547 | + case 0x28: /* ASQ */ | ||
548 | n->bar.asq = data; | ||
549 | + trace_nvme_mmio_asqaddr(data); | ||
550 | break; | ||
551 | - case 0x2c: | ||
552 | + case 0x2c: /* ASQ hi */ | ||
553 | n->bar.asq |= data << 32; | ||
554 | + trace_nvme_mmio_asqaddr_hi(data, n->bar.asq); | ||
555 | break; | ||
556 | - case 0x30: | ||
557 | + case 0x30: /* ACQ */ | ||
558 | + trace_nvme_mmio_acqaddr(data); | ||
559 | n->bar.acq = data; | ||
560 | break; | ||
561 | - case 0x34: | ||
562 | + case 0x34: /* ACQ hi */ | ||
563 | n->bar.acq |= data << 32; | ||
564 | + trace_nvme_mmio_acqaddr_hi(data, n->bar.acq); | ||
565 | break; | ||
566 | + case 0x38: /* CMBLOC */ | ||
567 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved, | ||
568 | + "invalid write to reserved CMBLOC" | ||
569 | + " when CMBSZ is zero, ignored"); | ||
570 | + return; | ||
571 | + case 0x3C: /* CMBSZ */ | ||
572 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, | ||
573 | + "invalid write to read only CMBSZ, ignored"); | ||
574 | + return; | ||
575 | default: | ||
576 | + NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, | ||
577 | + "invalid MMIO write," | ||
578 | + " offset=0x%"PRIx64", data=%"PRIx64"", | ||
579 | + offset, data); | ||
580 | break; | ||
581 | } | ||
582 | } | ||
583 | @@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) | ||
584 | uint8_t *ptr = (uint8_t *)&n->bar; | ||
585 | uint64_t val = 0; | ||
586 | |||
587 | + if (unlikely(addr & (sizeof(uint32_t) - 1))) { | ||
588 | + NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32, | ||
589 | + "MMIO read not 32-bit aligned," | ||
590 | + " offset=0x%"PRIx64"", addr); | ||
591 | + /* should RAZ, fall through for now */ | ||
592 | + } else if (unlikely(size < sizeof(uint32_t))) { | ||
593 | + NVME_GUEST_ERR(nvme_ub_mmiord_toosmall, | ||
594 | + "MMIO read smaller than 32-bits," | ||
595 | + " offset=0x%"PRIx64"", addr); | ||
596 | + /* should RAZ, fall through for now */ | ||
597 | + } | ||
598 | + | ||
599 | if (addr < sizeof(n->bar)) { | ||
600 | memcpy(&val, ptr + addr, size); | ||
601 | + } else { | ||
602 | + NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, | ||
603 | + "MMIO read beyond last register," | ||
604 | + " offset=0x%"PRIx64", returning 0", addr); | ||
605 | } | ||
606 | + | ||
607 | return val; | ||
608 | } | ||
609 | |||
610 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) | ||
611 | { | ||
612 | uint32_t qid; | ||
613 | |||
614 | - if (addr & ((1 << 2) - 1)) { | ||
615 | + if (unlikely(addr & ((1 << 2) - 1))) { | ||
616 | + NVME_GUEST_ERR(nvme_ub_db_wr_misaligned, | ||
617 | + "doorbell write not 32-bit aligned," | ||
618 | + " offset=0x%"PRIx64", ignoring", addr); | ||
619 | return; | ||
620 | } | ||
621 | |||
622 | if (((addr - 0x1000) >> 2) & 1) { | ||
623 | + /* Completion queue doorbell write */ | ||
624 | + | ||
625 | uint16_t new_head = val & 0xffff; | ||
626 | int start_sqs; | ||
627 | NvmeCQueue *cq; | ||
628 | |||
629 | qid = (addr - (0x1000 + (1 << 2))) >> 3; | ||
630 | - if (nvme_check_cqid(n, qid)) { | ||
631 | + if (unlikely(nvme_check_cqid(n, qid))) { | ||
632 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq, | ||
633 | + "completion queue doorbell write" | ||
634 | + " for nonexistent queue," | ||
635 | + " sqid=%"PRIu32", ignoring", qid); | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | cq = n->cq[qid]; | ||
640 | - if (new_head >= cq->size) { | ||
641 | + if (unlikely(new_head >= cq->size)) { | ||
642 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead, | ||
643 | + "completion queue doorbell write value" | ||
644 | + " beyond queue size, sqid=%"PRIu32"," | ||
645 | + " new_head=%"PRIu16", ignoring", | ||
646 | + qid, new_head); | ||
647 | return; | ||
648 | } | ||
649 | |||
650 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) | ||
651 | nvme_isr_notify(n, cq); | ||
652 | } | ||
653 | } else { | ||
654 | + /* Submission queue doorbell write */ | ||
655 | + | ||
656 | uint16_t new_tail = val & 0xffff; | ||
657 | NvmeSQueue *sq; | ||
658 | |||
659 | qid = (addr - 0x1000) >> 3; | ||
660 | - if (nvme_check_sqid(n, qid)) { | ||
661 | + if (unlikely(nvme_check_sqid(n, qid))) { | ||
662 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq, | ||
663 | + "submission queue doorbell write" | ||
664 | + " for nonexistent queue," | ||
665 | + " sqid=%"PRIu32", ignoring", qid); | ||
666 | return; | ||
667 | } | ||
668 | |||
669 | sq = n->sq[qid]; | ||
670 | - if (new_tail >= sq->size) { | ||
671 | + if (unlikely(new_tail >= sq->size)) { | ||
672 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail, | ||
673 | + "submission queue doorbell write value" | ||
674 | + " beyond queue size, sqid=%"PRIu32"," | ||
675 | + " new_tail=%"PRIu16", ignoring", | ||
676 | + qid, new_tail); | ||
677 | return; | ||
678 | } | ||
679 | |||
680 | diff --git a/hw/block/trace-events b/hw/block/trace-events | ||
154 | index XXXXXXX..XXXXXXX 100644 | 681 | index XXXXXXX..XXXXXXX 100644 |
155 | --- a/tests/qemu-iotests/group | 682 | --- a/hw/block/trace-events |
156 | +++ b/tests/qemu-iotests/group | 683 | +++ b/hw/block/trace-events |
157 | @@ -XXX,XX +XXX,XX @@ | 684 | @@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6 |
158 | 148 rw auto quick | 685 | hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" |
159 | 149 rw auto sudo | 686 | hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" |
160 | 150 rw auto quick | 687 | |
161 | +151 rw auto | 688 | +# hw/block/nvme.c |
162 | 152 rw auto quick | 689 | +# nvme traces for successful events |
163 | 153 rw auto quick | 690 | +nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" |
164 | 154 rw auto backing quick | 691 | +nvme_irq_pin(void) "pulsing IRQ pin" |
692 | +nvme_irq_masked(void) "IRQ is masked" | ||
693 | +nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" | ||
694 | +nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" | ||
695 | +nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" | ||
696 | +nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" | ||
697 | +nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" | ||
698 | +nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" | ||
699 | +nvme_identify_ctrl(void) "identify controller" | ||
700 | +nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" | ||
701 | +nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" | ||
702 | +nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s" | ||
703 | +nvme_getfeat_numq(int result) "get feature number of queues, result=%d" | ||
704 | +nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" | ||
705 | +nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" | ||
706 | +nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" | ||
707 | +nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" | ||
708 | +nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" | ||
709 | +nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" | ||
710 | +nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" | ||
711 | +nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" | ||
712 | +nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" | ||
713 | +nvme_mmio_start_success(void) "setting controller enable bit succeeded" | ||
714 | +nvme_mmio_stopped(void) "cleared controller enable bit" | ||
715 | +nvme_mmio_shutdown_set(void) "shutdown bit set" | ||
716 | +nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" | ||
717 | + | ||
718 | +# nvme traces for error conditions | ||
719 | +nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" | ||
720 | +nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" | ||
721 | +nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" | ||
722 | +nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" | ||
723 | +nvme_err_invalid_field(void) "invalid field" | ||
724 | +nvme_err_invalid_prp(void) "invalid PRP" | ||
725 | +nvme_err_invalid_sgl(void) "invalid SGL" | ||
726 | +nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" | ||
727 | +nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" | ||
728 | +nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" | ||
729 | +nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" | ||
730 | +nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sqid=%"PRIu16"" | ||
731 | +nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" | ||
732 | +nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" | ||
733 | +nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" | ||
734 | +nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" | ||
735 | +nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" | ||
736 | +nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" | ||
737 | +nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" | ||
738 | +nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" | ||
739 | +nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" | ||
740 | +nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" | ||
741 | +nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" | ||
742 | +nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" | ||
743 | +nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" | ||
744 | +nvme_err_invalid_getfeat(uint32_t dw10) "invalid get features, dw10=0x%"PRIx32"" | ||
745 | +nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" | ||
746 | +nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" | ||
747 | +nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" | ||
748 | +nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" | ||
749 | +nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" | ||
750 | +nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" | ||
751 | +nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" | ||
752 | +nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" | ||
753 | +nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" | ||
754 | +nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" | ||
755 | +nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" | ||
756 | +nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" | ||
757 | +nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" | ||
758 | +nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" | ||
759 | +nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" | ||
760 | +nvme_err_startfail(void) "setting controller enable bit failed" | ||
761 | + | ||
762 | +# Traces for undefined behavior | ||
763 | +nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" | ||
764 | +nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" | ||
765 | +nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" | ||
766 | +nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" | ||
767 | +nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" | ||
768 | +nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" | ||
769 | +nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" | ||
770 | +nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" | ||
771 | +nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" | ||
772 | +nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" | ||
773 | +nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" | ||
774 | +nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" | ||
775 | +nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" | ||
776 | +nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" | ||
777 | +nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" | ||
778 | +nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" | ||
779 | +nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_tail=%"PRIu16", ignoring" | ||
780 | + | ||
781 | # hw/block/xen_disk.c | ||
782 | xen_disk_alloc(char *name) "%s" | ||
783 | xen_disk_init(char *name) "%s" | ||
165 | -- | 784 | -- |
166 | 2.13.6 | 785 | 2.13.6 |
167 | 786 | ||
168 | 787 | diff view generated by jsdifflib |
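As background for the trace-events additions above: each declaration in that file is compiled by the trace tooling into a trace_<name>() helper with the declared argument list, which is what the nvme.c hunks call. The generated shape is roughly the following sketch (the actual body depends on the trace backends selected at configure time):

    /* e.g. for the declaration
     *   nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
     */
    static inline void trace_nvme_del_sq(uint16_t qid)
    {
        /* formats and emits "deleting submission queue sqid=<qid>" when the
         * event and a backend are enabled; compiles to nothing otherwise */
    }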
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Fam Zheng <famz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Currently, bdrv_replace_node() refuses to create loops from one BDS to | 3 | Management tools create overlays of running guests with qemu-img: |
4 | itself if the BDS to be replaced is the backing node of the BDS to | ||
5 | replace it: Say there is a node A and a node B. Replacing B by A means | ||
6 | making all references to B point to A. If B is a child of A (i.e. A has | ||
7 | a reference to B), that would mean we would have to make this reference | ||
8 | point to A itself -- so we'd create a loop. | ||
9 | 4 | ||
10 | bdrv_replace_node() (through should_update_child()) refuses to do so if | 5 | $ qemu-img create -b /image/in/use.qcow2 -f qcow2 /overlay/image.qcow2 |
11 | B is the backing node of A. There is no reason why we should create | ||
12 | loops if B is not the backing node of A, though. The BDS graph should | ||
13 | never contain loops, so we should always refuse to create them. | ||
14 | 6 | ||
15 | If B is a child of A and B is to be replaced by A, we should simply | 7 | but this doesn't work anymore due to image locking: |
16 | leave B in place there because it is the most sensible choice. | ||
17 | 8 | ||
18 | A more specific argument would be: Putting filter drivers into the BDS | 9 | qemu-img: /overlay/image.qcow2: Failed to get shared "write" lock |
19 | graph is basically the same as appending an overlay to a backing chain. | 10 | Is another process using the image? |
20 | But the main child BDS of a filter driver is not "backing" but "file", | 11 | Could not open backing image to determine size. |
21 | so restricting the no-loop rule to backing nodes would fail here. | 12 | Use the force share option to allow this use case again. |
22 | 13 | ||
23 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 14 | Cc: qemu-stable@nongnu.org |
24 | Reviewed-by: Fam Zheng <famz@redhat.com> | 15 | Signed-off-by: Fam Zheng <famz@redhat.com> |
25 | Reviewed-by: Alberto Garcia <berto@igalia.com> | 16 | Reviewed-by: Eric Blake <eblake@redhat.com> |
26 | Message-id: 20180613181823.13618-7-mreitz@redhat.com | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
27 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
28 | --- | 18 | --- |
29 | block.c | 44 ++++++++++++++++++++++++++++++++++---------- | 19 | block.c | 3 ++- |
30 | 1 file changed, 34 insertions(+), 10 deletions(-) | 20 | 1 file changed, 2 insertions(+), 1 deletion(-) |
31 | 21 | ||
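A condensed sketch of the rule that the left-hand hunk below generalizes, pulled out as a standalone predicate for readability (the committed code keeps this loop inline in should_update_child(); the helper name is illustrative only):

    /* When replacing node A (c->bs) by node B (to): if @c is one of B's own
     * child references, redirecting it to B would form a cycle, so it must be
     * skipped -- for any child role now, not just the backing child. */
    static bool replacing_would_create_loop(BdrvChild *c, BlockDriverState *to)
    {
        BdrvChild *to_c;

        QLIST_FOREACH(to_c, &to->children, next) {
            if (to_c == c) {
                return true;    /* leave this reference pointing at A */
            }
        }
        return false;
    }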
32 | diff --git a/block.c b/block.c | 22 | diff --git a/block.c b/block.c |
33 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/block.c | 24 | --- a/block.c |
35 | +++ b/block.c | 25 | +++ b/block.c |
36 | @@ -XXX,XX +XXX,XX @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) | 26 | @@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt, |
37 | return false; | 27 | back_flags = flags; |
38 | } | 28 | back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); |
39 | 29 | ||
40 | - if (c->role == &child_backing) { | 30 | + backing_options = qdict_new(); |
41 | - /* If @from is a backing file of @to, ignore the child to avoid | 31 | if (backing_fmt) { |
42 | - * creating a loop. We only want to change the pointer of other | 32 | - backing_options = qdict_new(); |
43 | - * parents. */ | 33 | qdict_put_str(backing_options, "driver", backing_fmt); |
44 | - QLIST_FOREACH(to_c, &to->children, next) { | ||
45 | - if (to_c == c) { | ||
46 | - break; | ||
47 | - } | ||
48 | - } | ||
49 | - if (to_c) { | ||
50 | + /* If the child @c belongs to the BDS @to, replacing the current | ||
51 | + * c->bs by @to would mean to create a loop. | ||
52 | + * | ||
53 | + * Such a case occurs when appending a BDS to a backing chain. | ||
54 | + * For instance, imagine the following chain: | ||
55 | + * | ||
56 | + * guest device -> node A -> further backing chain... | ||
57 | + * | ||
58 | + * Now we create a new BDS B which we want to put on top of this | ||
59 | + * chain, so we first attach A as its backing node: | ||
60 | + * | ||
61 | + * node B | ||
62 | + * | | ||
63 | + * v | ||
64 | + * guest device -> node A -> further backing chain... | ||
65 | + * | ||
66 | + * Finally we want to replace A by B. When doing that, we want to | ||
67 | + * replace all pointers to A by pointers to B -- except for the | ||
68 | + * pointer from B because (1) that would create a loop, and (2) | ||
69 | + * that pointer should simply stay intact: | ||
70 | + * | ||
71 | + * guest device -> node B | ||
72 | + * | | ||
73 | + * v | ||
74 | + * node A -> further backing chain... | ||
75 | + * | ||
76 | + * In general, when replacing a node A (c->bs) by a node B (@to), | ||
77 | + * if A is a child of B, that means we cannot replace A by B there | ||
78 | + * because that would create a loop. Silently detaching A from B | ||
79 | + * is also not really an option. So overall just leaving A in | ||
80 | + * place there is the most sensible choice. */ | ||
81 | + QLIST_FOREACH(to_c, &to->children, next) { | ||
82 | + if (to_c == c) { | ||
83 | return false; | ||
84 | } | 34 | } |
85 | } | 35 | + qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); |
86 | @@ -XXX,XX +XXX,XX @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, | 36 | |
87 | 37 | bs = bdrv_open(full_backing, NULL, backing_options, back_flags, | |
88 | /* Put all parents into @list and calculate their cumulative permissions */ | 38 | &local_err); |
89 | QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { | ||
90 | + assert(c->bs == from); | ||
91 | if (!should_update_child(c, to)) { | ||
92 | continue; | ||
93 | } | ||
94 | -- | 39 | -- |
95 | 2.13.6 | 40 | 2.13.6 |
96 | 41 | ||
97 | 42 | diff view generated by jsdifflib |
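After the right-hand patch above, the size probe in bdrv_img_create() reads approximately as follows (consolidated from the hunk; only the hoisted qdict_new() and the force-share line are new):

    backing_options = qdict_new();
    if (backing_fmt) {
        qdict_put_str(backing_options, "driver", backing_fmt);
    }
    /* The probe opens the backing file read-only just to learn its size, so
     * it can safely bypass the write lock held by a running guest. */
    qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);

    bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
                   &local_err);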
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This patch allows the user to specify whether to use active or only | 3 | It's not working anymore since QEMU v1.3.0 - time to remove it now. |
4 | background mode for mirror block jobs. Currently, this setting will | ||
5 | remain constant for the duration of the entire block job. | ||
6 | 4 | ||
7 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 5 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
8 | Reviewed-by: Alberto Garcia <berto@igalia.com> | 6 | Reviewed-by: John Snow <jsnow@redhat.com> |
9 | Message-id: 20180613181823.13618-14-mreitz@redhat.com | 7 | Reviewed-by: Markus Armbruster <armbru@redhat.com> |
10 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 9 | --- |
12 | qapi/block-core.json | 11 +++++++++-- | 10 | blockdev.c | 11 ----------- |
13 | include/block/block_int.h | 4 +++- | 11 | qemu-doc.texi | 6 ------ |
14 | block/mirror.c | 12 +++++++----- | 12 | 2 files changed, 17 deletions(-) |
15 | blockdev.c | 9 ++++++++- | ||
16 | 4 files changed, 27 insertions(+), 9 deletions(-) | ||
17 | 13 | ||
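One detail before the diff: copy-mode is an optional QAPI member, so the QMP handler receives a has_copy_mode flag and substitutes the documented default itself, as in the blockdev.c hunk below:

    if (!has_copy_mode) {
        copy_mode = MIRROR_COPY_MODE_BACKGROUND;   /* the schema default */
    }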
18 | diff --git a/qapi/block-core.json b/qapi/block-core.json | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/qapi/block-core.json | ||
21 | +++ b/qapi/block-core.json | ||
22 | @@ -XXX,XX +XXX,XX @@ | ||
23 | # written. Both will result in identical contents. | ||
24 | # Default is true. (Since 2.4) | ||
25 | # | ||
26 | +# @copy-mode: when to copy data to the destination; defaults to 'background' | ||
27 | +# (Since: 3.0) | ||
28 | +# | ||
29 | # Since: 1.3 | ||
30 | ## | ||
31 | { 'struct': 'DriveMirror', | ||
32 | @@ -XXX,XX +XXX,XX @@ | ||
33 | '*speed': 'int', '*granularity': 'uint32', | ||
34 | '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', | ||
35 | '*on-target-error': 'BlockdevOnError', | ||
36 | - '*unmap': 'bool' } } | ||
37 | + '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } } | ||
38 | |||
39 | ## | ||
40 | # @BlockDirtyBitmap: | ||
41 | @@ -XXX,XX +XXX,XX @@ | ||
42 | # above @device. If this option is not given, a node name is | ||
43 | # autogenerated. (Since: 2.9) | ||
44 | # | ||
45 | +# @copy-mode: when to copy data to the destination; defaults to 'background' | ||
46 | +# (Since: 3.0) | ||
47 | +# | ||
48 | # Returns: nothing on success. | ||
49 | # | ||
50 | # Since: 2.6 | ||
51 | @@ -XXX,XX +XXX,XX @@ | ||
52 | '*speed': 'int', '*granularity': 'uint32', | ||
53 | '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', | ||
54 | '*on-target-error': 'BlockdevOnError', | ||
55 | - '*filter-node-name': 'str' } } | ||
56 | + '*filter-node-name': 'str', | ||
57 | + '*copy-mode': 'MirrorCopyMode' } } | ||
58 | |||
59 | ## | ||
60 | # @block_set_io_throttle: | ||
61 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/include/block/block_int.h | ||
64 | +++ b/include/block/block_int.h | ||
65 | @@ -XXX,XX +XXX,XX @@ void commit_active_start(const char *job_id, BlockDriverState *bs, | ||
66 | * @filter_node_name: The node name that should be assigned to the filter | ||
67 | * driver that the mirror job inserts into the graph above @bs. NULL means that | ||
68 | * a node name should be autogenerated. | ||
69 | + * @copy_mode: When to trigger writes to the target. | ||
70 | * @errp: Error object. | ||
71 | * | ||
72 | * Start a mirroring operation on @bs. Clusters that are allocated | ||
73 | @@ -XXX,XX +XXX,XX @@ void mirror_start(const char *job_id, BlockDriverState *bs, | ||
74 | MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, | ||
75 | BlockdevOnError on_source_error, | ||
76 | BlockdevOnError on_target_error, | ||
77 | - bool unmap, const char *filter_node_name, Error **errp); | ||
78 | + bool unmap, const char *filter_node_name, | ||
79 | + MirrorCopyMode copy_mode, Error **errp); | ||
80 | |||
81 | /* | ||
82 | * backup_job_create: | ||
83 | diff --git a/block/mirror.c b/block/mirror.c | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/block/mirror.c | ||
86 | +++ b/block/mirror.c | ||
87 | @@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, | ||
88 | const BlockJobDriver *driver, | ||
89 | bool is_none_mode, BlockDriverState *base, | ||
90 | bool auto_complete, const char *filter_node_name, | ||
91 | - bool is_mirror, | ||
92 | + bool is_mirror, MirrorCopyMode copy_mode, | ||
93 | Error **errp) | ||
94 | { | ||
95 | MirrorBlockJob *s; | ||
96 | @@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, | ||
97 | s->on_target_error = on_target_error; | ||
98 | s->is_none_mode = is_none_mode; | ||
99 | s->backing_mode = backing_mode; | ||
100 | - s->copy_mode = MIRROR_COPY_MODE_BACKGROUND; | ||
101 | + s->copy_mode = copy_mode; | ||
102 | s->base = base; | ||
103 | s->granularity = granularity; | ||
104 | s->buf_size = ROUND_UP(buf_size, granularity); | ||
105 | @@ -XXX,XX +XXX,XX @@ void mirror_start(const char *job_id, BlockDriverState *bs, | ||
106 | MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, | ||
107 | BlockdevOnError on_source_error, | ||
108 | BlockdevOnError on_target_error, | ||
109 | - bool unmap, const char *filter_node_name, Error **errp) | ||
110 | + bool unmap, const char *filter_node_name, | ||
111 | + MirrorCopyMode copy_mode, Error **errp) | ||
112 | { | ||
113 | bool is_none_mode; | ||
114 | BlockDriverState *base; | ||
115 | @@ -XXX,XX +XXX,XX @@ void mirror_start(const char *job_id, BlockDriverState *bs, | ||
116 | speed, granularity, buf_size, backing_mode, | ||
117 | on_source_error, on_target_error, unmap, NULL, NULL, | ||
118 | &mirror_job_driver, is_none_mode, base, false, | ||
119 | - filter_node_name, true, errp); | ||
120 | + filter_node_name, true, copy_mode, errp); | ||
121 | } | ||
122 | |||
123 | void commit_active_start(const char *job_id, BlockDriverState *bs, | ||
124 | @@ -XXX,XX +XXX,XX @@ void commit_active_start(const char *job_id, BlockDriverState *bs, | ||
125 | MIRROR_LEAVE_BACKING_CHAIN, | ||
126 | on_error, on_error, true, cb, opaque, | ||
127 | &commit_active_job_driver, false, base, auto_complete, | ||
128 | - filter_node_name, false, &local_err); | ||
129 | + filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, | ||
130 | + &local_err); | ||
131 | if (local_err) { | ||
132 | error_propagate(errp, local_err); | ||
133 | goto error_restore_flags; | ||
134 | diff --git a/blockdev.c b/blockdev.c | 14 | diff --git a/blockdev.c b/blockdev.c |
135 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
136 | --- a/blockdev.c | 16 | --- a/blockdev.c |
137 | +++ b/blockdev.c | 17 | +++ b/blockdev.c |
138 | @@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, | 18 | @@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_legacy_drive_opts = { |
139 | bool has_unmap, bool unmap, | 19 | .type = QEMU_OPT_STRING, |
140 | bool has_filter_node_name, | 20 | .help = "chs translation (auto, lba, none)", |
141 | const char *filter_node_name, | 21 | },{ |
142 | + bool has_copy_mode, MirrorCopyMode copy_mode, | 22 | - .name = "boot", |
143 | Error **errp) | 23 | - .type = QEMU_OPT_BOOL, |
144 | { | 24 | - .help = "(deprecated, ignored)", |
145 | 25 | - },{ | |
146 | @@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, | 26 | .name = "addr", |
147 | if (!has_filter_node_name) { | 27 | .type = QEMU_OPT_STRING, |
148 | filter_node_name = NULL; | 28 | .help = "pci address (virtio only)", |
29 | @@ -XXX,XX +XXX,XX @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) | ||
30 | goto fail; | ||
149 | } | 31 | } |
150 | + if (!has_copy_mode) { | 32 | |
151 | + copy_mode = MIRROR_COPY_MODE_BACKGROUND; | 33 | - /* Deprecated option boot=[on|off] */ |
152 | + } | 34 | - if (qemu_opt_get(legacy_opts, "boot") != NULL) { |
153 | 35 | - fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be " | |
154 | if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { | 36 | - "ignored. Future versions will reject this parameter. Please " |
155 | error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", | 37 | - "update your scripts.\n"); |
156 | @@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, | 38 | - } |
157 | has_replaces ? replaces : NULL, | 39 | - |
158 | speed, granularity, buf_size, sync, backing_mode, | 40 | /* Other deprecated options */ |
159 | on_source_error, on_target_error, unmap, filter_node_name, | 41 | if (!qtest_enabled()) { |
160 | - errp); | 42 | for (i = 0; i < ARRAY_SIZE(deprecated); i++) { |
161 | + copy_mode, errp); | 43 | diff --git a/qemu-doc.texi b/qemu-doc.texi |
162 | } | 44 | index XXXXXXX..XXXXXXX 100644 |
163 | 45 | --- a/qemu-doc.texi | |
164 | void qmp_drive_mirror(DriveMirror *arg, Error **errp) | 46 | +++ b/qemu-doc.texi |
165 | @@ -XXX,XX +XXX,XX @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) | 47 | @@ -XXX,XX +XXX,XX @@ deprecated. |
166 | arg->has_on_target_error, arg->on_target_error, | 48 | |
167 | arg->has_unmap, arg->unmap, | 49 | @section System emulator command line arguments |
168 | false, NULL, | 50 | |
169 | + arg->has_copy_mode, arg->copy_mode, | 51 | -@subsection -drive boot=on|off (since 1.3.0) |
170 | &local_err); | 52 | - |
171 | bdrv_unref(target_bs); | 53 | -The ``boot=on|off'' option to the ``-drive'' argument is |
172 | error_propagate(errp, local_err); | 54 | -ignored. Applications should use the ``bootindex=N'' parameter |
173 | @@ -XXX,XX +XXX,XX @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, | 55 | -to set an absolute ordering between devices instead. |
174 | BlockdevOnError on_target_error, | 56 | - |
175 | bool has_filter_node_name, | 57 | @subsection -tdf (since 1.3.0) |
176 | const char *filter_node_name, | 58 | |
177 | + bool has_copy_mode, MirrorCopyMode copy_mode, | 59 | The ``-tdf'' argument is ignored. The behaviour implemented |
178 | Error **errp) | ||
179 | { | ||
180 | BlockDriverState *bs; | ||
181 | @@ -XXX,XX +XXX,XX @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, | ||
182 | has_on_target_error, on_target_error, | ||
183 | true, true, | ||
184 | has_filter_node_name, filter_node_name, | ||
185 | + has_copy_mode, copy_mode, | ||
186 | &local_err); | ||
187 | error_propagate(errp, local_err); | ||
188 | |||
189 | -- | 60 | -- |
190 | 2.13.6 | 61 | 2.13.6 |
191 | 62 | ||
192 | 63 | diff view generated by jsdifflib |
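
The hunks above thread the new copy_mode argument through blockdev_mirror_common() and its QMP callers, following the usual QAPI convention for optional arguments: each optional value arrives together with a has_* flag, and absent values are defaulted before first use. A minimal self-contained sketch of that pattern, with hypothetical names rather than the real QEMU signatures:

    #include <stdio.h>
    #include <stdbool.h>

    typedef enum { COPY_MODE_BACKGROUND, COPY_MODE_WRITE_BLOCKING } CopyMode;

    /* Hypothetical stand-in for blockdev_mirror_common(): every optional
     * QAPI argument is a (has_x, x) pair, defaulted early in the body. */
    static void mirror_common(bool has_copy_mode, CopyMode copy_mode)
    {
        if (!has_copy_mode) {
            copy_mode = COPY_MODE_BACKGROUND;   /* default when absent */
        }
        printf("copy mode: %d\n", copy_mode);
    }

    int main(void)
    {
        mirror_common(false, 0);                       /* caller omitted it */
        mirror_common(true, COPY_MODE_WRITE_BLOCKING); /* caller set it */
        return 0;
    }
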
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This will allow us to access the block job data when the mirror block | 3 | It's been marked as deprecated since QEMU v2.10.0, and so far nobody |
4 | driver becomes more complex. | 4 | complained that we should keep it, so let's remove this legacy option |
5 | 5 | now to simplify the code quite a bit. | |
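
As a rough sketch of the pattern the left-hand patch introduces (all types below are simplified stand-ins, not the real QEMU structs): the filter node's opaque pointer leads to a small struct that holds a back-pointer to the job, and that back-pointer is cleared when the job goes away, so node callbacks can check it safely:

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct Job { int ret; } Job;                 /* stand-in */
    typedef struct Node { void *opaque; } Node;          /* stand-in */
    typedef struct NodeOpaque { Job *job; } NodeOpaque;  /* back-pointer */

    static void node_callback(Node *n)
    {
        NodeOpaque *op = n->opaque;
        printf(op->job ? "job still owns the node\n"
                       : "job already completed\n");
    }

    int main(void)
    {
        Job job = { 0 };
        NodeOpaque *op = calloc(1, sizeof(*op));
        Node node = { .opaque = op };

        op->job = &job;      /* wired up when the job starts */
        node_callback(&node);

        op->job = NULL;      /* cleared on completion/failure paths */
        node_callback(&node);
        free(op);
        return 0;
    }
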
6 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 6 | |
7 | Reviewed-by: Fam Zheng <famz@redhat.com> | 7 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
8 | Message-id: 20180613181823.13618-11-mreitz@redhat.com | 8 | Reviewed-by: John Snow <jsnow@redhat.com> |
9 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 9 | Reviewed-by: Markus Armbruster <armbru@redhat.com> |
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | --- | 11 | --- |
11 | block/mirror.c | 12 ++++++++++++ | 12 | vl.c | 86 ++------------------------------------------------------- |
12 | 1 file changed, 12 insertions(+) | 13 | qemu-doc.texi | 8 ------ |
13 | 14 | qemu-options.hx | 19 ++----------- | |
14 | diff --git a/block/mirror.c b/block/mirror.c | 15 | 3 files changed, 4 insertions(+), 109 deletions(-) |
16 | |||
17 | diff --git a/vl.c b/vl.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/mirror.c | 19 | --- a/vl.c |
17 | +++ b/block/mirror.c | 20 | +++ b/vl.c |
18 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob { | 21 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
19 | bool initial_zeroing_ongoing; | 22 | const char *boot_order = NULL; |
20 | } MirrorBlockJob; | 23 | const char *boot_once = NULL; |
21 | 24 | DisplayState *ds; | |
22 | +typedef struct MirrorBDSOpaque { | 25 | - int cyls, heads, secs, translation; |
23 | + MirrorBlockJob *job; | 26 | QemuOpts *opts, *machine_opts; |
24 | +} MirrorBDSOpaque; | 27 | - QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL; |
25 | + | 28 | + QemuOpts *icount_opts = NULL, *accel_opts = NULL; |
26 | struct MirrorOp { | 29 | QemuOptsList *olist; |
27 | MirrorBlockJob *s; | 30 | int optind; |
28 | QEMUIOVector qiov; | 31 | const char *optarg; |
29 | @@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job, void *opaque) | 32 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
30 | MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); | 33 | |
31 | BlockJob *bjob = &s->common; | 34 | cpu_model = NULL; |
32 | MirrorExitData *data = opaque; | 35 | snapshot = 0; |
33 | + MirrorBDSOpaque *bs_opaque = s->mirror_top_bs->opaque; | 36 | - cyls = heads = secs = 0; |
34 | AioContext *replace_aio_context = NULL; | 37 | - translation = BIOS_ATA_TRANSLATION_AUTO; |
35 | BlockDriverState *src = s->mirror_top_bs->backing->bs; | 38 | |
36 | BlockDriverState *target_bs = blk_bs(s->target); | 39 | nb_nics = 0; |
37 | @@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job, void *opaque) | 40 | |
38 | blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort); | 41 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
39 | blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort); | 42 | if (optind >= argc) |
40 | 43 | break; | |
41 | + bs_opaque->job = NULL; | 44 | if (argv[optind][0] != '-') { |
42 | job_completed(job, data->ret, NULL); | 45 | - hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); |
43 | 46 | + drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); | |
44 | g_free(data); | 47 | } else { |
45 | @@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, | 48 | const QEMUOption *popt; |
46 | Error **errp) | 49 | |
47 | { | 50 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
48 | MirrorBlockJob *s; | 51 | cpu_model = optarg; |
49 | + MirrorBDSOpaque *bs_opaque; | 52 | break; |
50 | BlockDriverState *mirror_top_bs; | 53 | case QEMU_OPTION_hda: |
51 | bool target_graph_mod; | 54 | - { |
52 | bool target_is_backing; | 55 | - char buf[256]; |
53 | @@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, | 56 | - if (cyls == 0) |
54 | mirror_top_bs->total_sectors = bs->total_sectors; | 57 | - snprintf(buf, sizeof(buf), "%s", HD_OPTS); |
55 | mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; | 58 | - else |
56 | mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; | 59 | - snprintf(buf, sizeof(buf), |
57 | + bs_opaque = g_new0(MirrorBDSOpaque, 1); | 60 | - "%s,cyls=%d,heads=%d,secs=%d%s", |
58 | + mirror_top_bs->opaque = bs_opaque; | 61 | - HD_OPTS , cyls, heads, secs, |
59 | bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs)); | 62 | - translation == BIOS_ATA_TRANSLATION_LBA ? |
60 | 63 | - ",trans=lba" : | |
61 | /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep | 64 | - translation == BIOS_ATA_TRANSLATION_NONE ? |
62 | @@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, | 65 | - ",trans=none" : ""); |
63 | if (!s) { | 66 | - drive_add(IF_DEFAULT, 0, optarg, buf); |
64 | goto fail; | 67 | - break; |
65 | } | 68 | - } |
66 | + bs_opaque->job = s; | 69 | case QEMU_OPTION_hdb: |
67 | + | 70 | case QEMU_OPTION_hdc: |
68 | /* The block job now has a reference to this node */ | 71 | case QEMU_OPTION_hdd: |
69 | bdrv_unref(mirror_top_bs); | 72 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
70 | 73 | case QEMU_OPTION_snapshot: | |
71 | @@ -XXX,XX +XXX,XX @@ fail: | 74 | snapshot = 1; |
72 | 75 | break; | |
73 | g_free(s->replaces); | 76 | - case QEMU_OPTION_hdachs: |
74 | blk_unref(s->target); | 77 | - { |
75 | + bs_opaque->job = NULL; | 78 | - const char *p; |
76 | job_early_fail(&s->common.job); | 79 | - p = optarg; |
77 | } | 80 | - cyls = strtol(p, (char **)&p, 0); |
78 | 81 | - if (cyls < 1 || cyls > 16383) | |
82 | - goto chs_fail; | ||
83 | - if (*p != ',') | ||
84 | - goto chs_fail; | ||
85 | - p++; | ||
86 | - heads = strtol(p, (char **)&p, 0); | ||
87 | - if (heads < 1 || heads > 16) | ||
88 | - goto chs_fail; | ||
89 | - if (*p != ',') | ||
90 | - goto chs_fail; | ||
91 | - p++; | ||
92 | - secs = strtol(p, (char **)&p, 0); | ||
93 | - if (secs < 1 || secs > 63) | ||
94 | - goto chs_fail; | ||
95 | - if (*p == ',') { | ||
96 | - p++; | ||
97 | - if (!strcmp(p, "large")) { | ||
98 | - translation = BIOS_ATA_TRANSLATION_LARGE; | ||
99 | - } else if (!strcmp(p, "rechs")) { | ||
100 | - translation = BIOS_ATA_TRANSLATION_RECHS; | ||
101 | - } else if (!strcmp(p, "none")) { | ||
102 | - translation = BIOS_ATA_TRANSLATION_NONE; | ||
103 | - } else if (!strcmp(p, "lba")) { | ||
104 | - translation = BIOS_ATA_TRANSLATION_LBA; | ||
105 | - } else if (!strcmp(p, "auto")) { | ||
106 | - translation = BIOS_ATA_TRANSLATION_AUTO; | ||
107 | - } else { | ||
108 | - goto chs_fail; | ||
109 | - } | ||
110 | - } else if (*p != '\0') { | ||
111 | - chs_fail: | ||
112 | - error_report("invalid physical CHS format"); | ||
113 | - exit(1); | ||
114 | - } | ||
115 | - if (hda_opts != NULL) { | ||
116 | - qemu_opt_set_number(hda_opts, "cyls", cyls, | ||
117 | - &error_abort); | ||
118 | - qemu_opt_set_number(hda_opts, "heads", heads, | ||
119 | - &error_abort); | ||
120 | - qemu_opt_set_number(hda_opts, "secs", secs, | ||
121 | - &error_abort); | ||
122 | - if (translation == BIOS_ATA_TRANSLATION_LARGE) { | ||
123 | - qemu_opt_set(hda_opts, "trans", "large", | ||
124 | - &error_abort); | ||
125 | - } else if (translation == BIOS_ATA_TRANSLATION_RECHS) { | ||
126 | - qemu_opt_set(hda_opts, "trans", "rechs", | ||
127 | - &error_abort); | ||
128 | - } else if (translation == BIOS_ATA_TRANSLATION_LBA) { | ||
129 | - qemu_opt_set(hda_opts, "trans", "lba", | ||
130 | - &error_abort); | ||
131 | - } else if (translation == BIOS_ATA_TRANSLATION_NONE) { | ||
132 | - qemu_opt_set(hda_opts, "trans", "none", | ||
133 | - &error_abort); | ||
134 | - } | ||
135 | - } | ||
136 | - } | ||
137 | - error_report("'-hdachs' is deprecated, please use '-device" | ||
138 | - " ide-hd,cyls=c,heads=h,secs=s,...' instead"); | ||
139 | - break; | ||
140 | case QEMU_OPTION_numa: | ||
141 | opts = qemu_opts_parse_noisily(qemu_find_opts("numa"), | ||
142 | optarg, true); | ||
143 | diff --git a/qemu-doc.texi b/qemu-doc.texi | ||
144 | index XXXXXXX..XXXXXXX 100644 | ||
145 | --- a/qemu-doc.texi | ||
146 | +++ b/qemu-doc.texi | ||
147 | @@ -XXX,XX +XXX,XX @@ The ``--net dump'' argument is now replaced with the | ||
148 | ``-object filter-dump'' argument which works in combination | ||
149 | with the modern ``-netdev`` backends instead. | ||
150 | |||
151 | -@subsection -hdachs (since 2.10.0) | ||
152 | - | ||
153 | -The ``-hdachs'' argument is now a synonym for setting | ||
154 | -the ``cyls'', ``heads'', ``secs'', and ``trans'' properties | ||
155 | -on the ``ide-hd'' device using the ``-device'' argument. | ||
156 | -The new syntax allows different settings to be provided | ||
157 | -per disk. | ||
158 | - | ||
159 | @subsection -usbdevice (since 2.10.0) | ||
160 | |||
161 | The ``-usbdevice DEV'' argument is now a synonym for setting | ||
162 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/qemu-options.hx | ||
165 | +++ b/qemu-options.hx | ||
166 | @@ -XXX,XX +XXX,XX @@ of available connectors of a given interface type. | ||
167 | @item media=@var{media} | ||
168 | This option defines the type of the media: disk or cdrom. | ||
169 | @item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}] | ||
170 | -These options have the same definition as they have in @option{-hdachs}. | ||
171 | -These parameters are deprecated, use the corresponding parameters | ||
172 | +Force disk physical geometry and the optional BIOS translation (trans=none or | ||
173 | +lba). These parameters are deprecated, use the corresponding parameters | ||
174 | of @code{-device} instead. | ||
175 | @item snapshot=@var{snapshot} | ||
176 | @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive | ||
177 | @@ -XXX,XX +XXX,XX @@ the raw disk image you use is not written back. You can however force | ||
178 | the write back by pressing @key{C-a s} (@pxref{disk_images}). | ||
179 | ETEXI | ||
180 | |||
181 | -DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \ | ||
182 | - "-hdachs c,h,s[,t]\n" \ | ||
183 | - " force hard disk 0 physical geometry and the optional BIOS\n" \ | ||
184 | - " translation (t=none or lba) (usually QEMU can guess them)\n", | ||
185 | - QEMU_ARCH_ALL) | ||
186 | -STEXI | ||
187 | -@item -hdachs @var{c},@var{h},@var{s},[,@var{t}] | ||
188 | -@findex -hdachs | ||
189 | -Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <= | ||
190 | -@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS | ||
191 | -translation mode (@var{t}=none, lba or auto). Usually QEMU can guess | ||
192 | -all those parameters. This option is deprecated, please use | ||
193 | -@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead. | ||
194 | -ETEXI | ||
195 | - | ||
196 | DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, | ||
197 | "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n" | ||
198 | " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n" | ||
79 | -- | 199 | -- |
80 | 2.13.6 | 200 | 2.13.6 |
81 | 201 | ||
82 | 202 | diff view generated by jsdifflib |
1 | From: Greg Kurz <groug@kaod.org> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Removing a drive with drive_del while it is being used to run an I/O | 3 | Looks like we forgot to announce the deprecation of these options in |
4 | intensive workload can cause QEMU to crash. | 4 | the corresponding chapter of the qemu-doc text, so let's do that now. |
5 | 5 | ||
6 | An AIO flush can yield at some point: | 6 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
7 | 7 | Reviewed-by: John Snow <jsnow@redhat.com> | |
8 | blk_aio_flush_entry() | 8 | Reviewed-by: Markus Armbruster <armbru@redhat.com> |
9 | blk_co_flush(blk) | ||
10 | bdrv_co_flush(blk->root->bs) | ||
11 | ... | ||
12 | qemu_coroutine_yield() | ||
13 | |||
14 | and let the HMP command run, free blk->root, and give control | ||
15 | back to the AIO flush: | ||
16 | |||
17 | hmp_drive_del() | ||
18 | blk_remove_bs() | ||
19 | bdrv_root_unref_child(blk->root) | ||
20 | child_bs = blk->root->bs | ||
21 | bdrv_detach_child(blk->root) | ||
22 | bdrv_replace_child(blk->root, NULL) | ||
23 | blk->root->bs = NULL | ||
24 | g_free(blk->root) <============== blk->root becomes stale | ||
25 | bdrv_unref(child_bs) | ||
26 | bdrv_delete(child_bs) | ||
27 | bdrv_close() | ||
28 | bdrv_drained_begin() | ||
29 | bdrv_do_drained_begin() | ||
30 | bdrv_drain_recurse() | ||
31 | aio_poll() | ||
32 | ... | ||
33 | qemu_coroutine_switch() | ||
34 | |||
35 | and the AIO flush completion ends up dereferencing blk->root: | ||
36 | |||
37 | blk_aio_complete() | ||
38 | scsi_aio_complete() | ||
39 | blk_get_aio_context(blk) | ||
40 | bs = blk_bs(blk) | ||
41 | ie, bs = blk->root ? blk->root->bs : NULL | ||
42 | ^^^^^ | ||
43 | stale | ||
44 | |||
45 | The problem is that we should avoid making block driver graph | ||
46 | changes while we have in-flight requests. Let's drain all I/O | ||
47 | for this BB before calling bdrv_root_unref_child(). | ||
48 | |||
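
The bug is a classic use-after-free between a yielded coroutine and a graph change; the fix orders a full drain before the edge is freed. A toy model of that ordering (hypothetical names throughout, with an integer counter standing in for aio_poll() progress):

    #include <assert.h>
    #include <stdlib.h>

    typedef struct Edge { int dummy; } Edge;
    typedef struct Backend { Edge *root; int in_flight; } Backend;

    static void drain(Backend *blk)
    {
        while (blk->in_flight) {
            blk->in_flight--;    /* stands in for aio_poll() completing work */
        }
    }

    static void remove_root(Backend *blk)
    {
        drain(blk);              /* no request can still hold blk->root */
        free(blk->root);         /* only now is freeing the edge safe */
        blk->root = NULL;
    }

    int main(void)
    {
        Backend blk = { malloc(sizeof(Edge)), 2 };
        remove_root(&blk);
        assert(blk.root == NULL && blk.in_flight == 0);
        return 0;
    }
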
49 | Signed-off-by: Greg Kurz <groug@kaod.org> | ||
50 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
51 | --- | 10 | --- |
52 | block/block-backend.c | 5 +++++ | 11 | qemu-doc.texi | 15 +++++++++++++++ |
53 | 1 file changed, 5 insertions(+) | 12 | 1 file changed, 15 insertions(+) |
54 | 13 | ||
55 | diff --git a/block/block-backend.c b/block/block-backend.c | 14 | diff --git a/qemu-doc.texi b/qemu-doc.texi |
56 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
57 | --- a/block/block-backend.c | 16 | --- a/qemu-doc.texi |
58 | +++ b/block/block-backend.c | 17 | +++ b/qemu-doc.texi |
59 | @@ -XXX,XX +XXX,XX @@ void blk_remove_bs(BlockBackend *blk) | 18 | @@ -XXX,XX +XXX,XX @@ longer be directly supported in QEMU. |
60 | 19 | The ``-drive if=scsi'' argument is replaced by the |
61 | blk_update_root_state(blk); | 20 | ``-device BUS-TYPE'' argument combined with ``-drive if=none''. |
62 | 21 | ||
63 | + /* bdrv_root_unref_child() will cause blk->root to become stale and may | 22 | +@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0) |
64 | + * switch to a completion coroutine later on. Let's drain all I/O here | 23 | + |
65 | + * to avoid that and a potential QEMU crash. | 24 | +The drive geometry arguments are replaced by the geometry arguments |
66 | + */ | 25 | +that can be specified with the ``-device'' parameter. |
67 | + blk_drain(blk); | 26 | + |
68 | bdrv_root_unref_child(blk->root); | 27 | +@subsection -drive serial=... (since 2.10.0) |
69 | blk->root = NULL; | 28 | + |
70 | } | 29 | +The drive serial argument is replaced by the serial argument |
30 | +that can be specified with the ``-device'' parameter. | ||
31 | + | ||
32 | +@subsection -drive addr=... (since 2.10.0) | ||
33 | + | ||
34 | +The drive addr argument is replaced by the addr argument | ||
35 | +that can be specified with the ``-device'' parameter. | ||
36 | + | ||
37 | @subsection -net dump (since 2.10.0) | ||
38 | |||
39 | The ``--net dump'' argument is now replaced with the | ||
71 | -- | 40 | -- |
72 | 2.13.6 | 41 | 2.13.6 |
73 | 42 | ||
74 | 43 | diff view generated by jsdifflib |
1 | For bdrv_drain(), recursively waiting for child node requests is | 1 | From: Fam Zheng <famz@redhat.com> |
---|---|---|---|
2 | pointless because we didn't quiesce their parents, so new requests could | ||
3 | come in anyway. Letting the function work only on a single node makes it | ||
4 | more consistent. | ||
5 | 2 | ||
6 | For subtree drains and drain_all, we already have the recursion in | 3 | Signed-off-by: Fam Zheng <famz@redhat.com> |
7 | bdrv_do_drained_begin(), so the extra recursion doesn't add anything | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | either. | 5 | --- |
6 | include/block/block_int.h | 1 - | ||
7 | block/io.c | 18 ------------------ | ||
8 | 2 files changed, 19 deletions(-) | ||
9 | 9 | ||
10 | Remove the useless code. | 10 | diff --git a/include/block/block_int.h b/include/block/block_int.h |
11 | 11 | index XXXXXXX..XXXXXXX 100644 | |
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 12 | --- a/include/block/block_int.h |
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 13 | +++ b/include/block/block_int.h |
14 | --- | 14 | @@ -XXX,XX +XXX,XX @@ bool blk_dev_is_tray_open(BlockBackend *blk); |
15 | block/io.c | 36 +++--------------------------------- | 15 | bool blk_dev_is_medium_locked(BlockBackend *blk); |
16 | 1 file changed, 3 insertions(+), 33 deletions(-) | 16 | |
17 | 17 | void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); | |
18 | -bool bdrv_requests_pending(BlockDriverState *bs); | ||
19 | |||
20 | void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); | ||
21 | void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); | ||
18 | diff --git a/block/io.c b/block/io.c | 22 | diff --git a/block/io.c b/block/io.c |
19 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/io.c | 24 | --- a/block/io.c |
21 | +++ b/block/io.c | 25 | +++ b/block/io.c |
22 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, | 26 | @@ -XXX,XX +XXX,XX @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) |
23 | return bdrv_drain_poll(bs, ignore_parent); | 27 | assert(old >= 1); |
24 | } | 28 | } |
25 | 29 | ||
26 | -static bool bdrv_drain_recurse(BlockDriverState *bs, BdrvChild *parent) | 30 | -/* Check if any requests are in-flight (including throttled requests) */ |
31 | -bool bdrv_requests_pending(BlockDriverState *bs) | ||
27 | -{ | 32 | -{ |
28 | - BdrvChild *child, *tmp; | 33 | - BdrvChild *child; |
29 | - bool waited; | ||
30 | - | 34 | - |
31 | - /* Wait for drained requests to finish */ | 35 | - if (atomic_read(&bs->in_flight)) { |
32 | - waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); | 36 | - return true; |
37 | - } | ||
33 | - | 38 | - |
34 | - QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { | 39 | - QLIST_FOREACH(child, &bs->children, next) { |
35 | - BlockDriverState *bs = child->bs; | 40 | - if (bdrv_requests_pending(child->bs)) { |
36 | - bool in_main_loop = | 41 | - return true; |
37 | - qemu_get_current_aio_context() == qemu_get_aio_context(); | ||
38 | - assert(bs->refcnt > 0); | ||
39 | - if (in_main_loop) { | ||
40 | - /* In case the recursive bdrv_drain_recurse processes a | ||
41 | - * block_job_defer_to_main_loop BH and modifies the graph, | ||
42 | - * let's hold a reference to bs until we are done. | ||
43 | - * | ||
44 | - * IOThread doesn't have such a BH, and it is not safe to call | ||
45 | - * bdrv_unref without BQL, so skip doing it there. | ||
46 | - */ | ||
47 | - bdrv_ref(bs); | ||
48 | - } | ||
49 | - waited |= bdrv_drain_recurse(bs, child); | ||
50 | - if (in_main_loop) { | ||
51 | - bdrv_unref(bs); | ||
52 | - } | 42 | - } |
53 | - } | 43 | - } |
54 | - | 44 | - |
55 | - return waited; | 45 | - return false; |
56 | -} | 46 | -} |
57 | - | 47 | - |
58 | static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | 48 | typedef struct { |
59 | BdrvChild *parent); | 49 | Coroutine *co; |
60 | static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | 50 | BlockDriverState *bs; |
61 | @@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
62 | |||
63 | bdrv_parent_drained_begin(bs, parent); | ||
64 | bdrv_drain_invoke(bs, true); | ||
65 | - bdrv_drain_recurse(bs, parent); | ||
66 | + | ||
67 | + /* Wait for drained requests to finish */ | ||
68 | + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); | ||
69 | |||
70 | if (recursive) { | ||
71 | bs->recursive_quiesce_counter++; | ||
72 | -- | 51 | -- |
73 | 2.13.6 | 52 | 2.13.6 |
74 | 53 | ||
75 | 54 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
---|---|---|---|
2 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
3 | --- | ||
4 | block/io.c | 6 ++++++ | ||
5 | 1 file changed, 6 insertions(+) | ||
2 | 6 | ||
3 | With this, the mirror_top_bs is no longer just a technically required | 7 | diff --git a/block/io.c b/block/io.c |
4 | node in the BDS graph but actually represents the block job operation. | ||
5 | |||
6 | Also, drop MirrorBlockJob.source, as we can reach it through | ||
7 | mirror_top_bs->backing. | ||
8 | |||
9 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
10 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
11 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
12 | Message-id: 20180613181823.13618-6-mreitz@redhat.com | ||
13 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
14 | --- | ||
15 | block/mirror.c | 14 ++++++-------- | ||
16 | 1 file changed, 6 insertions(+), 8 deletions(-) | ||
17 | |||
18 | diff --git a/block/mirror.c b/block/mirror.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/mirror.c | 9 | --- a/block/io.c |
21 | +++ b/block/mirror.c | 10 | +++ b/block/io.c |
22 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob { | 11 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
23 | BlockJob common; | 12 | BdrvNextIterator it; |
24 | BlockBackend *target; | 13 | GSList *aio_ctxs = NULL, *ctx; |
25 | BlockDriverState *mirror_top_bs; | 14 | |
26 | - BlockDriverState *source; | 15 | + /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread |
27 | BlockDriverState *base; | 16 | + * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on |
28 | 17 | + * nodes in several different AioContexts, so make sure we're in the main | |
29 | /* The name of the graph node to replace */ | 18 | + * context. */ |
30 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_co_read(void *opaque) | 19 | + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); |
31 | { | 20 | + |
32 | MirrorOp *op = opaque; | 21 | block_job_pause_all(); |
33 | MirrorBlockJob *s = op->s; | 22 | |
34 | - BlockBackend *source = s->common.blk; | 23 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
35 | int nb_chunks; | ||
36 | uint64_t ret; | ||
37 | uint64_t max_bytes; | ||
38 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_co_read(void *opaque) | ||
39 | s->bytes_in_flight += op->bytes; | ||
40 | trace_mirror_one_iteration(s, op->offset, op->bytes); | ||
41 | |||
42 | - ret = blk_co_preadv(source, op->offset, op->bytes, &op->qiov, 0); | ||
43 | + ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, | ||
44 | + &op->qiov, 0); | ||
45 | mirror_read_complete(op, ret); | ||
46 | } | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, | ||
49 | |||
50 | static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | ||
51 | { | ||
52 | - BlockDriverState *source = s->source; | ||
53 | + BlockDriverState *source = s->mirror_top_bs->backing->bs; | ||
54 | MirrorOp *pseudo_op; | ||
55 | int64_t offset; | ||
56 | uint64_t delay_ns = 0, ret = 0; | ||
57 | @@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job, void *opaque) | ||
58 | BlockJob *bjob = &s->common; | ||
59 | MirrorExitData *data = opaque; | ||
60 | AioContext *replace_aio_context = NULL; | ||
61 | - BlockDriverState *src = s->source; | ||
62 | + BlockDriverState *src = s->mirror_top_bs->backing->bs; | ||
63 | BlockDriverState *target_bs = blk_bs(s->target); | ||
64 | BlockDriverState *mirror_top_bs = s->mirror_top_bs; | ||
65 | Error *local_err = NULL; | ||
66 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) | ||
67 | { | ||
68 | int64_t offset; | ||
69 | BlockDriverState *base = s->base; | ||
70 | - BlockDriverState *bs = s->source; | ||
71 | + BlockDriverState *bs = s->mirror_top_bs->backing->bs; | ||
72 | BlockDriverState *target_bs = blk_bs(s->target); | ||
73 | int ret; | ||
74 | int64_t count; | ||
75 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque) | ||
76 | { | ||
77 | MirrorBlockJob *s = opaque; | ||
78 | MirrorExitData *data; | ||
79 | - BlockDriverState *bs = s->source; | ||
80 | + BlockDriverState *bs = s->mirror_top_bs->backing->bs; | ||
81 | BlockDriverState *target_bs = blk_bs(s->target); | ||
82 | bool need_drain = true; | ||
83 | int64_t length; | ||
84 | @@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, | ||
85 | /* The block job now has a reference to this node */ | ||
86 | bdrv_unref(mirror_top_bs); | ||
87 | |||
88 | - s->source = bs; | ||
89 | s->mirror_top_bs = mirror_top_bs; | ||
90 | |||
91 | /* No resize for the target either; while the mirror is still running, a | ||
92 | -- | 24 | -- |
93 | 2.13.6 | 25 | 2.13.6 |
94 | 26 | ||
95 | 27 | diff view generated by jsdifflib |
1 | We cannot allow aio_poll() in bdrv_drain_invoke(begin=true) until we're | 1 | bdrv_drained_begin() doesn't increase bs->quiesce_counter recursively |
---|---|---|---|
2 | done with propagating the drain through the graph and are doing the | 2 | and also doesn't notify other parent nodes of children, which both means |
3 | single final BDRV_POLL_WHILE(). | 3 | that the child nodes are not actually drained, and bdrv_drained_begin() |
4 | provides useful functionality only on a single node. | ||
4 | 5 | ||
5 | Just schedule the coroutine with the callback and increase bs->in_flight | 6 | To keep things consistent, we also shouldn't call the block driver |
6 | to make sure that the polling phase will wait for it. | 7 | callbacks recursively. |
8 | |||
9 | A proper recursive drain version that provides an actually working | ||
10 | drained section for child nodes will be introduced later. | ||
7 | 11 | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
9 | --- | 14 | --- |
10 | block/io.c | 28 +++++++++++++++++++++++----- | 15 | block/io.c | 16 +++++++++------- |
11 | 1 file changed, 23 insertions(+), 5 deletions(-) | 16 | 1 file changed, 9 insertions(+), 7 deletions(-) |
12 | 17 | ||
13 | diff --git a/block/io.c b/block/io.c | 18 | diff --git a/block/io.c b/block/io.c |
14 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/block/io.c | 20 | --- a/block/io.c |
16 | +++ b/block/io.c | 21 | +++ b/block/io.c |
17 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) | 22 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) |
18 | |||
19 | /* Set data->done before reading bs->wakeup. */ | ||
20 | atomic_mb_set(&data->done, true); | ||
21 | - bdrv_wakeup(bs); | ||
22 | + bdrv_dec_in_flight(bs); | ||
23 | + | ||
24 | + if (data->begin) { | ||
25 | + g_free(data); | ||
26 | + } | ||
27 | } | 23 | } |
28 | 24 | ||
29 | /* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ | 25 | /* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ |
30 | static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | 26 | -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
27 | +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive) | ||
31 | { | 28 | { |
32 | - BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; | 29 | BdrvChild *child, *tmp; |
33 | + BdrvCoDrainData *data; | 30 | BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; |
34 | 31 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | |
35 | if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || | 32 | bdrv_coroutine_enter(bs, data.co); |
36 | (!begin && !bs->drv->bdrv_co_drain_end)) { | 33 | BDRV_POLL_WHILE(bs, !data.done); |
37 | return; | 34 | |
35 | - QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { | ||
36 | - bdrv_drain_invoke(child->bs, begin); | ||
37 | + if (recursive) { | ||
38 | + QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { | ||
39 | + bdrv_drain_invoke(child->bs, begin, true); | ||
40 | + } | ||
38 | } | 41 | } |
39 | |||
40 | - data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data); | ||
41 | - bdrv_coroutine_enter(bs, data.co); | ||
42 | - BDRV_POLL_WHILE(bs, !data.done); | ||
43 | + data = g_new(BdrvCoDrainData, 1); | ||
44 | + *data = (BdrvCoDrainData) { | ||
45 | + .bs = bs, | ||
46 | + .done = false, | ||
47 | + .begin = begin | ||
48 | + }; | ||
49 | + | ||
50 | + /* Make sure the driver callback completes during the polling phase for | ||
51 | + * drain_begin. */ | ||
52 | + bdrv_inc_in_flight(bs); | ||
53 | + data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); | ||
54 | + aio_co_schedule(bdrv_get_aio_context(bs), data->co); | ||
55 | + | ||
56 | + if (!begin) { | ||
57 | + BDRV_POLL_WHILE(bs, !data->done); | ||
58 | + g_free(data); | ||
59 | + } | ||
60 | } | 42 | } |
61 | 43 | ||
62 | /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ | 44 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
45 | bdrv_parent_drained_begin(bs); | ||
46 | } | ||
47 | |||
48 | - bdrv_drain_invoke(bs, true); | ||
49 | + bdrv_drain_invoke(bs, true, false); | ||
50 | bdrv_drain_recurse(bs); | ||
51 | } | ||
52 | |||
53 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | ||
54 | } | ||
55 | |||
56 | /* Re-enable things in child-to-parent order */ | ||
57 | - bdrv_drain_invoke(bs, false); | ||
58 | + bdrv_drain_invoke(bs, false, false); | ||
59 | bdrv_parent_drained_end(bs); | ||
60 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
61 | } | ||
62 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
63 | aio_context_acquire(aio_context); | ||
64 | aio_disable_external(aio_context); | ||
65 | bdrv_parent_drained_begin(bs); | ||
66 | - bdrv_drain_invoke(bs, true); | ||
67 | + bdrv_drain_invoke(bs, true, true); | ||
68 | aio_context_release(aio_context); | ||
69 | |||
70 | if (!g_slist_find(aio_ctxs, aio_context)) { | ||
71 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
72 | |||
73 | /* Re-enable things in child-to-parent order */ | ||
74 | aio_context_acquire(aio_context); | ||
75 | - bdrv_drain_invoke(bs, false); | ||
76 | + bdrv_drain_invoke(bs, false, true); | ||
77 | bdrv_parent_drained_end(bs); | ||
78 | aio_enable_external(aio_context); | ||
79 | aio_context_release(aio_context); | ||
63 | -- | 80 | -- |
64 | 2.13.6 | 81 | 2.13.6 |
65 | 82 | ||
66 | 83 | diff view generated by jsdifflib |
1 | This adds a test case that goes wrong if bdrv_drain_invoke() calls | 1 | The existing test is for bdrv_drain_all_begin/end() only. Generalise the |
---|---|---|---|
2 | aio_poll(). | 2 | test case so that it can be run for the other variants as well. At the |
3 | moment this is only bdrv_drain_begin/end(), but in a while, we'll add | ||
4 | another one. | ||
5 | |||
6 | Also, add a backing file to the test node to test whether the operations | ||
7 | work recursively. | ||
3 | 8 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | --- | 10 | --- |
6 | tests/test-bdrv-drain.c | 102 +++++++++++++++++++++++++++++++++++++++++------- | 11 | tests/test-bdrv-drain.c | 69 ++++++++++++++++++++++++++++++++++++++++++++----- |
7 | 1 file changed, 88 insertions(+), 14 deletions(-) | 12 | 1 file changed, 62 insertions(+), 7 deletions(-) |
8 | 13 | ||
9 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 14 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
10 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/tests/test-bdrv-drain.c | 16 | --- a/tests/test-bdrv-drain.c |
12 | +++ b/tests/test-bdrv-drain.c | 17 | +++ b/tests/test-bdrv-drain.c |
13 | @@ -XXX,XX +XXX,XX @@ static QemuEvent done_event; | ||
14 | typedef struct BDRVTestState { | ||
15 | int drain_count; | ||
16 | AioContext *bh_indirection_ctx; | ||
17 | + bool sleep_in_drain_begin; | ||
18 | } BDRVTestState; | ||
19 | |||
20 | static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) | ||
21 | { | ||
22 | BDRVTestState *s = bs->opaque; | ||
23 | s->drain_count++; | ||
24 | + if (s->sleep_in_drain_begin) { | ||
25 | + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); | ||
26 | + } | ||
27 | } | ||
28 | |||
29 | static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) | ||
30 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs, | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | +static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c, | ||
35 | + const BdrvChildRole *role, | ||
36 | + BlockReopenQueue *reopen_queue, | ||
37 | + uint64_t perm, uint64_t shared, | ||
38 | + uint64_t *nperm, uint64_t *nshared) | ||
39 | +{ | ||
40 | + /* bdrv_format_default_perms() accepts only these two, so disguise | ||
41 | + * detach_by_driver_cb_role as one of them. */ | ||
42 | + if (role != &child_file && role != &child_backing) { | ||
43 | + role = &child_file; | ||
44 | + } | ||
45 | + | ||
46 | + bdrv_format_default_perms(bs, c, role, reopen_queue, perm, shared, | ||
47 | + nperm, nshared); | ||
48 | +} | ||
49 | + | ||
50 | static BlockDriver bdrv_test = { | ||
51 | .format_name = "test", | ||
52 | .instance_size = sizeof(BDRVTestState), | ||
53 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = { | 18 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = { |
19 | |||
54 | .bdrv_co_drain_begin = bdrv_test_co_drain_begin, | 20 | .bdrv_co_drain_begin = bdrv_test_co_drain_begin, |
55 | .bdrv_co_drain_end = bdrv_test_co_drain_end, | 21 | .bdrv_co_drain_end = bdrv_test_co_drain_end, |
56 | 22 | + | |
57 | - .bdrv_child_perm = bdrv_format_default_perms, | 23 | + .bdrv_child_perm = bdrv_format_default_perms, |
58 | + .bdrv_child_perm = bdrv_test_child_perm, | ||
59 | }; | 24 | }; |
60 | 25 | ||
61 | static void aio_ret_cb(void *opaque, int ret) | 26 | static void aio_ret_cb(void *opaque, int ret) |
62 | @@ -XXX,XX +XXX,XX @@ struct detach_by_parent_data { | 27 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
63 | BdrvChild *child_b; | 28 | *aio_ret = ret; |
64 | BlockDriverState *c; | ||
65 | BdrvChild *child_c; | ||
66 | + bool by_parent_cb; | ||
67 | }; | ||
68 | +static struct detach_by_parent_data detach_by_parent_data; | ||
69 | |||
70 | -static void detach_by_parent_aio_cb(void *opaque, int ret) | ||
71 | +static void detach_indirect_bh(void *opaque) | ||
72 | { | ||
73 | struct detach_by_parent_data *data = opaque; | ||
74 | |||
75 | - g_assert_cmpint(ret, ==, 0); | ||
76 | bdrv_unref_child(data->parent_b, data->child_b); | ||
77 | |||
78 | bdrv_ref(data->c); | ||
79 | @@ -XXX,XX +XXX,XX @@ static void detach_by_parent_aio_cb(void *opaque, int ret) | ||
80 | &child_file, &error_abort); | ||
81 | } | 29 | } |
82 | 30 | ||
83 | +static void detach_by_parent_aio_cb(void *opaque, int ret) | 31 | -static void test_drv_cb_drain_all(void) |
32 | +enum drain_type { | ||
33 | + BDRV_DRAIN_ALL, | ||
34 | + BDRV_DRAIN, | ||
35 | +}; | ||
36 | + | ||
37 | +static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) | ||
84 | +{ | 38 | +{ |
85 | + struct detach_by_parent_data *data = &detach_by_parent_data; | 39 | + switch (drain_type) { |
86 | + | 40 | + case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; |
87 | + g_assert_cmpint(ret, ==, 0); | 41 | + case BDRV_DRAIN: bdrv_drained_begin(bs); break; |
88 | + if (data->by_parent_cb) { | 42 | + default: g_assert_not_reached(); |
89 | + detach_indirect_bh(data); | ||
90 | + } | 43 | + } |
91 | +} | 44 | +} |
92 | + | 45 | + |
93 | +static void detach_by_driver_cb_drained_begin(BdrvChild *child) | 46 | +static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) |
94 | +{ | 47 | +{ |
95 | + aio_bh_schedule_oneshot(qemu_get_current_aio_context(), | 48 | + switch (drain_type) { |
96 | + detach_indirect_bh, &detach_by_parent_data); | 49 | + case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; |
97 | + child_file.drained_begin(child); | 50 | + case BDRV_DRAIN: bdrv_drained_end(bs); break; |
51 | + default: g_assert_not_reached(); | ||
52 | + } | ||
98 | +} | 53 | +} |
99 | + | 54 | + |
100 | +static BdrvChildRole detach_by_driver_cb_role; | 55 | +static void test_drv_cb_common(enum drain_type drain_type, bool recursive) |
101 | + | ||
102 | /* | ||
103 | * Initial graph: | ||
104 | * | ||
105 | @@ -XXX,XX +XXX,XX @@ static void detach_by_parent_aio_cb(void *opaque, int ret) | ||
106 | * \ / \ | ||
107 | * A B C | ||
108 | * | ||
109 | - * PA has a pending write request whose callback changes the child nodes of PB: | ||
110 | - * It removes B and adds C instead. The subtree of PB is drained, which will | ||
111 | - * indirectly drain the write request, too. | ||
112 | + * by_parent_cb == true: Test that parent callbacks don't poll | ||
113 | + * | ||
114 | + * PA has a pending write request whose callback changes the child nodes of | ||
115 | + * PB: It removes B and adds C instead. The subtree of PB is drained, which | ||
116 | + * will indirectly drain the write request, too. | ||
117 | + * | ||
118 | + * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll | ||
119 | + * | ||
120 | + * PA's BdrvChildRole has a .drained_begin callback that schedules a BH | ||
121 | + * that does the same graph change. If bdrv_drain_invoke() calls it, the | ||
122 | + * state is messed up, but if it is only polled in the single | ||
123 | + * BDRV_POLL_WHILE() at the end of the drain, this should work fine. | ||
124 | */ | ||
125 | -static void test_detach_by_parent_cb(void) | ||
126 | +static void test_detach_indirect(bool by_parent_cb) | ||
127 | { | 56 | { |
128 | BlockBackend *blk; | 57 | BlockBackend *blk; |
129 | BlockDriverState *parent_a, *parent_b, *a, *b, *c; | 58 | - BlockDriverState *bs; |
130 | BdrvChild *child_a, *child_b; | 59 | - BDRVTestState *s; |
60 | + BlockDriverState *bs, *backing; | ||
61 | + BDRVTestState *s, *backing_s; | ||
131 | BlockAIOCB *acb; | 62 | BlockAIOCB *acb; |
132 | - struct detach_by_parent_data data; | 63 | int aio_ret; |
133 | 64 | ||
134 | QEMUIOVector qiov; | 65 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void) |
135 | struct iovec iov = { | 66 | s = bs->opaque; |
136 | @@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void) | 67 | blk_insert_bs(blk, bs, &error_abort); |
137 | }; | 68 | |
138 | qemu_iovec_init_external(&qiov, &iov, 1); | 69 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); |
139 | 70 | + backing_s = backing->opaque; | |
140 | + if (!by_parent_cb) { | 71 | + bdrv_set_backing_hd(bs, backing, &error_abort); |
141 | + detach_by_driver_cb_role = child_file; | ||
142 | + detach_by_driver_cb_role.drained_begin = | ||
143 | + detach_by_driver_cb_drained_begin; | ||
144 | + } | ||
145 | + | 72 | + |
146 | /* Create all involved nodes */ | 73 | /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ |
147 | parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR, | 74 | g_assert_cmpint(s->drain_count, ==, 0); |
148 | &error_abort); | 75 | - bdrv_drain_all_begin(); |
149 | @@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void) | 76 | + g_assert_cmpint(backing_s->drain_count, ==, 0); |
150 | blk_insert_bs(blk, parent_a, &error_abort); | ||
151 | bdrv_unref(parent_a); | ||
152 | |||
153 | + /* If we want to get bdrv_drain_invoke() to call aio_poll(), the driver | ||
154 | + * callback must not return immediately. */ | ||
155 | + if (!by_parent_cb) { | ||
156 | + BDRVTestState *s = parent_a->opaque; | ||
157 | + s->sleep_in_drain_begin = true; | ||
158 | + } | ||
159 | + | 77 | + |
160 | /* Set child relationships */ | 78 | + do_drain_begin(drain_type, bs); |
161 | bdrv_ref(b); | 79 | + |
162 | bdrv_ref(a); | 80 | g_assert_cmpint(s->drain_count, ==, 1); |
163 | @@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void) | 81 | - bdrv_drain_all_end(); |
164 | child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_backing, &error_abort); | 82 | + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); |
165 | 83 | + | |
166 | bdrv_ref(a); | 84 | + do_drain_end(drain_type, bs); |
167 | - bdrv_attach_child(parent_a, a, "PA-A", &child_file, &error_abort); | 85 | + |
168 | + bdrv_attach_child(parent_a, a, "PA-A", | 86 | g_assert_cmpint(s->drain_count, ==, 0); |
169 | + by_parent_cb ? &child_file : &detach_by_driver_cb_role, | 87 | + g_assert_cmpint(backing_s->drain_count, ==, 0); |
170 | + &error_abort); | 88 | |
171 | 89 | /* Now do the same while a request is pending */ | |
172 | g_assert_cmpint(parent_a->refcnt, ==, 1); | 90 | aio_ret = -EINPROGRESS; |
173 | g_assert_cmpint(parent_b->refcnt, ==, 1); | 91 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void) |
174 | @@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void) | 92 | g_assert_cmpint(aio_ret, ==, -EINPROGRESS); |
175 | g_assert(QLIST_NEXT(child_b, next) == NULL); | 93 | |
176 | 94 | g_assert_cmpint(s->drain_count, ==, 0); | |
177 | /* Start the evil write request */ | 95 | - bdrv_drain_all_begin(); |
178 | - data = (struct detach_by_parent_data) { | 96 | + g_assert_cmpint(backing_s->drain_count, ==, 0); |
179 | + detach_by_parent_data = (struct detach_by_parent_data) { | 97 | + |
180 | .parent_b = parent_b, | 98 | + do_drain_begin(drain_type, bs); |
181 | .child_b = child_b, | 99 | + |
182 | .c = c, | 100 | g_assert_cmpint(aio_ret, ==, 0); |
183 | + .by_parent_cb = by_parent_cb, | 101 | g_assert_cmpint(s->drain_count, ==, 1); |
184 | }; | 102 | - bdrv_drain_all_end(); |
185 | - acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, &data); | 103 | + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); |
186 | + acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL); | 104 | + |
187 | g_assert(acb != NULL); | 105 | + do_drain_end(drain_type, bs); |
188 | 106 | + | |
189 | /* Drain and check the expected result */ | 107 | g_assert_cmpint(s->drain_count, ==, 0); |
190 | bdrv_subtree_drained_begin(parent_b); | 108 | + g_assert_cmpint(backing_s->drain_count, ==, 0); |
191 | 109 | ||
192 | - g_assert(data.child_c != NULL); | 110 | + bdrv_unref(backing); |
193 | + g_assert(detach_by_parent_data.child_c != NULL); | 111 | bdrv_unref(bs); |
194 | 112 | blk_unref(blk); | |
195 | g_assert_cmpint(parent_a->refcnt, ==, 1); | ||
196 | g_assert_cmpint(parent_b->refcnt, ==, 1); | ||
197 | @@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void) | ||
198 | g_assert_cmpint(b->refcnt, ==, 1); | ||
199 | g_assert_cmpint(c->refcnt, ==, 2); | ||
200 | |||
201 | - g_assert(QLIST_FIRST(&parent_b->children) == data.child_c); | ||
202 | - g_assert(QLIST_NEXT(data.child_c, next) == child_a); | ||
203 | + g_assert(QLIST_FIRST(&parent_b->children) == detach_by_parent_data.child_c); | ||
204 | + g_assert(QLIST_NEXT(detach_by_parent_data.child_c, next) == child_a); | ||
205 | g_assert(QLIST_NEXT(child_a, next) == NULL); | ||
206 | |||
207 | g_assert_cmpint(parent_a->quiesce_counter, ==, 1); | ||
208 | @@ -XXX,XX +XXX,XX @@ static void test_detach_by_parent_cb(void) | ||
209 | bdrv_unref(c); | ||
210 | } | 113 | } |
211 | 114 | ||
212 | +static void test_detach_by_parent_cb(void) | 115 | +static void test_drv_cb_drain_all(void) |
213 | +{ | 116 | +{ |
214 | + test_detach_indirect(true); | 117 | + test_drv_cb_common(BDRV_DRAIN_ALL, true); |
215 | +} | 118 | +} |
216 | + | 119 | + |
217 | +static void test_detach_by_driver_cb(void) | 120 | +static void test_drv_cb_drain(void) |
218 | +{ | 121 | +{ |
219 | + test_detach_indirect(false); | 122 | + test_drv_cb_common(BDRV_DRAIN, false); |
220 | +} | 123 | +} |
221 | 124 | + | |
222 | int main(int argc, char **argv) | 125 | int main(int argc, char **argv) |
223 | { | 126 | { |
127 | bdrv_init(); | ||
224 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 128 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
225 | g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); | 129 | g_test_init(&argc, &argv, NULL); |
226 | g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); | 130 | |
227 | g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); | 131 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); |
228 | + g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); | 132 | + g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); |
229 | 133 | ||
230 | ret = g_test_run(); | 134 | return g_test_run(); |
231 | qemu_event_destroy(&done_event); | 135 | } |
232 | -- | 136 | -- |
233 | 2.13.6 | 137 | 2.13.6 |
234 | 138 | ||
235 | 139 | diff view generated by jsdifflib |
1 | This is currently only working correctly for bdrv_drain(), not for | ||
---|---|---|---|
2 | --- | 2 | bdrv_drain_all(). Leave a comment for the drain_all case; we'll address |
3 | it later. | ||
4 | |||
1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
2 | --- | 6 | --- |
3 | tests/test-bdrv-drain.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++ | 7 | tests/test-bdrv-drain.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ |
4 | 1 file changed, 130 insertions(+) | 8 | 1 file changed, 45 insertions(+) |
5 | 9 | ||
6 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
7 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
8 | --- a/tests/test-bdrv-drain.c | 12 | --- a/tests/test-bdrv-drain.c |
9 | +++ b/tests/test-bdrv-drain.c | 13 | +++ b/tests/test-bdrv-drain.c |
10 | @@ -XXX,XX +XXX,XX @@ static void test_detach_by_drain_subtree(void) | 14 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void) |
15 | test_drv_cb_common(BDRV_DRAIN, false); | ||
11 | } | 16 | } |
12 | 17 | ||
13 | 18 | +static void test_quiesce_common(enum drain_type drain_type, bool recursive) | |
14 | +struct detach_by_parent_data { | 19 | +{ |
15 | + BlockDriverState *parent_b; | 20 | + BlockBackend *blk; |
16 | + BdrvChild *child_b; | 21 | + BlockDriverState *bs, *backing; |
17 | + BlockDriverState *c; | ||
18 | + BdrvChild *child_c; | ||
19 | +}; | ||
20 | + | 22 | + |
21 | +static void detach_by_parent_aio_cb(void *opaque, int ret) | 23 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
22 | +{ | 24 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, |
23 | + struct detach_by_parent_data *data = opaque; | 25 | + &error_abort); |
26 | + blk_insert_bs(blk, bs, &error_abort); | ||
24 | + | 27 | + |
25 | + g_assert_cmpint(ret, ==, 0); | 28 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); |
26 | + bdrv_unref_child(data->parent_b, data->child_b); | 29 | + bdrv_set_backing_hd(bs, backing, &error_abort); |
27 | + | 30 | + |
28 | + bdrv_ref(data->c); | 31 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); |
29 | + data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C", | 32 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); |
30 | + &child_file, &error_abort); | 33 | + |
34 | + do_drain_begin(drain_type, bs); | ||
35 | + | ||
36 | + g_assert_cmpint(bs->quiesce_counter, ==, 1); | ||
37 | + g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); | ||
38 | + | ||
39 | + do_drain_end(drain_type, bs); | ||
40 | + | ||
41 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
42 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
43 | + | ||
44 | + bdrv_unref(backing); | ||
45 | + bdrv_unref(bs); | ||
46 | + blk_unref(blk); | ||
31 | +} | 47 | +} |
32 | + | 48 | + |
33 | +/* | 49 | +static void test_quiesce_drain_all(void) |
34 | + * Initial graph: | ||
35 | + * | ||
36 | + * PA PB | ||
37 | + * \ / \ | ||
38 | + * A B C | ||
39 | + * | ||
40 | + * PA has a pending write request whose callback changes the child nodes of PB: | ||
41 | + * It removes B and adds C instead. The subtree of PB is drained, which will | ||
42 | + * indirectly drain the write request, too. | ||
43 | + */ | ||
44 | +static void test_detach_by_parent_cb(void) | ||
45 | +{ | 50 | +{ |
46 | + BlockBackend *blk; | 51 | + // XXX drain_all doesn't quiesce |
47 | + BlockDriverState *parent_a, *parent_b, *a, *b, *c; | 52 | + //test_quiesce_common(BDRV_DRAIN_ALL, true); |
48 | + BdrvChild *child_a, *child_b; | ||
49 | + BlockAIOCB *acb; | ||
50 | + struct detach_by_parent_data data; | ||
51 | + | ||
52 | + QEMUIOVector qiov; | ||
53 | + struct iovec iov = { | ||
54 | + .iov_base = NULL, | ||
55 | + .iov_len = 0, | ||
56 | + }; | ||
57 | + qemu_iovec_init_external(&qiov, &iov, 1); | ||
58 | + | ||
59 | + /* Create all involved nodes */ | ||
60 | + parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR, | ||
61 | + &error_abort); | ||
62 | + parent_b = bdrv_new_open_driver(&bdrv_test, "parent-b", 0, | ||
63 | + &error_abort); | ||
64 | + | ||
65 | + a = bdrv_new_open_driver(&bdrv_test, "a", BDRV_O_RDWR, &error_abort); | ||
66 | + b = bdrv_new_open_driver(&bdrv_test, "b", BDRV_O_RDWR, &error_abort); | ||
67 | + c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort); | ||
68 | + | ||
69 | + /* blk is a BB for parent-a */ | ||
70 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
71 | + blk_insert_bs(blk, parent_a, &error_abort); | ||
72 | + bdrv_unref(parent_a); | ||
73 | + | ||
74 | + /* Set child relationships */ | ||
75 | + bdrv_ref(b); | ||
76 | + bdrv_ref(a); | ||
77 | + child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_file, &error_abort); | ||
78 | + child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_backing, &error_abort); | ||
79 | + | ||
80 | + bdrv_ref(a); | ||
81 | + bdrv_attach_child(parent_a, a, "PA-A", &child_file, &error_abort); | ||
82 | + | ||
83 | + g_assert_cmpint(parent_a->refcnt, ==, 1); | ||
84 | + g_assert_cmpint(parent_b->refcnt, ==, 1); | ||
85 | + g_assert_cmpint(a->refcnt, ==, 3); | ||
86 | + g_assert_cmpint(b->refcnt, ==, 2); | ||
87 | + g_assert_cmpint(c->refcnt, ==, 1); | ||
88 | + | ||
89 | + g_assert(QLIST_FIRST(&parent_b->children) == child_a); | ||
90 | + g_assert(QLIST_NEXT(child_a, next) == child_b); | ||
91 | + g_assert(QLIST_NEXT(child_b, next) == NULL); | ||
92 | + | ||
93 | + /* Start the evil write request */ | ||
94 | + data = (struct detach_by_parent_data) { | ||
95 | + .parent_b = parent_b, | ||
96 | + .child_b = child_b, | ||
97 | + .c = c, | ||
98 | + }; | ||
99 | + acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, &data); | ||
100 | + g_assert(acb != NULL); | ||
101 | + | ||
102 | + /* Drain and check the expected result */ | ||
103 | + bdrv_subtree_drained_begin(parent_b); | ||
104 | + | ||
105 | + g_assert(data.child_c != NULL); | ||
106 | + | ||
107 | + g_assert_cmpint(parent_a->refcnt, ==, 1); | ||
108 | + g_assert_cmpint(parent_b->refcnt, ==, 1); | ||
109 | + g_assert_cmpint(a->refcnt, ==, 3); | ||
110 | + g_assert_cmpint(b->refcnt, ==, 1); | ||
111 | + g_assert_cmpint(c->refcnt, ==, 2); | ||
112 | + | ||
113 | + g_assert(QLIST_FIRST(&parent_b->children) == data.child_c); | ||
114 | + g_assert(QLIST_NEXT(data.child_c, next) == child_a); | ||
115 | + g_assert(QLIST_NEXT(child_a, next) == NULL); | ||
116 | + | ||
117 | + g_assert_cmpint(parent_a->quiesce_counter, ==, 1); | ||
118 | + g_assert_cmpint(parent_b->quiesce_counter, ==, 1); | ||
119 | + g_assert_cmpint(a->quiesce_counter, ==, 1); | ||
120 | + g_assert_cmpint(b->quiesce_counter, ==, 0); | ||
121 | + g_assert_cmpint(c->quiesce_counter, ==, 1); | ||
122 | + | ||
123 | + bdrv_subtree_drained_end(parent_b); | ||
124 | + | ||
125 | + bdrv_unref(parent_b); | ||
126 | + blk_unref(blk); | ||
127 | + | ||
128 | + /* XXX Once bdrv_close() unref's children instead of just detaching them, | ||
129 | + * this won't be necessary any more. */ | ||
130 | + bdrv_unref(a); | ||
131 | + bdrv_unref(a); | ||
132 | + bdrv_unref(c); | ||
133 | + | ||
134 | + g_assert_cmpint(a->refcnt, ==, 1); | ||
135 | + g_assert_cmpint(b->refcnt, ==, 1); | ||
136 | + g_assert_cmpint(c->refcnt, ==, 1); | ||
137 | + bdrv_unref(a); | ||
138 | + bdrv_unref(b); | ||
139 | + bdrv_unref(c); | ||
140 | +} | 53 | +} |
141 | + | 54 | + |
55 | +static void test_quiesce_drain(void) | ||
56 | +{ | ||
57 | + test_quiesce_common(BDRV_DRAIN, false); | ||
58 | +} | ||
142 | + | 59 | + |
143 | int main(int argc, char **argv) | 60 | int main(int argc, char **argv) |
144 | { | 61 | { |
145 | int ret; | 62 | bdrv_init(); |
146 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 63 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
147 | g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); | 64 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); |
148 | g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); | 65 | g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); |
149 | g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); | 66 | |
150 | + g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); | 67 | + g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); |
151 | 68 | + g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | |
152 | ret = g_test_run(); | 69 | + |
153 | qemu_event_destroy(&done_event); | 70 | return g_test_run(); |
71 | } | ||
154 | -- | 72 | -- |
155 | 2.13.6 | 73 | 2.13.6 |
156 | 74 | ||
157 | 75 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | Block jobs already paused themselves when their main BlockBackend |
---|---|---|---|
2 | entered a drained section. This is not good enough: We also want to | ||
3 | pause a block job and may not submit new requests if, for example, the | ||
4 | mirror target node should be drained. | ||
2 | 5 | ||
3 | Attach a CoQueue to each in-flight operation so if we need to wait for | 6 | This implements .drained_begin/end callbacks in child_job in order to |
4 | any we can use it to wait instead of just blindly yielding and hoping | 7 | consider all block nodes related to the job, and removes the |
5 | for some operation to wake us. | 8 | BlockBackend callbacks which are unnecessary now because the root of the |
9 | job main BlockBackend is always referenced with a child_job, too. | ||
6 | 10 | ||
7 | A later patch will use this infrastructure to allow requests accessing | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | the same area of the virtual disk to specifically wait for each other. | 12 | --- |
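
Conceptually this replaces "yield and hope" with a per-operation wait queue: waiters park themselves on the specific operation they conflict with, and that operation's completion wakes exactly those waiters. A compact model of the idea (plain C stand-ins for CoQueue; the real code uses qemu_co_queue_wait()/qemu_co_queue_restart_all()):

    #include <stdio.h>

    #define MAX_WAITERS 4

    typedef struct Op {
        int inflight;
        int n_waiters;
        const char *waiters[MAX_WAITERS];      /* stands in for a CoQueue */
    } Op;

    static void wait_on(Op *op, const char *who)
    {
        op->waiters[op->n_waiters++] = who;    /* park on *this* operation */
    }

    static void complete(Op *op)
    {
        op->inflight = 0;
        for (int i = 0; i < op->n_waiters; i++) {
            printf("waking %s\n", op->waiters[i]);   /* restart_all */
        }
        op->n_waiters = 0;
    }

    int main(void)
    {
        Op op = { .inflight = 1 };
        wait_on(&op, "conflicting request A");
        wait_on(&op, "conflicting request B");
        complete(&op);
        return 0;
    }
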
13 | blockjob.c | 22 +++++++++------------- | ||
14 | 1 file changed, 9 insertions(+), 13 deletions(-) | ||
9 | 15 | ||
10 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 16 | diff --git a/blockjob.c b/blockjob.c |
11 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
12 | Message-id: 20180613181823.13618-4-mreitz@redhat.com | ||
13 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
14 | --- | ||
15 | block/mirror.c | 34 +++++++++++++++++++++++----------- | ||
16 | 1 file changed, 23 insertions(+), 11 deletions(-) | ||
17 | |||
18 | diff --git a/block/mirror.c b/block/mirror.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/mirror.c | 18 | --- a/blockjob.c |
21 | +++ b/block/mirror.c | 19 | +++ b/blockjob.c |
22 | @@ -XXX,XX +XXX,XX @@ | 20 | @@ -XXX,XX +XXX,XX @@ static char *child_job_get_parent_desc(BdrvChild *c) |
23 | 21 | job->id); | |
24 | #include "qemu/osdep.h" | ||
25 | #include "qemu/cutils.h" | ||
26 | +#include "qemu/coroutine.h" | ||
27 | #include "trace.h" | ||
28 | #include "block/blockjob_int.h" | ||
29 | #include "block/block_int.h" | ||
30 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorBuffer { | ||
31 | QSIMPLEQ_ENTRY(MirrorBuffer) next; | ||
32 | } MirrorBuffer; | ||
33 | |||
34 | +typedef struct MirrorOp MirrorOp; | ||
35 | + | ||
36 | typedef struct MirrorBlockJob { | ||
37 | BlockJob common; | ||
38 | BlockBackend *target; | ||
39 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob { | ||
40 | unsigned long *in_flight_bitmap; | ||
41 | int in_flight; | ||
42 | int64_t bytes_in_flight; | ||
43 | + QTAILQ_HEAD(MirrorOpList, MirrorOp) ops_in_flight; | ||
44 | int ret; | ||
45 | bool unmap; | ||
46 | - bool waiting_for_io; | ||
47 | int target_cluster_size; | ||
48 | int max_iov; | ||
49 | bool initial_zeroing_ongoing; | ||
50 | } MirrorBlockJob; | ||
51 | |||
52 | -typedef struct MirrorOp { | ||
53 | +struct MirrorOp { | ||
54 | MirrorBlockJob *s; | ||
55 | QEMUIOVector qiov; | ||
56 | int64_t offset; | ||
57 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorOp { | ||
58 | /* The pointee is set by mirror_co_read(), mirror_co_zero(), and | ||
59 | * mirror_co_discard() before yielding for the first time */ | ||
60 | int64_t *bytes_handled; | ||
61 | -} MirrorOp; | ||
62 | + | ||
63 | + CoQueue waiting_requests; | ||
64 | + | ||
65 | + QTAILQ_ENTRY(MirrorOp) next; | ||
66 | +}; | ||
67 | |||
68 | typedef enum MirrorMethod { | ||
69 | MIRROR_METHOD_COPY, | ||
70 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret) | ||
71 | |||
72 | chunk_num = op->offset / s->granularity; | ||
73 | nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity); | ||
74 | + | ||
75 | bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks); | ||
76 | + QTAILQ_REMOVE(&s->ops_in_flight, op, next); | ||
77 | if (ret >= 0) { | ||
78 | if (s->cow_bitmap) { | ||
79 | bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); | ||
80 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret) | ||
81 | } | ||
82 | } | ||
83 | qemu_iovec_destroy(&op->qiov); | ||
84 | - g_free(op); | ||
85 | |||
86 | - if (s->waiting_for_io) { | ||
87 | - qemu_coroutine_enter(s->common.job.co); | ||
88 | - } | ||
89 | + qemu_co_queue_restart_all(&op->waiting_requests); | ||
90 | + g_free(op); | ||
91 | } | 22 | } |
92 | 23 | ||
93 | static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret) | 24 | -static const BdrvChildRole child_job = { |
94 | @@ -XXX,XX +XXX,XX @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, | 25 | - .get_parent_desc = child_job_get_parent_desc, |
95 | 26 | - .stay_at_node = true, | |
96 | static inline void mirror_wait_for_io(MirrorBlockJob *s) | 27 | -}; |
28 | - | ||
29 | -static void block_job_drained_begin(void *opaque) | ||
30 | +static void child_job_drained_begin(BdrvChild *c) | ||
97 | { | 31 | { |
98 | - assert(!s->waiting_for_io); | 32 | - BlockJob *job = opaque; |
99 | - s->waiting_for_io = true; | 33 | + BlockJob *job = c->opaque; |
100 | - qemu_coroutine_yield(); | 34 | block_job_pause(job); |
101 | - s->waiting_for_io = false; | ||
102 | + MirrorOp *op; | ||
103 | + | ||
104 | + op = QTAILQ_FIRST(&s->ops_in_flight); | ||
105 | + assert(op); | ||
106 | + qemu_co_queue_wait(&op->waiting_requests, NULL); | ||
107 | } | 35 | } |
108 | 36 | ||
109 | /* Perform a mirror copy operation. | 37 | -static void block_job_drained_end(void *opaque) |
110 | @@ -XXX,XX +XXX,XX @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, | 38 | +static void child_job_drained_end(BdrvChild *c) |
111 | .bytes = bytes, | 39 | { |
112 | .bytes_handled = &bytes_handled, | 40 | - BlockJob *job = opaque; |
113 | }; | 41 | + BlockJob *job = c->opaque; |
114 | + qemu_co_queue_init(&op->waiting_requests); | 42 | block_job_resume(job); |
115 | 43 | } | |
116 | switch (mirror_method) { | 44 | |
117 | case MIRROR_METHOD_COPY: | 45 | -static const BlockDevOps block_job_dev_ops = { |
118 | @@ -XXX,XX +XXX,XX @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, | 46 | - .drained_begin = block_job_drained_begin, |
119 | abort(); | 47 | - .drained_end = block_job_drained_end, |
120 | } | 48 | +static const BdrvChildRole child_job = { |
121 | 49 | + .get_parent_desc = child_job_get_parent_desc, | |
122 | + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); | 50 | + .drained_begin = child_job_drained_begin, |
123 | qemu_coroutine_enter(co); | 51 | + .drained_end = child_job_drained_end, |
124 | /* At this point, ownership of op has been moved to the coroutine | 52 | + .stay_at_node = true, |
125 | * and the object may already be freed */ | 53 | }; |
126 | @@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, | 54 | |
127 | } | 55 | void block_job_remove_all_bdrv(BlockJob *job) |
128 | } | 56 | @@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, |
129 | 57 | block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); | |
130 | + QTAILQ_INIT(&s->ops_in_flight); | 58 | bs->job = job; |
131 | + | 59 | |
132 | trace_mirror_start(bs, s, opaque); | 60 | - blk_set_dev_ops(blk, &block_job_dev_ops, job); |
133 | job_start(&s->common.job); | 61 | bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); |
134 | return; | 62 | |
63 | QLIST_INSERT_HEAD(&block_jobs, job, job_list); | ||
135 | -- | 64 | -- |
136 | 2.13.6 | 65 | 2.13.6 |
137 | 66 | ||
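Condensed, the lifecycle that the mirror patch above gives each in-flight operation looks as follows (field and function names as in the patch; allocation, setup and error handling omitted):

    /* Submission: make the op discoverable and give waiters a queue */
    qemu_co_queue_init(&op->waiting_requests);
    QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);

    /* Waiting: block on one op's CoQueue instead of blindly yielding */
    op = QTAILQ_FIRST(&s->ops_in_flight);
    assert(op);
    qemu_co_queue_wait(&op->waiting_requests, NULL);

    /* Completion: unlink the op, then wake everybody who waited on it */
    QTAILQ_REMOVE(&s->ops_in_flight, op, next);
    qemu_co_queue_restart_all(&op->waiting_requests);
    g_free(op);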
1 | From: Max Reitz <mreitz@redhat.com> | 1 | Block jobs must be paused if any of the involved nodes are drained. |
---|---|---|---|
2 | 2 | ||
3 | This patch adds two bdrv-drain tests for what happens if some BDS goes | ||
4 | away during the drainage. | ||
5 | |||
6 | The basic idea is that you have a parent BDS with some child nodes. | ||
7 | Then, you drain one of the children. Because of that, the party who | ||
8 | actually owns the parent decides to (A) delete it, or (B) detach all its | ||
9 | children from it -- both while the child is still being drained. | ||
10 | |||
11 | A real-world case where this can happen is the mirror block job, which | ||
12 | may exit if you drain one of its children. | ||
13 | |||
14 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
16 | --- | 4 | --- |
17 | tests/test-bdrv-drain.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++ | 5 | tests/test-bdrv-drain.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ |
18 | 1 file changed, 169 insertions(+) | 6 | 1 file changed, 121 insertions(+) |
19 | 7 | ||
20 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
21 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/tests/test-bdrv-drain.c | 10 | --- a/tests/test-bdrv-drain.c |
23 | +++ b/tests/test-bdrv-drain.c | 11 | +++ b/tests/test-bdrv-drain.c |
24 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain_subtree(void) | 12 | @@ -XXX,XX +XXX,XX @@ |
25 | test_blockjob_common(BDRV_SUBTREE_DRAIN); | 13 | |
14 | #include "qemu/osdep.h" | ||
15 | #include "block/block.h" | ||
16 | +#include "block/blockjob_int.h" | ||
17 | #include "sysemu/block-backend.h" | ||
18 | #include "qapi/error.h" | ||
19 | |||
20 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) | ||
21 | test_quiesce_common(BDRV_DRAIN, false); | ||
26 | } | 22 | } |
27 | 23 | ||
28 | + | 24 | + |
29 | +typedef struct BDRVTestTopState { | 25 | +typedef struct TestBlockJob { |
30 | + BdrvChild *wait_child; | 26 | + BlockJob common; |
31 | +} BDRVTestTopState; | 27 | + bool should_complete; |
28 | +} TestBlockJob; | ||
32 | + | 29 | + |
33 | +static void bdrv_test_top_close(BlockDriverState *bs) | 30 | +static void test_job_completed(BlockJob *job, void *opaque) |
34 | +{ | 31 | +{ |
35 | + BdrvChild *c, *next_c; | 32 | + block_job_completed(job, 0); |
36 | + QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { | ||
37 | + bdrv_unref_child(bs, c); | ||
38 | + } | ||
39 | +} | 33 | +} |
40 | + | 34 | + |
41 | +static int coroutine_fn bdrv_test_top_co_preadv(BlockDriverState *bs, | 35 | +static void coroutine_fn test_job_start(void *opaque) |
42 | + uint64_t offset, uint64_t bytes, | ||
43 | + QEMUIOVector *qiov, int flags) | ||
44 | +{ | 36 | +{ |
45 | + BDRVTestTopState *tts = bs->opaque; | 37 | + TestBlockJob *s = opaque; |
46 | + return bdrv_co_preadv(tts->wait_child, offset, bytes, qiov, flags); | 38 | + |
39 | + while (!s->should_complete) { | ||
40 | + block_job_sleep_ns(&s->common, 100000); | ||
41 | + } | ||
42 | + | ||
43 | + block_job_defer_to_main_loop(&s->common, test_job_completed, NULL); | ||
47 | +} | 44 | +} |
48 | + | 45 | + |
49 | +static BlockDriver bdrv_test_top_driver = { | 46 | +static void test_job_complete(BlockJob *job, Error **errp) |
50 | + .format_name = "test_top_driver", | 47 | +{ |
51 | + .instance_size = sizeof(BDRVTestTopState), | 48 | + TestBlockJob *s = container_of(job, TestBlockJob, common); |
49 | + s->should_complete = true; | ||
50 | +} | ||
52 | + | 51 | + |
53 | + .bdrv_close = bdrv_test_top_close, | 52 | +BlockJobDriver test_job_driver = { |
54 | + .bdrv_co_preadv = bdrv_test_top_co_preadv, | 53 | + .instance_size = sizeof(TestBlockJob), |
55 | + | 54 | + .start = test_job_start, |
56 | + .bdrv_child_perm = bdrv_format_default_perms, | 55 | + .complete = test_job_complete, |
57 | +}; | 56 | +}; |
58 | + | 57 | + |
59 | +typedef struct TestCoDeleteByDrainData { | 58 | +static void test_blockjob_common(enum drain_type drain_type) |
60 | + BlockBackend *blk; | 59 | +{ |
61 | + bool detach_instead_of_delete; | 60 | + BlockBackend *blk_src, *blk_target; |
62 | + bool done; | 61 | + BlockDriverState *src, *target; |
63 | +} TestCoDeleteByDrainData; | 62 | + BlockJob *job; |
63 | + int ret; | ||
64 | + | 64 | + |
65 | +static void coroutine_fn test_co_delete_by_drain(void *opaque) | 65 | + src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, |
66 | +{ | 66 | + &error_abort); |
67 | + TestCoDeleteByDrainData *dbdd = opaque; | 67 | + blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
68 | + BlockBackend *blk = dbdd->blk; | 68 | + blk_insert_bs(blk_src, src, &error_abort); |
69 | + BlockDriverState *bs = blk_bs(blk); | ||
70 | + BDRVTestTopState *tts = bs->opaque; | ||
71 | + void *buffer = g_malloc(65536); | ||
72 | + QEMUIOVector qiov; | ||
73 | + struct iovec iov = { | ||
74 | + .iov_base = buffer, | ||
75 | + .iov_len = 65536, | ||
76 | + }; | ||
77 | + | 69 | + |
78 | + qemu_iovec_init_external(&qiov, &iov, 1); | 70 | + target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, |
71 | + &error_abort); | ||
72 | + blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
73 | + blk_insert_bs(blk_target, target, &error_abort); | ||
79 | + | 74 | + |
80 | + /* Pretend some internal write operation from parent to child. | 75 | + job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0, |
81 | + * Important: We have to read from the child, not from the parent! | 76 | + 0, NULL, NULL, &error_abort); |
82 | + * Draining works by first propagating it all up the tree to the | 77 | + block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); |
83 | + * root and then waiting for drainage from root to the leaves | 78 | + block_job_start(job); |
84 | + * (protocol nodes). If we have a request waiting on the root, | ||
85 | + * everything will be drained before we go back down the tree, but | ||
86 | + * we do not want that. We want to be in the middle of draining | ||
87 | + * when the following request returns. */ | 82 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
88 | + bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0); | ||
89 | + | 79 | + |
90 | + g_assert_cmpint(bs->refcnt, ==, 1); | 80 | + g_assert_cmpint(job->pause_count, ==, 0); |
81 | + g_assert_false(job->paused); | ||
82 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ | ||
91 | + | 83 | + |
92 | + if (!dbdd->detach_instead_of_delete) { | 84 | + do_drain_begin(drain_type, src); |
93 | + blk_unref(blk); | 85 | + |
86 | + if (drain_type == BDRV_DRAIN_ALL) { | ||
87 | + /* bdrv_drain_all() drains both src and target, and involves an | ||
88 | + * additional block_job_pause_all() */ | ||
89 | + g_assert_cmpint(job->pause_count, ==, 3); | ||
94 | + } else { | 90 | + } else { |
95 | + BdrvChild *c, *next_c; | 91 | + g_assert_cmpint(job->pause_count, ==, 1); |
96 | + QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { | ||
97 | + bdrv_unref_child(bs, c); | ||
98 | + } | ||
99 | + } | 92 | + } |
93 | + /* XXX We don't wait until the job is actually paused. Is this okay? */ | ||
94 | + /* g_assert_true(job->paused); */ | ||
95 | + g_assert_false(job->busy); /* The job is paused */ | ||
100 | + | 96 | + |
101 | + dbdd->done = true; | 97 | + do_drain_end(drain_type, src); |
98 | + | ||
99 | + g_assert_cmpint(job->pause_count, ==, 0); | ||
100 | + g_assert_false(job->paused); | ||
101 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ | ||
102 | + | ||
103 | + do_drain_begin(drain_type, target); | ||
104 | + | ||
105 | + if (drain_type == BDRV_DRAIN_ALL) { | ||
106 | + /* bdrv_drain_all() drains both src and target, and involves an | ||
107 | + * additional block_job_pause_all() */ | ||
108 | + g_assert_cmpint(job->pause_count, ==, 3); | ||
109 | + } else { | ||
110 | + g_assert_cmpint(job->pause_count, ==, 1); | ||
111 | + } | ||
112 | + /* XXX We don't wait until the job is actually paused. Is this okay? */ | ||
113 | + /* g_assert_true(job->paused); */ | ||
114 | + g_assert_false(job->busy); /* The job is paused */ | ||
115 | + | ||
116 | + do_drain_end(drain_type, target); | ||
117 | + | ||
118 | + g_assert_cmpint(job->pause_count, ==, 0); | ||
119 | + g_assert_false(job->paused); | ||
120 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ | ||
121 | + | ||
122 | + ret = block_job_complete_sync(job, &error_abort); | ||
123 | + g_assert_cmpint(ret, ==, 0); | ||
124 | + | ||
125 | + blk_unref(blk_src); | ||
126 | + blk_unref(blk_target); | ||
127 | + bdrv_unref(src); | ||
128 | + bdrv_unref(target); | ||
102 | +} | 129 | +} |
103 | + | 130 | + |
104 | +/** | 131 | +static void test_blockjob_drain_all(void) |
105 | + * Test what happens when some BDS has some children, you drain one of | ||
106 | + * them and this results in the BDS being deleted. | ||
107 | + * | ||
108 | + * If @detach_instead_of_delete is set, the BDS is not going to be | ||
109 | + * deleted but will only detach all of its children. | ||
110 | + */ | ||
111 | +static void do_test_delete_by_drain(bool detach_instead_of_delete) | ||
112 | +{ | 132 | +{ |
113 | + BlockBackend *blk; | 133 | + test_blockjob_common(BDRV_DRAIN_ALL); |
114 | + BlockDriverState *bs, *child_bs, *null_bs; | ||
115 | + BDRVTestTopState *tts; | ||
116 | + TestCoDeleteByDrainData dbdd; | ||
117 | + Coroutine *co; | ||
118 | + | ||
119 | + bs = bdrv_new_open_driver(&bdrv_test_top_driver, "top", BDRV_O_RDWR, | ||
120 | + &error_abort); | ||
121 | + bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; | ||
122 | + tts = bs->opaque; | ||
123 | + | ||
124 | + null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, | ||
125 | + &error_abort); | ||
126 | + bdrv_attach_child(bs, null_bs, "null-child", &child_file, &error_abort); | ||
127 | + | ||
128 | + /* This child will be the one to pass requests through to, and | ||
129 | + * it will stall until a drain occurs */ | ||
130 | + child_bs = bdrv_new_open_driver(&bdrv_test, "child", BDRV_O_RDWR, | ||
131 | + &error_abort); | ||
132 | + child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; | ||
133 | + /* Takes our reference to child_bs */ | ||
134 | + tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", &child_file, | ||
135 | + &error_abort); | ||
136 | + | ||
137 | + /* This child is just there to be deleted | ||
138 | + * (for detach_instead_of_delete == true) */ | ||
139 | + null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, | ||
140 | + &error_abort); | ||
141 | + bdrv_attach_child(bs, null_bs, "null-child", &child_file, &error_abort); | ||
142 | + | ||
143 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
144 | + blk_insert_bs(blk, bs, &error_abort); | ||
145 | + | ||
146 | + /* Referenced by blk now */ | ||
147 | + bdrv_unref(bs); | ||
148 | + | ||
149 | + g_assert_cmpint(bs->refcnt, ==, 1); | ||
150 | + g_assert_cmpint(child_bs->refcnt, ==, 1); | ||
151 | + g_assert_cmpint(null_bs->refcnt, ==, 1); | ||
152 | + | ||
153 | + | ||
154 | + dbdd = (TestCoDeleteByDrainData){ | ||
155 | + .blk = blk, | ||
156 | + .detach_instead_of_delete = detach_instead_of_delete, | ||
157 | + .done = false, | ||
158 | + }; | ||
159 | + co = qemu_coroutine_create(test_co_delete_by_drain, &dbdd); | ||
160 | + qemu_coroutine_enter(co); | ||
161 | + | ||
162 | + /* Drain the child while the read operation is still pending. | ||
163 | + * This should result in the operation finishing and | ||
164 | + * test_co_delete_by_drain() resuming. Thus, @bs will be deleted | ||
165 | + * and the coroutine will exit while this drain operation is still | ||
166 | + * in progress. */ | ||
167 | + bdrv_ref(child_bs); | ||
168 | + bdrv_drain(child_bs); | ||
169 | + bdrv_unref(child_bs); | ||
170 | + | ||
171 | + while (!dbdd.done) { | ||
172 | + aio_poll(qemu_get_aio_context(), true); | ||
173 | + } | ||
174 | + | ||
175 | + if (detach_instead_of_delete) { | ||
176 | + /* Here, the reference has not passed over to the coroutine, | ||
177 | + * so we have to delete the BB ourselves */ | ||
178 | + blk_unref(blk); | ||
179 | + } | ||
180 | +} | 134 | +} |
181 | + | 135 | + |
182 | + | 136 | +static void test_blockjob_drain(void) |
183 | +static void test_delete_by_drain(void) | ||
184 | +{ | 137 | +{ |
185 | + do_test_delete_by_drain(false); | 138 | + test_blockjob_common(BDRV_DRAIN); |
186 | +} | 139 | +} |
187 | + | ||
188 | +static void test_detach_by_drain(void) | ||
189 | +{ | ||
190 | + do_test_delete_by_drain(true); | ||
191 | +} | ||
192 | + | ||
193 | + | 140 | + |
194 | int main(int argc, char **argv) | 141 | int main(int argc, char **argv) |
195 | { | 142 | { |
196 | int ret; | 143 | bdrv_init(); |
197 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 144 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
198 | g_test_add_func("/bdrv-drain/blockjob/drain_subtree", | 145 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); |
199 | test_blockjob_drain_subtree); | 146 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); |
200 | 147 | ||
201 | + g_test_add_func("/bdrv-drain/deletion", test_delete_by_drain); | 148 | + g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); |
202 | + g_test_add_func("/bdrv-drain/detach", test_detach_by_drain); | 149 | + g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); |
203 | + | 150 | + |
204 | ret = g_test_run(); | 151 | return g_test_run(); |
205 | qemu_event_destroy(&done_event); | 152 | } |
206 | return ret; | ||
207 | -- | 153 | -- |
208 | 2.13.6 | 154 | 2.13.6 |
209 | 155 | ||
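The tests above drive everything through do_drain_begin()/do_drain_end() with a drain_type; the dispatch helpers in test-bdrv-drain.c boil down to the following (a sketch reconstructed from the calls visible in this series, so details may differ):

    static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
    {
        switch (drain_type) {
        case BDRV_DRAIN_ALL:    bdrv_drain_all_begin(); break;
        case BDRV_DRAIN:        bdrv_drained_begin(bs); break;
        default:                g_assert_not_reached();
        }
    }

    static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
    {
        switch (drain_type) {
        case BDRV_DRAIN_ALL:    bdrv_drain_all_end(); break;
        case BDRV_DRAIN:        bdrv_drained_end(bs); break;
        default:                g_assert_not_reached();
        }
    }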
1 | Before we can introduce a single polling loop for all nodes in | 1 | Block jobs are already paused using the BdrvChildRole drain callbacks, |
---|---|---|---|
2 | bdrv_drain_all_begin(), we must make sure to run it outside of coroutine | 2 | so we don't need an additional block_job_pause_all() call. |
3 | context like we already do for bdrv_do_drained_begin(). | ||
4 | 3 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | --- | 5 | --- |
7 | block/io.c | 22 +++++++++++++++++----- | 6 | block/io.c | 4 ---- |
8 | 1 file changed, 17 insertions(+), 5 deletions(-) | 7 | tests/test-bdrv-drain.c | 10 ++++------ |
8 | 2 files changed, 4 insertions(+), 10 deletions(-) | ||
9 | 9 | ||
10 | diff --git a/block/io.c b/block/io.c | 10 | diff --git a/block/io.c b/block/io.c |
11 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/block/io.c | 12 | --- a/block/io.c |
13 | +++ b/block/io.c | 13 | +++ b/block/io.c |
14 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | 14 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
15 | Coroutine *co = data->co; | 15 | * context. */ |
16 | BlockDriverState *bs = data->bs; | 16 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); |
17 | 17 | ||
18 | - bdrv_dec_in_flight(bs); | 18 | - block_job_pause_all(); |
19 | - if (data->begin) { | 19 | - |
20 | - bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll); | 20 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
21 | + if (bs) { | 21 | AioContext *aio_context = bdrv_get_aio_context(bs); |
22 | + bdrv_dec_in_flight(bs); | 22 | |
23 | + if (data->begin) { | 23 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
24 | + bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll); | 24 | aio_enable_external(aio_context); |
25 | + } else { | 25 | aio_context_release(aio_context); |
26 | + bdrv_do_drained_end(bs, data->recursive, data->parent); | 26 | } |
27 | + } | 27 | - |
28 | - block_job_resume_all(); | ||
29 | } | ||
30 | |||
31 | void bdrv_drain_all(void) | ||
32 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/tests/test-bdrv-drain.c | ||
35 | +++ b/tests/test-bdrv-drain.c | ||
36 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) | ||
37 | do_drain_begin(drain_type, src); | ||
38 | |||
39 | if (drain_type == BDRV_DRAIN_ALL) { | ||
40 | - /* bdrv_drain_all() drains both src and target, and involves an | ||
41 | - * additional block_job_pause_all() */ | ||
42 | - g_assert_cmpint(job->pause_count, ==, 3); | ||
43 | + /* bdrv_drain_all() drains both src and target */ | ||
44 | + g_assert_cmpint(job->pause_count, ==, 2); | ||
28 | } else { | 45 | } else { |
29 | - bdrv_do_drained_end(bs, data->recursive, data->parent); | 46 | g_assert_cmpint(job->pause_count, ==, 1); |
30 | + assert(data->begin); | ||
31 | + bdrv_drain_all_begin(); | ||
32 | } | 47 | } |
33 | 48 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) | |
34 | data->done = true; | 49 | do_drain_begin(drain_type, target); |
35 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | 50 | |
36 | .parent = parent, | 51 | if (drain_type == BDRV_DRAIN_ALL) { |
37 | .poll = poll, | 52 | - /* bdrv_drain_all() drains both src and target, and involves an |
38 | }; | 53 | - * additional block_job_pause_all() */ |
39 | - bdrv_inc_in_flight(bs); | 54 | - g_assert_cmpint(job->pause_count, ==, 3); |
40 | + if (bs) { | 55 | + /* bdrv_drain_all() drains both src and target */ |
41 | + bdrv_inc_in_flight(bs); | 56 | + g_assert_cmpint(job->pause_count, ==, 2); |
42 | + } | 57 | } else { |
43 | aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), | 58 | g_assert_cmpint(job->pause_count, ==, 1); |
44 | bdrv_co_drain_bh_cb, &data); | 59 | } |
45 | |||
46 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
47 | BlockDriverState *bs; | ||
48 | BdrvNextIterator it; | ||
49 | |||
50 | + if (qemu_in_coroutine()) { | ||
51 | + bdrv_co_yield_to_drain(NULL, true, false, NULL, true); | ||
52 | + return; | ||
53 | + } | ||
54 | + | ||
55 | /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread | ||
56 | * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on | ||
57 | * nodes in several different AioContexts, so make sure we're in the main | ||
58 | -- | 60 | -- |
59 | 2.13.6 | 61 | 2.13.6 |
60 | 62 | ||
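Moving bdrv_drain_all_begin() out of coroutine context reuses the usual QEMU deferral pattern: schedule a bottom half, yield, and let the BH do the work before it reenters the coroutine. Schematically (a sketch of the pattern only, using the BdrvCoDrainData fields seen in this series, not the exact patch code):

    static void drain_all_bh(void *opaque)
    {
        BdrvCoDrainData *data = opaque;

        bdrv_drain_all_begin();    /* now runs outside coroutine context */
        data->done = true;
        aio_co_wake(data->co);     /* reenter the coroutine that yielded */
    }

    /* in coroutine context: */
    data.co = qemu_coroutine_self();
    aio_bh_schedule_oneshot(qemu_get_aio_context(), drain_all_bh, &data);
    qemu_coroutine_yield();
    assert(data.done);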
1 | bdrv_do_drained_begin() is only safe if we have a single | 1 | bdrv_do_drained_begin() restricts the call of parent callbacks and |
---|---|---|---|
2 | BDRV_POLL_WHILE() after quiescing all affected nodes. We cannot allow | 2 | aio_disable_external() to the outermost drain section, but the block |
3 | parent callbacks to introduce a nested polling loop that could cause | 3 | driver callbacks are always called. bdrv_do_drained_end() must match
4 | graph changes while we're traversing the graph. | 4 | this behaviour; otherwise nodes stay drained even if begin/end calls
5 | 5 | were balanced. | |
6 | Split off bdrv_do_drained_begin_quiesce(), which only quiesces a single | ||
7 | node without waiting for its requests to complete. These requests will | ||
8 | be waited for in the BDRV_POLL_WHILE() call down the call chain. | ||
9 | 6 | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 8 | --- |
12 | include/block/block.h | 9 +++++++++ | 9 | block/io.c | 12 +++++++----- |
13 | block.c | 2 +- | 10 | 1 file changed, 7 insertions(+), 5 deletions(-) |
14 | block/io.c | 24 ++++++++++++++++-------- | ||
15 | 3 files changed, 26 insertions(+), 9 deletions(-) | ||
16 | 11 | ||
17 | diff --git a/include/block/block.h b/include/block/block.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/include/block/block.h | ||
20 | +++ b/include/block/block.h | ||
21 | @@ -XXX,XX +XXX,XX @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, | ||
22 | void bdrv_drained_begin(BlockDriverState *bs); | ||
23 | |||
24 | /** | ||
25 | + * bdrv_do_drained_begin_quiesce: | ||
26 | + * | ||
27 | + * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already | ||
28 | + * running requests to complete. | ||
29 | + */ | ||
30 | +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, | ||
31 | + BdrvChild *parent); | ||
32 | + | ||
33 | +/** | ||
34 | * Like bdrv_drained_begin, but recursively begins a quiesced section for | ||
35 | * exclusive access to all child nodes as well. | ||
36 | */ | ||
37 | diff --git a/block.c b/block.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/block.c | ||
40 | +++ b/block.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) | ||
42 | static void bdrv_child_cb_drained_begin(BdrvChild *child) | ||
43 | { | ||
44 | BlockDriverState *bs = child->opaque; | ||
45 | - bdrv_drained_begin(bs); | ||
46 | + bdrv_do_drained_begin_quiesce(bs, NULL); | ||
47 | } | ||
48 | |||
49 | static bool bdrv_child_cb_drained_poll(BdrvChild *child) | ||
50 | diff --git a/block/io.c b/block/io.c | 12 | diff --git a/block/io.c b/block/io.c |
51 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
52 | --- a/block/io.c | 14 | --- a/block/io.c |
53 | +++ b/block/io.c | 15 | +++ b/block/io.c |
54 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | 16 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
55 | assert(data.done); | 17 | |
56 | } | 18 | void bdrv_drained_end(BlockDriverState *bs) |
57 | |||
58 | -void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
59 | - BdrvChild *parent, bool poll) | ||
60 | +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, | ||
61 | + BdrvChild *parent) | ||
62 | { | 19 | { |
63 | - BdrvChild *child, *next; | 20 | + int old_quiesce_counter; |
64 | - | 21 | + |
65 | - if (qemu_in_coroutine()) { | 22 | if (qemu_in_coroutine()) { |
66 | - bdrv_co_yield_to_drain(bs, true, recursive, parent, poll); | 23 | bdrv_co_yield_to_drain(bs, false); |
24 | return; | ||
25 | } | ||
26 | assert(bs->quiesce_counter > 0); | ||
27 | - if (atomic_fetch_dec(&bs->quiesce_counter) > 1) { | ||
67 | - return; | 28 | - return; |
68 | - } | 29 | - } |
69 | + assert(!qemu_in_coroutine()); | 30 | + old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter); |
70 | 31 | ||
71 | /* Stop things in parent-to-child order */ | 32 | /* Re-enable things in child-to-parent order */ |
72 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { | 33 | bdrv_drain_invoke(bs, false, false); |
73 | @@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | 34 | - bdrv_parent_drained_end(bs); |
74 | 35 | - aio_enable_external(bdrv_get_aio_context(bs)); | |
75 | bdrv_parent_drained_begin(bs, parent); | 36 | + if (old_quiesce_counter == 1) { |
76 | bdrv_drain_invoke(bs, true); | 37 | + bdrv_parent_drained_end(bs); |
77 | +} | 38 | + aio_enable_external(bdrv_get_aio_context(bs)); |
78 | + | ||
79 | +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
80 | + BdrvChild *parent, bool poll) | ||
81 | +{ | ||
82 | + BdrvChild *child, *next; | ||
83 | + | ||
84 | + if (qemu_in_coroutine()) { | ||
85 | + bdrv_co_yield_to_drain(bs, true, recursive, parent, poll); | ||
86 | + return; | ||
87 | + } | 39 | + } |
88 | + | 40 | } |
89 | + bdrv_do_drained_begin_quiesce(bs, parent); | 41 | |
90 | 42 | /* | |
91 | if (recursive) { | ||
92 | bs->recursive_quiesce_counter++; | ||
93 | -- | 43 | -- |
94 | 2.13.6 | 44 | 2.13.6 |
95 | 45 | ||
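The quiesce/poll split on the left reduces a top-level drain to two clearly separated phases. Conceptually, after this patch the sequence is (both calls taken from the patches in this series; comments added here):

    /* Phase 1: quiesce. Notify parents and the block driver of every
     * affected node so that no new requests are submitted. No polling
     * happens here, so the graph cannot change under our feet. */
    bdrv_do_drained_begin_quiesce(bs, parent);

    /* Phase 2: a single top-level poll loop waits until all in-flight
     * requests in the whole affected subtree have completed. */
    BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));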
1 | From: Max Reitz <mreitz@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
---|---|---|---|
2 | --- | ||
3 | tests/test-bdrv-drain.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ | ||
4 | 1 file changed, 57 insertions(+) | ||
2 | 5 | ||
3 | This new function allows looking for a consecutively dirty area in a | 6 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
4 | dirty bitmap. | ||
5 | |||
6 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
7 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
8 | Reviewed-by: John Snow <jsnow@redhat.com> | ||
9 | Message-id: 20180613181823.13618-10-mreitz@redhat.com | ||
10 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
11 | --- | ||
12 | include/block/dirty-bitmap.h | 2 ++ | ||
13 | block/dirty-bitmap.c | 55 ++++++++++++++++++++++++++++++++++++++++++++ | ||
14 | 2 files changed, 57 insertions(+) | ||
15 | |||
16 | diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | 7 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/include/block/dirty-bitmap.h | 8 | --- a/tests/test-bdrv-drain.c |
19 | +++ b/include/block/dirty-bitmap.h | 9 | +++ b/tests/test-bdrv-drain.c |
20 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, | 10 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
21 | void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, | 11 | enum drain_type { |
22 | int64_t offset, int64_t bytes); | 12 | BDRV_DRAIN_ALL, |
23 | int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); | 13 | BDRV_DRAIN, |
24 | +bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset, | 14 | + DRAIN_TYPE_MAX, |
25 | + uint64_t *offset, int *bytes); | 15 | }; |
26 | void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset); | 16 | |
27 | int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); | 17 | static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) |
28 | int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); | 18 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) |
29 | diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c | 19 | test_quiesce_common(BDRV_DRAIN, false); |
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/block/dirty-bitmap.c | ||
32 | +++ b/block/dirty-bitmap.c | ||
33 | @@ -XXX,XX +XXX,XX @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) | ||
34 | return hbitmap_iter_next(&iter->hbi, true); | ||
35 | } | 20 | } |
36 | 21 | ||
37 | +/** | 22 | +static void test_nested(void) |
38 | + * Return the next consecutively dirty area in the dirty bitmap | ||
39 | + * belonging to the given iterator @iter. | ||
40 | + * | ||
41 | + * @max_offset: Maximum value that may be returned for | ||
42 | + * *offset + *bytes | ||
43 | + * @offset: Will contain the start offset of the next dirty area | ||
44 | + * @bytes: Will contain the length of the next dirty area | ||
45 | + * | ||
46 | + * Returns: True if a dirty area could be found before max_offset | ||
47 | + * (which means that *offset and *bytes then contain valid | ||
48 | + * values), false otherwise. | ||
49 | + * | ||
50 | + * Note that @iter is never advanced if false is returned. If an area | ||
51 | + * is found (which means that true is returned), it will be advanced | ||
52 | + * past that area. | ||
53 | + */ | ||
54 | +bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset, | ||
55 | + uint64_t *offset, int *bytes) | ||
56 | +{ | 23 | +{ |
57 | + uint32_t granularity = bdrv_dirty_bitmap_granularity(iter->bitmap); | 24 | + BlockBackend *blk; |
58 | + uint64_t gran_max_offset; | 25 | + BlockDriverState *bs, *backing; |
59 | + int64_t ret; | 26 | + BDRVTestState *s, *backing_s; |
60 | + int size; | 27 | + enum drain_type outer, inner; |
61 | + | 28 | + |
62 | + if (max_offset == iter->bitmap->size) { | 29 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
63 | + /* If max_offset points to the image end, round it up by the | 30 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, |
64 | + * bitmap granularity */ | 31 | + &error_abort); |
65 | + gran_max_offset = ROUND_UP(max_offset, granularity); | 32 | + s = bs->opaque; |
66 | + } else { | 33 | + blk_insert_bs(blk, bs, &error_abort); |
67 | + gran_max_offset = max_offset; | 34 | + |
35 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
36 | + backing_s = backing->opaque; | ||
37 | + bdrv_set_backing_hd(bs, backing, &error_abort); | ||
38 | + | ||
39 | + for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { | ||
40 | + for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { | ||
41 | + /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ | ||
42 | + int bs_quiesce = (outer != BDRV_DRAIN_ALL) + | ||
43 | + (inner != BDRV_DRAIN_ALL); | ||
44 | + int backing_quiesce = 0; | ||
45 | + int backing_cb_cnt = (outer != BDRV_DRAIN) + | ||
46 | + (inner != BDRV_DRAIN); | ||
47 | + | ||
48 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
49 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
50 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
51 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
52 | + | ||
53 | + do_drain_begin(outer, bs); | ||
54 | + do_drain_begin(inner, bs); | ||
55 | + | ||
56 | + g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce); | ||
57 | + g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); | ||
58 | + g_assert_cmpint(s->drain_count, ==, 2); | ||
59 | + g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt); | ||
60 | + | ||
61 | + do_drain_end(inner, bs); | ||
62 | + do_drain_end(outer, bs); | ||
63 | + | ||
64 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
65 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
66 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
67 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
68 | + } | ||
68 | + } | 69 | + } |
69 | + | 70 | + |
70 | + ret = hbitmap_iter_next(&iter->hbi, false); | 71 | + bdrv_unref(backing); |
71 | + if (ret < 0 || ret + granularity > gran_max_offset) { | 72 | + bdrv_unref(bs); |
72 | + return false; | 73 | + blk_unref(blk); |
73 | + } | ||
74 | + | ||
75 | + *offset = ret; | ||
76 | + size = 0; | ||
77 | + | ||
78 | + assert(granularity <= INT_MAX); | ||
79 | + | ||
80 | + do { | ||
81 | + /* Advance iterator */ | ||
82 | + ret = hbitmap_iter_next(&iter->hbi, true); | ||
83 | + size += granularity; | ||
84 | + } while (ret + granularity <= gran_max_offset && | ||
85 | + hbitmap_iter_next(&iter->hbi, false) == ret + granularity && | ||
86 | + size <= INT_MAX - granularity); | ||
87 | + | ||
88 | + *bytes = MIN(size, max_offset - *offset); | ||
89 | + return true; | ||
90 | +} | 74 | +} |
91 | + | 75 | + |
92 | /* Called within bdrv_dirty_bitmap_lock..unlock */ | 76 | |
93 | void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, | 77 | typedef struct TestBlockJob { |
94 | int64_t offset, int64_t bytes) | 78 | BlockJob common; |
79 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
80 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
81 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
82 | |||
83 | + g_test_add_func("/bdrv-drain/nested", test_nested); | ||
84 | + | ||
85 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
86 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
87 | |||
95 | -- | 88 | -- |
96 | 2.13.6 | 89 | 2.13.6 |
97 | 90 | ||
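A typical caller of the new bdrv_dirty_iter_next_area() loops until no further dirty area fits before max_offset; a sketch of such a loop (process_area() is a hypothetical callback, not part of the patch):

    uint64_t offset;
    int bytes;

    while (bdrv_dirty_iter_next_area(iter, max_offset, &offset, &bytes)) {
        /* [offset, offset + bytes) is consecutively dirty and ends at or
         * before max_offset; the iterator has already advanced past it. */
        process_area(offset, bytes);
    }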
1 | In the future, bdrv_drain_all_begin/end() will drain all individual | 1 | This is in preparation for subtree drains, i.e. drained sections that
---|---|---|---|
2 | nodes separately rather than whole subtrees. This means that we don't | 2 | affect not only a single node, but recursively all child nodes, too. |
3 | want to propagate the drain to all parents any more: If the parent is a | 3 | |
4 | BDS, it will already be drained separately. Recursing to all parents is | 4 | Calling the parent callbacks for drain is pointless when we just came |
5 | unnecessary work and would make it an O(n²) operation. | 5 | from that parent node recursively and leads to multiple increases of |
6 | 6 | bs->quiesce_counter in a single drain call. Don't do it. | |
7 | Prepare the drain function for the changed drain_all by adding an | 7 | |
8 | ignore_bds_parents parameter to the internal implementation that | 8 | In order for this to work correctly, the parent callback must be called |
9 | prevents the propagation of the drain to BDS parents. We still (have to) | 9 | for every bdrv_drain_begin/end() call, not only for the outermost one: |
10 | propagate it to non-BDS parents like BlockBackends or Jobs because those | 10 | |
11 | are not drained separately. | 11 | If we have a node N with two parents A and B, recursive draining of A |
12 | should cause the quiesce_counter of B to increase because its child N is | ||
13 | drained independently of B. If B is now recursively drained, too, A must | ||
14 | increase its quiesce_counter because N is drained independently of A | ||
15 | only now, even if N is going from quiesce_counter 1 to 2. | ||
12 | 16 | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 18 | --- |
15 | include/block/block.h | 16 ++++++--- | 19 | include/block/block.h | 4 ++-- |
16 | include/block/block_int.h | 6 ++++ | 20 | block.c | 13 +++++++++---- |
17 | block.c | 11 +++--- | 21 | block/io.c | 47 ++++++++++++++++++++++++++++++++++------------- |
18 | block/io.c | 88 ++++++++++++++++++++++++++++------------------- | 22 | 3 files changed, 45 insertions(+), 19 deletions(-) |
19 | block/vvfat.c | 1 + | ||
20 | 5 files changed, 78 insertions(+), 44 deletions(-) | ||
21 | 23 | ||
22 | diff --git a/include/block/block.h b/include/block/block.h | 24 | diff --git a/include/block/block.h b/include/block/block.h |
23 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/include/block/block.h | 26 | --- a/include/block/block.h |
25 | +++ b/include/block/block.h | 27 | +++ b/include/block/block.h |
26 | @@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs); | 28 | @@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs); |
27 | * Begin a quiesced section of all users of @bs. This is part of | 29 | * Begin a quiesced section of all users of @bs. This is part of |
28 | * bdrv_drained_begin. | 30 | * bdrv_drained_begin. |
29 | */ | 31 | */ |
30 | -void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); | 32 | -void bdrv_parent_drained_begin(BlockDriverState *bs); |
31 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, | 33 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); |
32 | + bool ignore_bds_parents); | ||
33 | 34 | ||
34 | /** | 35 | /** |
35 | * bdrv_parent_drained_end: | 36 | * bdrv_parent_drained_end: |
36 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); | 37 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs); |
37 | * End a quiesced section of all users of @bs. This is part of | 38 | * End a quiesced section of all users of @bs. This is part of |
38 | * bdrv_drained_end. | 39 | * bdrv_drained_end. |
39 | */ | 40 | */ |
40 | -void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); | 41 | -void bdrv_parent_drained_end(BlockDriverState *bs); |
41 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, | 42 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); |
42 | + bool ignore_bds_parents); | ||
43 | |||
44 | /** | ||
45 | * bdrv_drain_poll: | ||
46 | * | ||
47 | * Poll for pending requests in @bs, its parents (except for @ignore_parent), | ||
48 | - * and if @recursive is true its children as well. | ||
49 | + * and if @recursive is true its children as well (used for subtree drain). | ||
50 | + * | ||
51 | + * If @ignore_bds_parents is true, parents that are BlockDriverStates must | ||
52 | + * ignore the drain request because they will be drained separately (used for | ||
53 | + * drain_all). | ||
54 | * | ||
55 | * This is part of bdrv_drained_begin. | ||
56 | */ | ||
57 | bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, | ||
58 | - BdrvChild *ignore_parent); | ||
59 | + BdrvChild *ignore_parent, bool ignore_bds_parents); | ||
60 | 43 | ||
61 | /** | 44 | /** |
62 | * bdrv_drained_begin: | 45 | * bdrv_drained_begin: |
63 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs); | ||
64 | * running requests to complete. | ||
65 | */ | ||
66 | void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, | ||
67 | - BdrvChild *parent); | ||
68 | + BdrvChild *parent, bool ignore_bds_parents); | ||
69 | |||
70 | /** | ||
71 | * Like bdrv_drained_begin, but recursively begins a quiesced section for | ||
72 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/include/block/block_int.h | ||
75 | +++ b/include/block/block_int.h | ||
76 | @@ -XXX,XX +XXX,XX @@ struct BdrvChildRole { | ||
77 | * points to. */ | ||
78 | bool stay_at_node; | ||
79 | |||
80 | + /* If true, the parent is a BlockDriverState and bdrv_next_all_states() | ||
81 | + * will return it. This information is used for drain_all, where every node | ||
82 | + * will be drained separately, so the drain only needs to be propagated to | ||
83 | + * non-BDS parents. */ | ||
84 | + bool parent_is_bds; | ||
85 | + | ||
86 | void (*inherit_options)(int *child_flags, QDict *child_options, | ||
87 | int parent_flags, QDict *parent_options); | ||
88 | |||
89 | diff --git a/block.c b/block.c | 46 | diff --git a/block.c b/block.c |
90 | index XXXXXXX..XXXXXXX 100644 | 47 | index XXXXXXX..XXXXXXX 100644 |
91 | --- a/block.c | 48 | --- a/block.c |
92 | +++ b/block.c | 49 | +++ b/block.c |
93 | @@ -XXX,XX +XXX,XX @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) | 50 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, |
94 | static void bdrv_child_cb_drained_begin(BdrvChild *child) | 51 | BlockDriverState *new_bs) |
95 | { | 52 | { |
96 | BlockDriverState *bs = child->opaque; | 53 | BlockDriverState *old_bs = child->bs; |
97 | - bdrv_do_drained_begin_quiesce(bs, NULL); | 54 | + int i; |
98 | + bdrv_do_drained_begin_quiesce(bs, NULL, false); | 55 | |
99 | } | 56 | if (old_bs && new_bs) { |
100 | 57 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); | |
101 | static bool bdrv_child_cb_drained_poll(BdrvChild *child) | 58 | } |
102 | { | 59 | if (old_bs) { |
103 | BlockDriverState *bs = child->opaque; | 60 | if (old_bs->quiesce_counter && child->role->drained_end) { |
104 | - return bdrv_drain_poll(bs, false, NULL); | 61 | - child->role->drained_end(child); |
105 | + return bdrv_drain_poll(bs, false, NULL, false); | 62 | + for (i = 0; i < old_bs->quiesce_counter; i++) { |
106 | } | 63 | + child->role->drained_end(child); |
107 | 64 | + } | |
108 | static void bdrv_child_cb_drained_end(BdrvChild *child) | 65 | } |
109 | @@ -XXX,XX +XXX,XX @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, | 66 | if (child->role->detach) { |
110 | } | 67 | child->role->detach(child); |
111 | 68 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | |
112 | const BdrvChildRole child_file = { | 69 | if (new_bs) { |
113 | + .parent_is_bds = true, | 70 | QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); |
114 | .get_parent_desc = bdrv_child_get_parent_desc, | 71 | if (new_bs->quiesce_counter && child->role->drained_begin) { |
115 | .inherit_options = bdrv_inherited_options, | 72 | - child->role->drained_begin(child); |
116 | .drained_begin = bdrv_child_cb_drained_begin, | 73 | + for (i = 0; i < new_bs->quiesce_counter; i++) { |
117 | @@ -XXX,XX +XXX,XX @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, | 74 | + child->role->drained_begin(child); |
118 | } | 75 | + } |
119 | 76 | } | |
120 | const BdrvChildRole child_format = { | 77 | |
121 | + .parent_is_bds = true, | 78 | if (child->role->attach) { |
122 | .get_parent_desc = bdrv_child_get_parent_desc, | ||
123 | .inherit_options = bdrv_inherited_fmt_options, | ||
124 | .drained_begin = bdrv_child_cb_drained_begin, | ||
125 | @@ -XXX,XX +XXX,XX @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, | ||
126 | } | ||
127 | |||
128 | const BdrvChildRole child_backing = { | ||
129 | + .parent_is_bds = true, | ||
130 | .get_parent_desc = bdrv_child_get_parent_desc, | ||
131 | .attach = bdrv_backing_attach, | ||
132 | .detach = bdrv_backing_detach, | ||
133 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) | 79 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) |
134 | AioContext *ctx = bdrv_get_aio_context(bs); | 80 | AioContext *ctx = bdrv_get_aio_context(bs); |
135 | 81 | ||
136 | aio_disable_external(ctx); | 82 | aio_disable_external(ctx); |
137 | - bdrv_parent_drained_begin(bs, NULL); | 83 | - bdrv_parent_drained_begin(bs); |
138 | + bdrv_parent_drained_begin(bs, NULL, false); | 84 | + bdrv_parent_drained_begin(bs, NULL); |
139 | bdrv_drain(bs); /* ensure there are no in-flight requests */ | 85 | bdrv_drain(bs); /* ensure there are no in-flight requests */ |
140 | 86 | ||
141 | while (aio_poll(ctx, false)) { | 87 | while (aio_poll(ctx, false)) { |
142 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) | 88 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) |
143 | */ | 89 | */ |
144 | aio_context_acquire(new_context); | 90 | aio_context_acquire(new_context); |
145 | bdrv_attach_aio_context(bs, new_context); | 91 | bdrv_attach_aio_context(bs, new_context); |
146 | - bdrv_parent_drained_end(bs, NULL); | 92 | - bdrv_parent_drained_end(bs); |
147 | + bdrv_parent_drained_end(bs, NULL, false); | 93 | + bdrv_parent_drained_end(bs, NULL); |
148 | aio_enable_external(ctx); | 94 | aio_enable_external(ctx); |
149 | aio_context_release(new_context); | 95 | aio_context_release(new_context); |
150 | } | 96 | } |
151 | diff --git a/block/io.c b/block/io.c | 97 | diff --git a/block/io.c b/block/io.c |
152 | index XXXXXXX..XXXXXXX 100644 | 98 | index XXXXXXX..XXXXXXX 100644 |
153 | --- a/block/io.c | 99 | --- a/block/io.c |
154 | +++ b/block/io.c | 100 | +++ b/block/io.c |
155 | @@ -XXX,XX +XXX,XX @@ | 101 | @@ -XXX,XX +XXX,XX @@ |
156 | static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, | 102 | static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, |
157 | int64_t offset, int bytes, BdrvRequestFlags flags); | 103 | int64_t offset, int bytes, BdrvRequestFlags flags); |
158 | 104 | ||
159 | -void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) | 105 | -void bdrv_parent_drained_begin(BlockDriverState *bs) |
160 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, | 106 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) |
161 | + bool ignore_bds_parents) | ||
162 | { | 107 | { |
163 | BdrvChild *c, *next; | 108 | BdrvChild *c, *next; |
164 | 109 | ||
165 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { | 110 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { |
166 | - if (c == ignore) { | 111 | + if (c == ignore) { |
167 | + if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { | 112 | + continue; |
168 | continue; | 113 | + } |
169 | } | ||
170 | if (c->role->drained_begin) { | 114 | if (c->role->drained_begin) { |
171 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) | 115 | c->role->drained_begin(c); |
172 | } | 116 | } |
173 | } | 117 | } |
174 | 118 | } | |
175 | -void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) | 119 | |
176 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, | 120 | -void bdrv_parent_drained_end(BlockDriverState *bs) |
177 | + bool ignore_bds_parents) | 121 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) |
178 | { | 122 | { |
179 | BdrvChild *c, *next; | 123 | BdrvChild *c, *next; |
180 | 124 | ||
181 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { | 125 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { |
182 | - if (c == ignore) { | 126 | + if (c == ignore) { |
183 | + if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { | 127 | + continue; |
184 | continue; | 128 | + } |
185 | } | ||
186 | if (c->role->drained_end) { | 129 | if (c->role->drained_end) { |
187 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) | 130 | c->role->drained_end(c); |
188 | } | 131 | } |
189 | } | ||
190 | |||
191 | -static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore) | ||
192 | +static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, | ||
193 | + bool ignore_bds_parents) | ||
194 | { | ||
195 | BdrvChild *c, *next; | ||
196 | bool busy = false; | ||
197 | |||
198 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { | ||
199 | - if (c == ignore) { | ||
200 | + if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { | ||
201 | continue; | ||
202 | } | ||
203 | if (c->role->drained_poll) { | ||
204 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 132 | @@ -XXX,XX +XXX,XX @@ typedef struct { |
205 | bool recursive; | 133 | BlockDriverState *bs; |
206 | bool poll; | 134 | bool done; |
207 | BdrvChild *parent; | 135 | bool begin; |
208 | + bool ignore_bds_parents; | 136 | + BdrvChild *parent; |
209 | } BdrvCoDrainData; | 137 | } BdrvCoDrainData; |
210 | 138 | ||
211 | static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) | 139 | static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) |
212 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | 140 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs) |
213 | 141 | return waited; | |
214 | /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ | 142 | } |
215 | bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, | 143 | |
216 | - BdrvChild *ignore_parent) | 144 | +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent); |
217 | + BdrvChild *ignore_parent, bool ignore_bds_parents) | 145 | +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); |
218 | { | 146 | + |
219 | BdrvChild *child, *next; | ||
220 | |||
221 | - if (bdrv_parent_drained_poll(bs, ignore_parent)) { | ||
222 | + if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { | ||
223 | return true; | ||
224 | } | ||
225 | |||
226 | @@ -XXX,XX +XXX,XX @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, | ||
227 | } | ||
228 | |||
229 | if (recursive) { | ||
230 | + assert(!ignore_bds_parents); | ||
231 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
232 | - if (bdrv_drain_poll(child->bs, recursive, child)) { | ||
233 | + if (bdrv_drain_poll(child->bs, recursive, child, false)) { | ||
234 | return true; | ||
235 | } | ||
236 | } | ||
237 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, | ||
238 | * have executed. */ | ||
239 | while (aio_poll(bs->aio_context, false)); | ||
240 | |||
241 | - return bdrv_drain_poll(bs, recursive, ignore_parent); | ||
242 | + return bdrv_drain_poll(bs, recursive, ignore_parent, false); | ||
243 | } | ||
244 | |||
245 | static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
246 | - BdrvChild *parent, bool poll); | ||
247 | + BdrvChild *parent, bool ignore_bds_parents, | ||
248 | + bool poll); | ||
249 | static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
250 | - BdrvChild *parent); | ||
251 | + BdrvChild *parent, bool ignore_bds_parents); | ||
252 | |||
253 | static void bdrv_co_drain_bh_cb(void *opaque) | 147 | static void bdrv_co_drain_bh_cb(void *opaque) |
254 | { | 148 | { |
149 | BdrvCoDrainData *data = opaque; | ||
255 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | 150 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) |
256 | if (bs) { | 151 | |
257 | bdrv_dec_in_flight(bs); | 152 | bdrv_dec_in_flight(bs); |
258 | if (data->begin) { | 153 | if (data->begin) { |
259 | - bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll); | 154 | - bdrv_drained_begin(bs); |
260 | + bdrv_do_drained_begin(bs, data->recursive, data->parent, | 155 | + bdrv_do_drained_begin(bs, data->parent); |
261 | + data->ignore_bds_parents, data->poll); | ||
262 | } else { | ||
263 | - bdrv_do_drained_end(bs, data->recursive, data->parent); | ||
264 | + bdrv_do_drained_end(bs, data->recursive, data->parent, | ||
265 | + data->ignore_bds_parents); | ||
266 | } | ||
267 | } else { | 156 | } else { |
268 | assert(data->begin); | 157 | - bdrv_drained_end(bs); |
158 | + bdrv_do_drained_end(bs, data->parent); | ||
159 | } | ||
160 | |||
161 | data->done = true; | ||
269 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | 162 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) |
163 | } | ||
270 | 164 | ||
271 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | 165 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
272 | bool begin, bool recursive, | 166 | - bool begin) |
273 | - BdrvChild *parent, bool poll) | 167 | + bool begin, BdrvChild *parent) |
274 | + BdrvChild *parent, | ||
275 | + bool ignore_bds_parents, | ||
276 | + bool poll) | ||
277 | { | 168 | { |
278 | BdrvCoDrainData data; | 169 | BdrvCoDrainData data; |
279 | 170 | ||
280 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | 171 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
172 | .bs = bs, | ||
173 | .done = false, | ||
281 | .begin = begin, | 174 | .begin = begin, |
282 | .recursive = recursive, | 175 | + .parent = parent, |
283 | .parent = parent, | ||
284 | + .ignore_bds_parents = ignore_bds_parents, | ||
285 | .poll = poll, | ||
286 | }; | 176 | }; |
287 | if (bs) { | 177 | bdrv_inc_in_flight(bs); |
178 | aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), | ||
288 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | 179 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
289 | } | 180 | assert(data.done); |
290 | 181 | } | |
291 | void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, | 182 | |
292 | - BdrvChild *parent) | 183 | -void bdrv_drained_begin(BlockDriverState *bs) |
293 | + BdrvChild *parent, bool ignore_bds_parents) | 184 | +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) |
294 | { | 185 | { |
295 | assert(!qemu_in_coroutine()); | 186 | if (qemu_in_coroutine()) { |
296 | 187 | - bdrv_co_yield_to_drain(bs, true); | |
297 | @@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, | 188 | + bdrv_co_yield_to_drain(bs, true, parent); |
189 | return; | ||
190 | } | ||
191 | |||
192 | /* Stop things in parent-to-child order */ | ||
193 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { | ||
298 | aio_disable_external(bdrv_get_aio_context(bs)); | 194 | aio_disable_external(bdrv_get_aio_context(bs)); |
299 | } | 195 | - bdrv_parent_drained_begin(bs); |
300 | 196 | } | |
301 | - bdrv_parent_drained_begin(bs, parent); | 197 | |
302 | + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); | 198 | + bdrv_parent_drained_begin(bs, parent); |
303 | bdrv_drain_invoke(bs, true); | 199 | bdrv_drain_invoke(bs, true, false); |
304 | } | 200 | bdrv_drain_recurse(bs); |
305 | 201 | } | |
306 | static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | 202 | |
307 | - BdrvChild *parent, bool poll) | 203 | -void bdrv_drained_end(BlockDriverState *bs) |
308 | + BdrvChild *parent, bool ignore_bds_parents, | 204 | +void bdrv_drained_begin(BlockDriverState *bs) |
309 | + bool poll) | 205 | +{ |
310 | { | 206 | + bdrv_do_drained_begin(bs, NULL); |
311 | BdrvChild *child, *next; | 207 | +} |
208 | + | ||
209 | +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) | ||
210 | { | ||
211 | int old_quiesce_counter; | ||
312 | 212 | ||
313 | if (qemu_in_coroutine()) { | 213 | if (qemu_in_coroutine()) { |
314 | - bdrv_co_yield_to_drain(bs, true, recursive, parent, poll); | 214 | - bdrv_co_yield_to_drain(bs, false); |
315 | + bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, | 215 | + bdrv_co_yield_to_drain(bs, false, parent); |
316 | + poll); | ||
317 | return; | 216 | return; |
318 | } | 217 | } |
319 | |||
320 | - bdrv_do_drained_begin_quiesce(bs, parent); | ||
321 | + bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); | ||
322 | |||
323 | if (recursive) { | ||
324 | + assert(!ignore_bds_parents); | ||
325 | bs->recursive_quiesce_counter++; | ||
326 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
327 | - bdrv_do_drained_begin(child->bs, true, child, false); | ||
328 | + bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, | ||
329 | + false); | ||
330 | } | ||
331 | } | ||
332 | |||
333 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
334 | * nodes. | ||
335 | */ | ||
336 | if (poll) { | ||
337 | + assert(!ignore_bds_parents); | ||
338 | BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); | ||
339 | } | ||
340 | } | ||
341 | |||
342 | void bdrv_drained_begin(BlockDriverState *bs) | ||
343 | { | ||
344 | - bdrv_do_drained_begin(bs, false, NULL, true); | ||
345 | + bdrv_do_drained_begin(bs, false, NULL, false, true); | ||
346 | } | ||
347 | |||
348 | void bdrv_subtree_drained_begin(BlockDriverState *bs) | ||
349 | { | ||
350 | - bdrv_do_drained_begin(bs, true, NULL, true); | ||
351 | + bdrv_do_drained_begin(bs, true, NULL, false, true); | ||
352 | } | ||
353 | |||
354 | -void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
355 | - BdrvChild *parent) | ||
356 | +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
357 | + BdrvChild *parent, bool ignore_bds_parents) | ||
358 | { | ||
359 | BdrvChild *child, *next; | ||
360 | int old_quiesce_counter; | ||
361 | |||
362 | if (qemu_in_coroutine()) { | ||
363 | - bdrv_co_yield_to_drain(bs, false, recursive, parent, false); | ||
364 | + bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, | ||
365 | + false); | ||
366 | return; | ||
367 | } | ||
368 | assert(bs->quiesce_counter > 0); | 218 | assert(bs->quiesce_counter > 0); |
369 | @@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | 219 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
370 | 220 | ||
371 | /* Re-enable things in child-to-parent order */ | 221 | /* Re-enable things in child-to-parent order */ |
372 | bdrv_drain_invoke(bs, false); | 222 | bdrv_drain_invoke(bs, false, false); |
373 | - bdrv_parent_drained_end(bs, parent); | 223 | + bdrv_parent_drained_end(bs, parent); |
374 | + bdrv_parent_drained_end(bs, parent, ignore_bds_parents); | ||
375 | if (old_quiesce_counter == 1) { | 224 | if (old_quiesce_counter == 1) { |
225 | - bdrv_parent_drained_end(bs); | ||
376 | aio_enable_external(bdrv_get_aio_context(bs)); | 226 | aio_enable_external(bdrv_get_aio_context(bs)); |
377 | } | 227 | } |
378 | 228 | } | |
379 | if (recursive) { | 229 | |
380 | + assert(!ignore_bds_parents); | 230 | +void bdrv_drained_end(BlockDriverState *bs) |
381 | bs->recursive_quiesce_counter--; | 231 | +{ |
382 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | 232 | + bdrv_do_drained_end(bs, NULL); |
383 | - bdrv_do_drained_end(child->bs, true, child); | 233 | +} |
384 | + bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); | 234 | + |
385 | } | 235 | /* |
386 | } | 236 | * Wait for pending requests to complete on a single BlockDriverState subtree, |
387 | } | 237 | * and suspend block driver's internal I/O until next request arrives. |
388 | |||
389 | void bdrv_drained_end(BlockDriverState *bs) | ||
390 | { | ||
391 | - bdrv_do_drained_end(bs, false, NULL); | ||
392 | + bdrv_do_drained_end(bs, false, NULL, false); | ||
393 | } | ||
394 | |||
395 | void bdrv_subtree_drained_end(BlockDriverState *bs) | ||
396 | { | ||
397 | - bdrv_do_drained_end(bs, true, NULL); | ||
398 | + bdrv_do_drained_end(bs, true, NULL, false); | ||
399 | } | ||
400 | |||
401 | void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) | ||
402 | @@ -XXX,XX +XXX,XX @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) | ||
403 | int i; | ||
404 | |||
405 | for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { | ||
406 | - bdrv_do_drained_begin(child->bs, true, child, true); | ||
407 | + bdrv_do_drained_begin(child->bs, true, child, false, true); | ||
408 | } | ||
409 | } | ||
410 | |||
411 | @@ -XXX,XX +XXX,XX @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) | ||
412 | int i; | ||
413 | |||
414 | for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { | ||
415 | - bdrv_do_drained_end(child->bs, true, child); | ||
416 | + bdrv_do_drained_end(child->bs, true, child, false); | ||
417 | } | ||
418 | } | ||
419 | |||
420 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | 238 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
421 | BdrvNextIterator it; | 239 | /* Stop things in parent-to-child order */ |
422 | |||
423 | if (qemu_in_coroutine()) { | ||
424 | - bdrv_co_yield_to_drain(NULL, true, false, NULL, true); | ||
425 | + bdrv_co_yield_to_drain(NULL, true, false, NULL, false, true); | ||
426 | return; | ||
427 | } | ||
428 | |||
429 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
430 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
431 | |||
432 | aio_context_acquire(aio_context); | 240 | aio_context_acquire(aio_context); |
433 | - bdrv_do_drained_begin(bs, true, NULL, true); | 241 | aio_disable_external(aio_context); |
434 | + bdrv_do_drained_begin(bs, true, NULL, false, true); | 242 | - bdrv_parent_drained_begin(bs); |
243 | + bdrv_parent_drained_begin(bs, NULL); | ||
244 | bdrv_drain_invoke(bs, true, true); | ||
435 | aio_context_release(aio_context); | 245 | aio_context_release(aio_context); |
436 | } | ||
437 | 246 | ||
438 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | 247 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
439 | AioContext *aio_context = bdrv_get_aio_context(bs); | 248 | /* Re-enable things in child-to-parent order */ |
440 | |||
441 | aio_context_acquire(aio_context); | 249 | aio_context_acquire(aio_context); |
442 | - bdrv_do_drained_end(bs, true, NULL); | 250 | bdrv_drain_invoke(bs, false, true); |
443 | + bdrv_do_drained_end(bs, true, NULL, false); | 251 | - bdrv_parent_drained_end(bs); |
252 | + bdrv_parent_drained_end(bs, NULL); | ||
253 | aio_enable_external(aio_context); | ||
444 | aio_context_release(aio_context); | 254 | aio_context_release(aio_context); |
445 | } | 255 | } |
446 | } | ||
447 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
448 | index XXXXXXX..XXXXXXX 100644 | ||
449 | --- a/block/vvfat.c | ||
450 | +++ b/block/vvfat.c | ||
451 | @@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(int *child_flags, QDict *child_options, | ||
452 | } | ||
453 | |||
454 | static const BdrvChildRole child_vvfat_qcow = { | ||
455 | + .parent_is_bds = true, | ||
456 | .inherit_options = vvfat_qcow_options, | ||
457 | }; | ||
458 | |||
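The .parent_is_bds flag added here marks the vvfat child role as belonging to a parent that is itself a BlockDriverState. A plausible sketch of the filtering this enables in bdrv_parent_drained_begin() when ignore_bds_parents is set (the helper name bdrv_parent_drained_begin_single() is an assumption, not shown in these hunks):

    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
        if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
            /* a parent that is a BDS is quiesced by its own drain */
            continue;
        }
        bdrv_parent_drained_begin_single(c);
    }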
459 | -- | 256 | -- |
460 | 2.13.6 | 257 | 2.13.6 |
461 | 258 | ||
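These drain entry points always come in begin/end pairs. A minimal usage sketch against the API shown in this series (illustrative only, not code from the patches):

    /* Quiesce bs and all of its children, do the work that requires
     * exclusive access, then re-enable things in child-to-parent order
     * with the matching end call. */
    bdrv_subtree_drained_begin(bs);
    /* ... graph manipulation or synchronous cleanup ... */
    bdrv_subtree_drained_end(bs);

bdrv_drained_begin()/bdrv_drained_end() pair the same way but quiesce only the single node.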
1 | Anything can happen inside BDRV_POLL_WHILE(), including graph | 1 | bdrv_drained_begin() waits for the completion of requests in the whole |
---|---|---|---|
2 | changes that may interfere with its callers (e.g. child list iteration | 2 | subtree, but it only actually keeps its immediate bs parameter quiesced |
3 | in recursive callers of bdrv_do_drained_begin). | 3 | until bdrv_drained_end(). |
4 | 4 | ||
5 | Switch to a single BDRV_POLL_WHILE() call for the whole subtree at the | 5 | Add a version that keeps the whole subtree drained. As of this commit, |
6 | end of bdrv_do_drained_begin() to avoid such effects. The recursion | 6 | graph changes cannot be allowed during a subtree drained section, but |
7 | now happens inside the loop condition. As the graph can only change | 7 | this will be fixed soon.
8 | between bdrv_drain_poll() calls, but not inside of it, doing the | ||
9 | recursion here is safe. | ||
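A condensed before/after of the shape this patch establishes, paraphrased from the hunks below:

    /* old: polled once per child, inside the subtree recursion */
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_do_drained_begin(child->bs, true, child);    /* polls */
    }

    /* new: quiesce everything first, then poll once at the top level;
     * the recursion over bs->children moves into the poll condition */
    if (poll) {
        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
    }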
10 | 8 | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | --- | 10 | --- |
13 | include/block/block.h | 9 +++++--- | 11 | include/block/block.h | 13 +++++++++++++ |
14 | block.c | 2 +- | 12 | block/io.c | 54 ++++++++++++++++++++++++++++++++++++++++----------- |
15 | block/io.c | 63 ++++++++++++++++++++++++++++++++++++--------------- | 13 | 2 files changed, 56 insertions(+), 11 deletions(-) |
16 | 3 files changed, 52 insertions(+), 22 deletions(-) | ||
17 | 14 | ||
18 | diff --git a/include/block/block.h b/include/block/block.h | 15 | diff --git a/include/block/block.h b/include/block/block.h |
19 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/include/block/block.h | 17 | --- a/include/block/block.h |
21 | +++ b/include/block/block.h | 18 | +++ b/include/block/block.h |
22 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); | 19 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); |
20 | void bdrv_drained_begin(BlockDriverState *bs); | ||
21 | |||
23 | /** | 22 | /** |
24 | * bdrv_drain_poll: | 23 | + * Like bdrv_drained_begin, but recursively begins a quiesced section for |
24 | + * exclusive access to all child nodes as well. | ||
25 | + * | ||
26 | + * Graph changes are not allowed during a subtree drain section. | ||
27 | + */ | ||
28 | +void bdrv_subtree_drained_begin(BlockDriverState *bs); | ||
29 | + | ||
30 | +/** | ||
31 | * bdrv_drained_end: | ||
25 | * | 32 | * |
26 | - * Poll for pending requests in @bs and its parents (except for | 33 | * End a quiescent section started by bdrv_drained_begin(). |
27 | - * @ignore_parent). This is part of bdrv_drained_begin. | ||
28 | + * Poll for pending requests in @bs, its parents (except for @ignore_parent), | ||
29 | + * and if @recursive is true its children as well. | ||
30 | + * | ||
31 | + * This is part of bdrv_drained_begin. | ||
32 | */ | 34 | */ |
33 | -bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent); | 35 | void bdrv_drained_end(BlockDriverState *bs); |
34 | +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, | 36 | |
35 | + BdrvChild *ignore_parent); | 37 | +/** |
36 | 38 | + * End a quiescent section started by bdrv_subtree_drained_begin(). | |
37 | /** | 39 | + */ |
38 | * bdrv_drained_begin: | 40 | +void bdrv_subtree_drained_end(BlockDriverState *bs); |
39 | diff --git a/block.c b/block.c | 41 | + |
40 | index XXXXXXX..XXXXXXX 100644 | 42 | void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, |
41 | --- a/block.c | 43 | Error **errp); |
42 | +++ b/block.c | 44 | void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); |
43 | @@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) | ||
44 | static bool bdrv_child_cb_drained_poll(BdrvChild *child) | ||
45 | { | ||
46 | BlockDriverState *bs = child->opaque; | ||
47 | - return bdrv_drain_poll(bs, NULL); | ||
48 | + return bdrv_drain_poll(bs, false, NULL); | ||
49 | } | ||
50 | |||
51 | static void bdrv_child_cb_drained_end(BdrvChild *child) | ||
52 | diff --git a/block/io.c b/block/io.c | 45 | diff --git a/block/io.c b/block/io.c |
53 | index XXXXXXX..XXXXXXX 100644 | 46 | index XXXXXXX..XXXXXXX 100644 |
54 | --- a/block/io.c | 47 | --- a/block/io.c |
55 | +++ b/block/io.c | 48 | +++ b/block/io.c |
56 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 49 | @@ -XXX,XX +XXX,XX @@ typedef struct { |
50 | BlockDriverState *bs; | ||
57 | bool done; | 51 | bool done; |
58 | bool begin; | 52 | bool begin; |
59 | bool recursive; | 53 | + bool recursive; |
60 | + bool poll; | ||
61 | BdrvChild *parent; | 54 | BdrvChild *parent; |
62 | } BdrvCoDrainData; | 55 | } BdrvCoDrainData; |
63 | 56 | ||
64 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | 57 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs) |
58 | return waited; | ||
65 | } | 59 | } |
66 | 60 | ||
67 | /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ | 61 | -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent); |
68 | -bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent) | 62 | -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); |
69 | +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, | 63 | +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
70 | + BdrvChild *ignore_parent) | 64 | + BdrvChild *parent); |
65 | +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
66 | + BdrvChild *parent); | ||
67 | |||
68 | static void bdrv_co_drain_bh_cb(void *opaque) | ||
69 | { | ||
70 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
71 | |||
72 | bdrv_dec_in_flight(bs); | ||
73 | if (data->begin) { | ||
74 | - bdrv_do_drained_begin(bs, data->parent); | ||
75 | + bdrv_do_drained_begin(bs, data->recursive, data->parent); | ||
76 | } else { | ||
77 | - bdrv_do_drained_end(bs, data->parent); | ||
78 | + bdrv_do_drained_end(bs, data->recursive, data->parent); | ||
79 | } | ||
80 | |||
81 | data->done = true; | ||
82 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
83 | } | ||
84 | |||
85 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
86 | - bool begin, BdrvChild *parent) | ||
87 | + bool begin, bool recursive, | ||
88 | + BdrvChild *parent) | ||
89 | { | ||
90 | BdrvCoDrainData data; | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
93 | .bs = bs, | ||
94 | .done = false, | ||
95 | .begin = begin, | ||
96 | + .recursive = recursive, | ||
97 | .parent = parent, | ||
98 | }; | ||
99 | bdrv_inc_in_flight(bs); | ||
100 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
101 | assert(data.done); | ||
102 | } | ||
103 | |||
104 | -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) | ||
105 | +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
106 | + BdrvChild *parent) | ||
71 | { | 107 | { |
72 | + BdrvChild *child, *next; | 108 | + BdrvChild *child, *next; |
73 | + | 109 | + |
74 | if (bdrv_parent_drained_poll(bs, ignore_parent)) { | 110 | if (qemu_in_coroutine()) { |
75 | return true; | 111 | - bdrv_co_yield_to_drain(bs, true, parent); |
112 | + bdrv_co_yield_to_drain(bs, true, recursive, parent); | ||
113 | return; | ||
76 | } | 114 | } |
77 | 115 | ||
78 | - return atomic_read(&bs->in_flight); | 116 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) |
79 | + if (atomic_read(&bs->in_flight)) { | 117 | bdrv_parent_drained_begin(bs, parent); |
80 | + return true; | 118 | bdrv_drain_invoke(bs, true, false); |
81 | + } | 119 | bdrv_drain_recurse(bs); |
82 | + | 120 | + |
83 | + if (recursive) { | 121 | + if (recursive) { |
84 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | 122 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { |
85 | + if (bdrv_drain_poll(child->bs, recursive, child)) { | 123 | + bdrv_do_drained_begin(child->bs, true, child); |
86 | + return true; | ||
87 | + } | ||
88 | + } | 124 | + } |
89 | + } | 125 | + } |
90 | + | ||
91 | + return false; | ||
92 | } | 126 | } |
93 | |||
94 | -static bool bdrv_drain_poll_top_level(BlockDriverState *bs, | ||
95 | +static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, | ||
96 | BdrvChild *ignore_parent) | ||
97 | { | ||
98 | /* Execute pending BHs first and check everything else only after the BHs | ||
99 | * have executed. */ | ||
100 | while (aio_poll(bs->aio_context, false)); | ||
101 | |||
102 | - return bdrv_drain_poll(bs, ignore_parent); | ||
103 | + return bdrv_drain_poll(bs, recursive, ignore_parent); | ||
104 | } | ||
105 | |||
106 | static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
107 | - BdrvChild *parent); | ||
108 | + BdrvChild *parent, bool poll); | ||
109 | static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
110 | BdrvChild *parent); | ||
111 | |||
112 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
113 | |||
114 | bdrv_dec_in_flight(bs); | ||
115 | if (data->begin) { | ||
116 | - bdrv_do_drained_begin(bs, data->recursive, data->parent); | ||
117 | + bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll); | ||
118 | } else { | ||
119 | bdrv_do_drained_end(bs, data->recursive, data->parent); | ||
120 | } | ||
121 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
122 | |||
123 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
124 | bool begin, bool recursive, | ||
125 | - BdrvChild *parent) | ||
126 | + BdrvChild *parent, bool poll) | ||
127 | { | ||
128 | BdrvCoDrainData data; | ||
129 | |||
130 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
131 | .begin = begin, | ||
132 | .recursive = recursive, | ||
133 | .parent = parent, | ||
134 | + .poll = poll, | ||
135 | }; | ||
136 | bdrv_inc_in_flight(bs); | ||
137 | aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), | ||
138 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
139 | } | ||
140 | |||
141 | void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
142 | - BdrvChild *parent) | ||
143 | + BdrvChild *parent, bool poll) | ||
144 | { | ||
145 | BdrvChild *child, *next; | ||
146 | |||
147 | if (qemu_in_coroutine()) { | ||
148 | - bdrv_co_yield_to_drain(bs, true, recursive, parent); | ||
149 | + bdrv_co_yield_to_drain(bs, true, recursive, parent, poll); | ||
150 | return; | ||
151 | } | ||
152 | |||
153 | @@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
154 | bdrv_parent_drained_begin(bs, parent); | ||
155 | bdrv_drain_invoke(bs, true); | ||
156 | |||
157 | - /* Wait for drained requests to finish */ | ||
158 | - BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); | ||
159 | - | ||
160 | if (recursive) { | ||
161 | bs->recursive_quiesce_counter++; | ||
162 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
163 | - bdrv_do_drained_begin(child->bs, true, child); | ||
164 | + bdrv_do_drained_begin(child->bs, true, child, false); | ||
165 | } | ||
166 | } | ||
167 | + | ||
168 | + /* | ||
169 | + * Wait for drained requests to finish. | ||
170 | + * | ||
171 | + * Calling BDRV_POLL_WHILE() only once for the top-level node is okay: The | ||
172 | + * call is needed so things in this AioContext can make progress even | ||
173 | + * though we don't return to the main AioContext loop - this automatically | ||
174 | + * includes other nodes in the same AioContext and therefore all child | ||
175 | + * nodes. | ||
176 | + */ | ||
177 | + if (poll) { | ||
178 | + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); | ||
179 | + } | ||
180 | } | ||
181 | 127 | ||
182 | void bdrv_drained_begin(BlockDriverState *bs) | 128 | void bdrv_drained_begin(BlockDriverState *bs) |
183 | { | 129 | { |
184 | - bdrv_do_drained_begin(bs, false, NULL); | 130 | - bdrv_do_drained_begin(bs, NULL); |
185 | + bdrv_do_drained_begin(bs, false, NULL, true); | 131 | + bdrv_do_drained_begin(bs, false, NULL); |
132 | +} | ||
133 | + | ||
134 | +void bdrv_subtree_drained_begin(BlockDriverState *bs) | ||
135 | +{ | ||
136 | + bdrv_do_drained_begin(bs, true, NULL); | ||
186 | } | 137 | } |
187 | 138 | ||
188 | void bdrv_subtree_drained_begin(BlockDriverState *bs) | 139 | -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) |
140 | +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
141 | + BdrvChild *parent) | ||
189 | { | 142 | { |
190 | - bdrv_do_drained_begin(bs, true, NULL); | 143 | + BdrvChild *child, *next; |
191 | + bdrv_do_drained_begin(bs, true, NULL, true); | ||
192 | } | ||
193 | |||
194 | void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
195 | @@ -XXX,XX +XXX,XX @@ void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
196 | int old_quiesce_counter; | 144 | int old_quiesce_counter; |
197 | 145 | ||
198 | if (qemu_in_coroutine()) { | 146 | if (qemu_in_coroutine()) { |
199 | - bdrv_co_yield_to_drain(bs, false, recursive, parent); | 147 | - bdrv_co_yield_to_drain(bs, false, parent); |
200 | + bdrv_co_yield_to_drain(bs, false, recursive, parent, false); | 148 | + bdrv_co_yield_to_drain(bs, false, recursive, parent); |
201 | return; | 149 | return; |
202 | } | 150 | } |
203 | assert(bs->quiesce_counter > 0); | 151 | assert(bs->quiesce_counter > 0); |
204 | @@ -XXX,XX +XXX,XX @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) | 152 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) |
205 | int i; | 153 | if (old_quiesce_counter == 1) { |
206 | 154 | aio_enable_external(bdrv_get_aio_context(bs)); | |
207 | for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { | ||
208 | - bdrv_do_drained_begin(child->bs, true, child); | ||
209 | + bdrv_do_drained_begin(child->bs, true, child, true); | ||
210 | } | 155 | } |
156 | + | ||
157 | + if (recursive) { | ||
158 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
159 | + bdrv_do_drained_end(child->bs, true, child); | ||
160 | + } | ||
161 | + } | ||
211 | } | 162 | } |
212 | 163 | ||
213 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | 164 | void bdrv_drained_end(BlockDriverState *bs) |
214 | AioContext *aio_context = bdrv_get_aio_context(bs); | 165 | { |
215 | 166 | - bdrv_do_drained_end(bs, NULL); | |
216 | aio_context_acquire(aio_context); | 167 | + bdrv_do_drained_end(bs, false, NULL); |
217 | - bdrv_do_drained_begin(bs, true, NULL); | 168 | +} |
218 | + bdrv_do_drained_begin(bs, true, NULL, true); | 169 | + |
219 | aio_context_release(aio_context); | 170 | +void bdrv_subtree_drained_end(BlockDriverState *bs) |
220 | } | 171 | +{ |
221 | 172 | + bdrv_do_drained_end(bs, true, NULL); | |
173 | } | ||
174 | |||
175 | /* | ||
222 | -- | 176 | -- |
223 | 2.13.6 | 177 | 2.13.6 |
224 | 178 | ||
225 | 179 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | Add a subtree drain version to the existing test cases. |
---|---|---|---|
2 | 2 | ||
3 | In order to talk to the source BDS (and maybe in the future to the | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
4 | target BDS as well) directly, we need to convert our existing AIO | 4 | --- |
5 | requests into coroutine I/O requests. | 5 | tests/test-bdrv-drain.c | 27 ++++++++++++++++++++++++++- |
6 | 1 file changed, 26 insertions(+), 1 deletion(-) | ||
6 | 7 | ||
7 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
8 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
9 | Message-id: 20180613181823.13618-3-mreitz@redhat.com | ||
10 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
11 | --- | ||
12 | block/mirror.c | 152 ++++++++++++++++++++++++++++++++++----------------------- | ||
13 | 1 file changed, 90 insertions(+), 62 deletions(-) | ||
14 | |||
15 | diff --git a/block/mirror.c b/block/mirror.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block/mirror.c | 10 | --- a/tests/test-bdrv-drain.c |
18 | +++ b/block/mirror.c | 11 | +++ b/tests/test-bdrv-drain.c |
19 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorOp { | 12 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
20 | QEMUIOVector qiov; | 13 | enum drain_type { |
21 | int64_t offset; | 14 | BDRV_DRAIN_ALL, |
22 | uint64_t bytes; | 15 | BDRV_DRAIN, |
23 | + | 16 | + BDRV_SUBTREE_DRAIN, |
24 | + /* The pointee is set by mirror_co_read(), mirror_co_zero(), and | 17 | DRAIN_TYPE_MAX, |
25 | + * mirror_co_discard() before yielding for the first time */ | 18 | }; |
26 | + int64_t *bytes_handled; | 19 | |
27 | } MirrorOp; | 20 | @@ -XXX,XX +XXX,XX @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) |
28 | 21 | switch (drain_type) { | |
29 | typedef enum MirrorMethod { | 22 | case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; |
30 | @@ -XXX,XX +XXX,XX @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, | 23 | case BDRV_DRAIN: bdrv_drained_begin(bs); break; |
24 | + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; | ||
25 | default: g_assert_not_reached(); | ||
31 | } | 26 | } |
32 | } | 27 | } |
33 | 28 | @@ -XXX,XX +XXX,XX @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) | |
34 | -static void mirror_iteration_done(MirrorOp *op, int ret) | 29 | switch (drain_type) { |
35 | +static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret) | 30 | case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; |
36 | { | 31 | case BDRV_DRAIN: bdrv_drained_end(bs); break; |
37 | MirrorBlockJob *s = op->s; | 32 | + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; |
38 | struct iovec *iov; | 33 | default: g_assert_not_reached(); |
39 | @@ -XXX,XX +XXX,XX @@ static void mirror_iteration_done(MirrorOp *op, int ret) | ||
40 | } | 34 | } |
41 | } | 35 | } |
42 | 36 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void) | |
43 | -static void mirror_write_complete(void *opaque, int ret) | 37 | test_drv_cb_common(BDRV_DRAIN, false); |
44 | +static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret) | ||
45 | { | ||
46 | - MirrorOp *op = opaque; | ||
47 | MirrorBlockJob *s = op->s; | ||
48 | |||
49 | aio_context_acquire(blk_get_aio_context(s->common.blk)); | ||
50 | @@ -XXX,XX +XXX,XX @@ static void mirror_write_complete(void *opaque, int ret) | ||
51 | aio_context_release(blk_get_aio_context(s->common.blk)); | ||
52 | } | 38 | } |
53 | 39 | ||
54 | -static void mirror_read_complete(void *opaque, int ret) | 40 | +static void test_drv_cb_drain_subtree(void) |
55 | +static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret) | 41 | +{ |
56 | { | 42 | + test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); |
57 | - MirrorOp *op = opaque; | ||
58 | MirrorBlockJob *s = op->s; | ||
59 | |||
60 | aio_context_acquire(blk_get_aio_context(s->common.blk)); | ||
61 | @@ -XXX,XX +XXX,XX @@ static void mirror_read_complete(void *opaque, int ret) | ||
62 | |||
63 | mirror_iteration_done(op, ret); | ||
64 | } else { | ||
65 | - blk_aio_pwritev(s->target, op->offset, &op->qiov, | ||
66 | - 0, mirror_write_complete, op); | ||
67 | + ret = blk_co_pwritev(s->target, op->offset, | ||
68 | + op->qiov.size, &op->qiov, 0); | ||
69 | + mirror_write_complete(op, ret); | ||
70 | } | ||
71 | aio_context_release(blk_get_aio_context(s->common.blk)); | ||
72 | } | ||
73 | @@ -XXX,XX +XXX,XX @@ static inline void mirror_wait_for_io(MirrorBlockJob *s) | ||
74 | s->waiting_for_io = false; | ||
75 | } | ||
76 | |||
77 | -/* Submit async read while handling COW. | ||
78 | - * Returns: The number of bytes copied after and including offset, | ||
79 | - * excluding any bytes copied prior to offset due to alignment. | ||
80 | - * This will be @bytes if no alignment is necessary, or | ||
81 | - * (new_end - offset) if tail is rounded up or down due to | ||
82 | - * alignment or buffer limit. | ||
83 | +/* Perform a mirror copy operation. | ||
84 | + * | ||
85 | + * *op->bytes_handled is set to the number of bytes copied after and | ||
86 | + * including offset, excluding any bytes copied prior to offset due | ||
87 | + * to alignment. This will be op->bytes if no alignment is necessary, | ||
88 | + * or (new_end - op->offset) if the tail is rounded up or down due to | ||
89 | + * alignment or buffer limit. | ||
90 | */ | ||
91 | -static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset, | ||
92 | - uint64_t bytes) | ||
93 | +static void coroutine_fn mirror_co_read(void *opaque) | ||
94 | { | ||
95 | + MirrorOp *op = opaque; | ||
96 | + MirrorBlockJob *s = op->s; | ||
97 | BlockBackend *source = s->common.blk; | ||
98 | int nb_chunks; | ||
99 | uint64_t ret; | ||
100 | - MirrorOp *op; | ||
101 | uint64_t max_bytes; | ||
102 | |||
103 | max_bytes = s->granularity * s->max_iov; | ||
104 | |||
105 | /* We can only handle as much as buf_size at a time. */ | ||
106 | - bytes = MIN(s->buf_size, MIN(max_bytes, bytes)); | ||
107 | - assert(bytes); | ||
108 | - assert(bytes < BDRV_REQUEST_MAX_BYTES); | ||
109 | - ret = bytes; | ||
110 | + op->bytes = MIN(s->buf_size, MIN(max_bytes, op->bytes)); | ||
111 | + assert(op->bytes); | ||
112 | + assert(op->bytes < BDRV_REQUEST_MAX_BYTES); | ||
113 | + *op->bytes_handled = op->bytes; | ||
114 | |||
115 | if (s->cow_bitmap) { | ||
116 | - ret += mirror_cow_align(s, &offset, &bytes); | ||
117 | + *op->bytes_handled += mirror_cow_align(s, &op->offset, &op->bytes); | ||
118 | } | ||
119 | - assert(bytes <= s->buf_size); | ||
120 | + /* Cannot exceed BDRV_REQUEST_MAX_BYTES + INT_MAX */ | ||
121 | + assert(*op->bytes_handled <= UINT_MAX); | ||
122 | + assert(op->bytes <= s->buf_size); | ||
123 | /* The offset is granularity-aligned because: | ||
124 | * 1) Caller passes in aligned values; | ||
125 | * 2) mirror_cow_align is used only when target cluster is larger. */ | ||
126 | - assert(QEMU_IS_ALIGNED(offset, s->granularity)); | ||
127 | + assert(QEMU_IS_ALIGNED(op->offset, s->granularity)); | ||
128 | /* The range is sector-aligned, since bdrv_getlength() rounds up. */ | ||
129 | - assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); | ||
130 | - nb_chunks = DIV_ROUND_UP(bytes, s->granularity); | ||
131 | + assert(QEMU_IS_ALIGNED(op->bytes, BDRV_SECTOR_SIZE)); | ||
132 | + nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity); | ||
133 | |||
134 | while (s->buf_free_count < nb_chunks) { | ||
135 | - trace_mirror_yield_in_flight(s, offset, s->in_flight); | ||
136 | + trace_mirror_yield_in_flight(s, op->offset, s->in_flight); | ||
137 | mirror_wait_for_io(s); | ||
138 | } | ||
139 | |||
140 | - /* Allocate a MirrorOp that is used as an AIO callback. */ | ||
141 | - op = g_new(MirrorOp, 1); | ||
142 | - op->s = s; | ||
143 | - op->offset = offset; | ||
144 | - op->bytes = bytes; | ||
145 | - | ||
146 | /* Now make a QEMUIOVector taking enough granularity-sized chunks | ||
147 | * from s->buf_free. | ||
148 | */ | ||
149 | qemu_iovec_init(&op->qiov, nb_chunks); | ||
150 | while (nb_chunks-- > 0) { | ||
151 | MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free); | ||
152 | - size_t remaining = bytes - op->qiov.size; | ||
153 | + size_t remaining = op->bytes - op->qiov.size; | ||
154 | |||
155 | QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next); | ||
156 | s->buf_free_count--; | ||
157 | @@ -XXX,XX +XXX,XX @@ static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset, | ||
158 | |||
159 | /* Copy the dirty cluster. */ | ||
160 | s->in_flight++; | ||
161 | - s->bytes_in_flight += bytes; | ||
162 | - trace_mirror_one_iteration(s, offset, bytes); | ||
163 | + s->bytes_in_flight += op->bytes; | ||
164 | + trace_mirror_one_iteration(s, op->offset, op->bytes); | ||
165 | |||
166 | - blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op); | ||
167 | - return ret; | ||
168 | + ret = blk_co_preadv(source, op->offset, op->bytes, &op->qiov, 0); | ||
169 | + mirror_read_complete(op, ret); | ||
170 | } | ||
171 | |||
172 | -static void mirror_do_zero_or_discard(MirrorBlockJob *s, | ||
173 | - int64_t offset, | ||
174 | - uint64_t bytes, | ||
175 | - bool is_discard) | ||
176 | +static void coroutine_fn mirror_co_zero(void *opaque) | ||
177 | { | ||
178 | - MirrorOp *op; | ||
179 | + MirrorOp *op = opaque; | ||
180 | + int ret; | ||
181 | |||
182 | - /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed | ||
183 | - * so the freeing in mirror_iteration_done is nop. */ | ||
184 | - op = g_new0(MirrorOp, 1); | ||
185 | - op->s = s; | ||
186 | - op->offset = offset; | ||
187 | - op->bytes = bytes; | ||
188 | + op->s->in_flight++; | ||
189 | + op->s->bytes_in_flight += op->bytes; | ||
190 | + *op->bytes_handled = op->bytes; | ||
191 | |||
192 | - s->in_flight++; | ||
193 | - s->bytes_in_flight += bytes; | ||
194 | - if (is_discard) { | ||
195 | - blk_aio_pdiscard(s->target, offset, | ||
196 | - op->bytes, mirror_write_complete, op); | ||
197 | - } else { | ||
198 | - blk_aio_pwrite_zeroes(s->target, offset, | ||
199 | - op->bytes, s->unmap ? BDRV_REQ_MAY_UNMAP : 0, | ||
200 | - mirror_write_complete, op); | ||
201 | - } | ||
202 | + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, | ||
203 | + op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); | ||
204 | + mirror_write_complete(op, ret); | ||
205 | +} | 43 | +} |
206 | + | 44 | + |
207 | +static void coroutine_fn mirror_co_discard(void *opaque) | 45 | static void test_quiesce_common(enum drain_type drain_type, bool recursive) |
46 | { | ||
47 | BlockBackend *blk; | ||
48 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) | ||
49 | test_quiesce_common(BDRV_DRAIN, false); | ||
50 | } | ||
51 | |||
52 | +static void test_quiesce_drain_subtree(void) | ||
208 | +{ | 53 | +{ |
209 | + MirrorOp *op = opaque; | 54 | + test_quiesce_common(BDRV_SUBTREE_DRAIN, true); |
210 | + int ret; | 55 | +} |
211 | + | 56 | + |
212 | + op->s->in_flight++; | 57 | static void test_nested(void) |
213 | + op->s->bytes_in_flight += op->bytes; | 58 | { |
214 | + *op->bytes_handled = op->bytes; | 59 | BlockBackend *blk; |
60 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) | ||
61 | /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ | ||
62 | int bs_quiesce = (outer != BDRV_DRAIN_ALL) + | ||
63 | (inner != BDRV_DRAIN_ALL); | ||
64 | - int backing_quiesce = 0; | ||
65 | + int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) + | ||
66 | + (inner == BDRV_SUBTREE_DRAIN); | ||
67 | int backing_cb_cnt = (outer != BDRV_DRAIN) + | ||
68 | (inner != BDRV_DRAIN); | ||
69 | |||
70 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain(void) | ||
71 | test_blockjob_common(BDRV_DRAIN); | ||
72 | } | ||
73 | |||
74 | +static void test_blockjob_drain_subtree(void) | ||
75 | +{ | ||
76 | + test_blockjob_common(BDRV_SUBTREE_DRAIN); | ||
77 | +} | ||
215 | + | 78 | + |
216 | + ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); | 79 | int main(int argc, char **argv) |
217 | + mirror_write_complete(op, ret); | 80 | { |
81 | bdrv_init(); | ||
82 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
83 | |||
84 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); | ||
85 | g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); | ||
86 | + g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", | ||
87 | + test_drv_cb_drain_subtree); | ||
88 | |||
89 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
90 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
91 | + g_test_add_func("/bdrv-drain/quiesce/drain_subtree", | ||
92 | + test_quiesce_drain_subtree); | ||
93 | |||
94 | g_test_add_func("/bdrv-drain/nested", test_nested); | ||
95 | |||
96 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
97 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
98 | + g_test_add_func("/bdrv-drain/blockjob/drain_subtree", | ||
99 | + test_blockjob_drain_subtree); | ||
100 | |||
101 | return g_test_run(); | ||
218 | } | 102 | } |
219 | |||
220 | static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, | ||
221 | unsigned bytes, MirrorMethod mirror_method) | ||
222 | { | ||
223 | + MirrorOp *op; | ||
224 | + Coroutine *co; | ||
225 | + int64_t bytes_handled = -1; | ||
226 | + | ||
227 | + op = g_new(MirrorOp, 1); | ||
228 | + *op = (MirrorOp){ | ||
229 | + .s = s, | ||
230 | + .offset = offset, | ||
231 | + .bytes = bytes, | ||
232 | + .bytes_handled = &bytes_handled, | ||
233 | + }; | ||
234 | + | ||
235 | switch (mirror_method) { | ||
236 | case MIRROR_METHOD_COPY: | ||
237 | - return mirror_do_read(s, offset, bytes); | ||
238 | + co = qemu_coroutine_create(mirror_co_read, op); | ||
239 | + break; | ||
240 | case MIRROR_METHOD_ZERO: | ||
241 | + co = qemu_coroutine_create(mirror_co_zero, op); | ||
242 | + break; | ||
243 | case MIRROR_METHOD_DISCARD: | ||
244 | - mirror_do_zero_or_discard(s, offset, bytes, | ||
245 | - mirror_method == MIRROR_METHOD_DISCARD); | ||
246 | - return bytes; | ||
247 | + co = qemu_coroutine_create(mirror_co_discard, op); | ||
248 | + break; | ||
249 | default: | ||
250 | abort(); | ||
251 | } | ||
252 | + | ||
253 | + qemu_coroutine_enter(co); | ||
254 | + /* At this point, ownership of op has been moved to the coroutine | ||
255 | + * and the object may already be freed */ | ||
256 | + | ||
257 | + /* Assert that this value has been set */ | ||
258 | + assert(bytes_handled >= 0); | ||
259 | + | ||
260 | + /* Same assertion as in mirror_co_read() (and for mirror_co_read() | ||
261 | + * and mirror_co_discard(), bytes_handled == op->bytes, which | ||
262 | + * is the @bytes parameter given to this function) */ | ||
263 | + assert(bytes_handled <= UINT_MAX); | ||
264 | + return bytes_handled; | ||
265 | } | ||
266 | |||
267 | static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | ||
268 | -- | 103 | -- |
269 | 2.13.6 | 104 | 2.13.6 |
270 | 105 | ||
1 | Since we use bdrv_do_drained_begin/end() for bdrv_drain_all_begin/end(), | 1 | If bdrv_do_drained_begin/end() are called in coroutine context, they |
---|---|---|---|
2 | coroutine context is automatically left with a BH, preventing the | 2 | first use a BH to get out of the coroutine context. Call some existing |
3 | deadlocks that made bdrv_drain_all*() unsafe in coroutine context. Now | 3 | tests again from a coroutine to cover this code path. |
4 | that we even removed the old polling code as dead code, it's obvious | ||
5 | that it's compatible now. | ||
6 | |||
7 | Enable the coroutine test cases for bdrv_drain_all(). | ||
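The detour that makes this work is in bdrv_co_yield_to_drain(): the coroutine schedules bdrv_co_drain_bh_cb() as a BH and yields, so the drain itself runs outside coroutine context. Roughly (the yield is implied by the surrounding code rather than visible in these hunks):

    if (qemu_in_coroutine()) {
        bdrv_inc_in_flight(bs);
        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
                                bdrv_co_drain_bh_cb, &data);
        qemu_coroutine_yield();    /* re-entered by the BH when done */
        assert(data.done);
    }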
8 | 4 | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | 6 | --- |
12 | tests/test-bdrv-drain.c | 16 ++++++++++++++-- | 7 | tests/test-bdrv-drain.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ |
13 | 1 file changed, 14 insertions(+), 2 deletions(-) | 8 | 1 file changed, 59 insertions(+) |
14 | 9 | ||
15 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
16 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tests/test-bdrv-drain.c | 12 | --- a/tests/test-bdrv-drain.c |
18 | +++ b/tests/test-bdrv-drain.c | 13 | +++ b/tests/test-bdrv-drain.c |
14 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) | ||
15 | *aio_ret = ret; | ||
16 | } | ||
17 | |||
18 | +typedef struct CallInCoroutineData { | ||
19 | + void (*entry)(void); | ||
20 | + bool done; | ||
21 | +} CallInCoroutineData; | ||
22 | + | ||
23 | +static coroutine_fn void call_in_coroutine_entry(void *opaque) | ||
24 | +{ | ||
25 | + CallInCoroutineData *data = opaque; | ||
26 | + | ||
27 | + data->entry(); | ||
28 | + data->done = true; | ||
29 | +} | ||
30 | + | ||
31 | +static void call_in_coroutine(void (*entry)(void)) | ||
32 | +{ | ||
33 | + Coroutine *co; | ||
34 | + CallInCoroutineData data = { | ||
35 | + .entry = entry, | ||
36 | + .done = false, | ||
37 | + }; | ||
38 | + | ||
39 | + co = qemu_coroutine_create(call_in_coroutine_entry, &data); | ||
40 | + qemu_coroutine_enter(co); | ||
41 | + while (!data.done) { | ||
42 | + aio_poll(qemu_get_aio_context(), true); | ||
43 | + } | ||
44 | +} | ||
45 | + | ||
46 | enum drain_type { | ||
47 | BDRV_DRAIN_ALL, | ||
48 | BDRV_DRAIN, | ||
19 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void) | 49 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void) |
20 | test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); | 50 | test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); |
21 | } | 51 | } |
22 | 52 | ||
23 | +static void test_drv_cb_co_drain_all(void) | 53 | +static void test_drv_cb_co_drain(void) |
24 | +{ | 54 | +{ |
25 | + call_in_coroutine(test_drv_cb_drain_all); | 55 | + call_in_coroutine(test_drv_cb_drain); |
26 | +} | 56 | +} |
27 | + | 57 | + |
28 | static void test_drv_cb_co_drain(void) | 58 | +static void test_drv_cb_co_drain_subtree(void) |
59 | +{ | ||
60 | + call_in_coroutine(test_drv_cb_drain_subtree); | ||
61 | +} | ||
62 | + | ||
63 | static void test_quiesce_common(enum drain_type drain_type, bool recursive) | ||
29 | { | 64 | { |
30 | call_in_coroutine(test_drv_cb_drain); | 65 | BlockBackend *blk; |
31 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void) | 66 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void) |
32 | test_quiesce_common(BDRV_SUBTREE_DRAIN, true); | 67 | test_quiesce_common(BDRV_SUBTREE_DRAIN, true); |
33 | } | 68 | } |
34 | 69 | ||
35 | +static void test_quiesce_co_drain_all(void) | 70 | +static void test_quiesce_co_drain(void) |
36 | +{ | 71 | +{ |
37 | + call_in_coroutine(test_quiesce_drain_all); | 72 | + call_in_coroutine(test_quiesce_drain); |
38 | +} | 73 | +} |
39 | + | 74 | + |
40 | static void test_quiesce_co_drain(void) | 75 | +static void test_quiesce_co_drain_subtree(void) |
76 | +{ | ||
77 | + call_in_coroutine(test_quiesce_drain_subtree); | ||
78 | +} | ||
79 | + | ||
80 | static void test_nested(void) | ||
41 | { | 81 | { |
42 | call_in_coroutine(test_quiesce_drain); | 82 | BlockBackend *blk; |
43 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 83 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
44 | g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", | 84 | g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", |
45 | test_drv_cb_drain_subtree); | 85 | test_drv_cb_drain_subtree); |
46 | 86 | ||
47 | - // XXX bdrv_drain_all() doesn't work in coroutine context | 87 | + // XXX bdrv_drain_all() doesn't work in coroutine context |
48 | + g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", | 88 | + g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); |
49 | + test_drv_cb_co_drain_all); | 89 | + g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", |
50 | g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); | 90 | + test_drv_cb_co_drain_subtree); |
51 | g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", | 91 | + |
52 | test_drv_cb_co_drain_subtree); | 92 | + |
53 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 93 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); |
94 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
54 | g_test_add_func("/bdrv-drain/quiesce/drain_subtree", | 95 | g_test_add_func("/bdrv-drain/quiesce/drain_subtree", |
55 | test_quiesce_drain_subtree); | 96 | test_quiesce_drain_subtree); |
56 | 97 | ||
57 | - // XXX bdrv_drain_all() doesn't work in coroutine context | 98 | + // XXX bdrv_drain_all() doesn't work in coroutine context |
58 | + g_test_add_func("/bdrv-drain/quiesce/co/drain_all", | 99 | + g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); |
59 | + test_quiesce_co_drain_all); | 100 | + g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", |
60 | g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); | 101 | + test_quiesce_co_drain_subtree); |
61 | g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", | 102 | + |
62 | test_quiesce_co_drain_subtree); | 103 | g_test_add_func("/bdrv-drain/nested", test_nested); |
104 | |||
105 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
63 | -- | 106 | -- |
64 | 2.13.6 | 107 | 2.13.6 |
65 | 108 | ||
1 | If bdrv_do_drained_begin() polls during its subtree recursion, the graph | 1 | Test that drain sections are correctly propagated through the graph. |
---|---|---|---|
2 | can change and mess up the bs->children iteration. Test that this | ||
3 | doesn't happen. | ||
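The hazard in miniature: QLIST_FOREACH_SAFE() only tolerates removal of the current element, so any poll inside the loop body that runs a graph-changing BH can invalidate the next pointer cached by the macro:

    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        /* must not poll here -- a BH could detach or delete the child
         * that the iteration macro has already saved as 'next' */
        bdrv_do_drained_begin(child->bs, true, child, false);
    }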
4 | 2 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | --- | 4 | --- |
7 | tests/test-bdrv-drain.c | 38 +++++++++++++++++++++++++++++--------- | 5 | tests/test-bdrv-drain.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ |
8 | 1 file changed, 29 insertions(+), 9 deletions(-) | 6 | 1 file changed, 74 insertions(+) |
9 | 7 | ||
10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
11 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tests/test-bdrv-drain.c | 10 | --- a/tests/test-bdrv-drain.c |
13 | +++ b/tests/test-bdrv-drain.c | 11 | +++ b/tests/test-bdrv-drain.c |
14 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) | 12 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) |
15 | * If @detach_instead_of_delete is set, the BDS is not going to be | 13 | blk_unref(blk); |
16 | * deleted but will only detach all of its children. | ||
17 | */ | ||
18 | -static void do_test_delete_by_drain(bool detach_instead_of_delete) | ||
19 | +static void do_test_delete_by_drain(bool detach_instead_of_delete, | ||
20 | + enum drain_type drain_type) | ||
21 | { | ||
22 | BlockBackend *blk; | ||
23 | BlockDriverState *bs, *child_bs, *null_bs; | ||
24 | @@ -XXX,XX +XXX,XX @@ static void do_test_delete_by_drain(bool detach_instead_of_delete) | ||
25 | * test_co_delete_by_drain() resuming. Thus, @bs will be deleted | ||
26 | * and the coroutine will exit while this drain operation is still | ||
27 | * in progress. */ | ||
28 | - bdrv_ref(child_bs); | ||
29 | - bdrv_drain(child_bs); | ||
30 | - bdrv_unref(child_bs); | ||
31 | + switch (drain_type) { | ||
32 | + case BDRV_DRAIN: | ||
33 | + bdrv_ref(child_bs); | ||
34 | + bdrv_drain(child_bs); | ||
35 | + bdrv_unref(child_bs); | ||
36 | + break; | ||
37 | + case BDRV_SUBTREE_DRAIN: | ||
38 | + /* Would have to ref/unref bs here for !detach_instead_of_delete, but | ||
39 | + * then the whole test becomes pointless because the graph changes | ||
40 | + * don't occur during the drain any more. */ | ||
41 | + assert(detach_instead_of_delete); | ||
42 | + bdrv_subtree_drained_begin(bs); | ||
43 | + bdrv_subtree_drained_end(bs); | ||
44 | + break; | ||
45 | + default: | ||
46 | + g_assert_not_reached(); | ||
47 | + } | ||
48 | |||
49 | while (!dbdd.done) { | ||
50 | aio_poll(qemu_get_aio_context(), true); | ||
51 | @@ -XXX,XX +XXX,XX @@ static void do_test_delete_by_drain(bool detach_instead_of_delete) | ||
52 | } | ||
53 | } | 14 | } |
54 | 15 | ||
55 | - | 16 | +static void test_multiparent(void) |
56 | static void test_delete_by_drain(void) | 17 | +{ |
57 | { | 18 | + BlockBackend *blk_a, *blk_b; |
58 | - do_test_delete_by_drain(false); | 19 | + BlockDriverState *bs_a, *bs_b, *backing; |
59 | + do_test_delete_by_drain(false, BDRV_DRAIN); | 20 | + BDRVTestState *a_s, *b_s, *backing_s; |
60 | } | 21 | + |
61 | 22 | + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | |
62 | static void test_detach_by_drain(void) | 23 | + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, |
63 | { | 24 | + &error_abort); |
64 | - do_test_delete_by_drain(true); | 25 | + a_s = bs_a->opaque; |
65 | + do_test_delete_by_drain(true, BDRV_DRAIN); | 26 | + blk_insert_bs(blk_a, bs_a, &error_abort); |
27 | + | ||
28 | + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
29 | + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, | ||
30 | + &error_abort); | ||
31 | + b_s = bs_b->opaque; | ||
32 | + blk_insert_bs(blk_b, bs_b, &error_abort); | ||
33 | + | ||
34 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
35 | + backing_s = backing->opaque; | ||
36 | + bdrv_set_backing_hd(bs_a, backing, &error_abort); | ||
37 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); | ||
38 | + | ||
39 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
40 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
41 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
42 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
43 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
44 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
45 | + | ||
46 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); | ||
47 | + | ||
48 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); | ||
49 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | ||
50 | + g_assert_cmpint(backing->quiesce_counter, ==, 1); | ||
51 | + g_assert_cmpint(a_s->drain_count, ==, 1); | ||
52 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
53 | + g_assert_cmpint(backing_s->drain_count, ==, 1); | ||
54 | + | ||
55 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); | ||
56 | + | ||
57 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 2); | ||
58 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); | ||
59 | + g_assert_cmpint(backing->quiesce_counter, ==, 2); | ||
60 | + g_assert_cmpint(a_s->drain_count, ==, 2); | ||
61 | + g_assert_cmpint(b_s->drain_count, ==, 2); | ||
62 | + g_assert_cmpint(backing_s->drain_count, ==, 2); | ||
63 | + | ||
64 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
65 | + | ||
66 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); | ||
67 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | ||
68 | + g_assert_cmpint(backing->quiesce_counter, ==, 1); | ||
69 | + g_assert_cmpint(a_s->drain_count, ==, 1); | ||
70 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
71 | + g_assert_cmpint(backing_s->drain_count, ==, 1); | ||
72 | + | ||
73 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
74 | + | ||
75 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
76 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
77 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
78 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
79 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
80 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
81 | + | ||
82 | + bdrv_unref(backing); | ||
83 | + bdrv_unref(bs_a); | ||
84 | + bdrv_unref(bs_b); | ||
85 | + blk_unref(blk_a); | ||
86 | + blk_unref(blk_b); | ||
66 | +} | 87 | +} |
67 | + | 88 | + |
68 | +static void test_detach_by_drain_subtree(void) | 89 | |
69 | +{ | 90 | typedef struct TestBlockJob { |
70 | + do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); | 91 | BlockJob common; |
71 | } | ||
72 | |||
73 | |||
74 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 92 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
75 | g_test_add_func("/bdrv-drain/blockjob/drain_subtree", | 93 | test_quiesce_co_drain_subtree); |
76 | test_blockjob_drain_subtree); | 94 | |
77 | 95 | g_test_add_func("/bdrv-drain/nested", test_nested); | |
78 | - g_test_add_func("/bdrv-drain/deletion", test_delete_by_drain); | 96 | + g_test_add_func("/bdrv-drain/multiparent", test_multiparent); |
79 | - g_test_add_func("/bdrv-drain/detach", test_detach_by_drain); | 97 | |
80 | + g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); | 98 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); |
81 | + g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); | 99 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); |
82 | + g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); | ||
83 | |||
84 | ret = g_test_run(); | ||
85 | qemu_event_destroy(&done_event); | ||
86 | -- | 100 | -- |
87 | 2.13.6 | 101 | 2.13.6 |
88 | 102 | ||
1 | bdrv_drain_all_*() used bdrv_next() to iterate over all root nodes and | 1 | We need to remember how many of the drain sections in which a node is |
---|---|---|---|
2 | did a subtree drain for each of them. This works fine as long as the | 2 | were recursive (i.e. subtree drain rather than node drain), so that they |
3 | graph is static, but sadly, reality looks different. | 3 | can be correctly applied when children are added or removed during the |
4 | 4 | drained section. | |
5 | If the graph changes so that root nodes are added or removed, we would | 5 | |
6 | have to compensate for this. bdrv_next() returns each root node only | 6 | With this change, it is safe to modify the graph even inside a |
7 | once even if it's the root node for multiple BlockBackends or for a | 7 | bdrv_subtree_drained_begin/end() section. |
8 | monitor-owned block driver tree, which would only complicate things. | ||
9 | |||
10 | The much easier and more obviously correct way is to fundamentally | ||
11 | change the way the functions work: Iterate over all BlockDriverStates, | ||
12 | no matter who owns them, and drain them individually. Compensation is | ||
13 | only necessary when a new BDS is created inside a drain_all section. | ||
14 | Removal of a BDS doesn't require any action because it's gone afterwards | ||
15 | anyway. | ||
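A sketch of the resulting iteration (bdrv_next_all_states() is the helper declared in this patch; the exact flag values passed to bdrv_do_drained_begin() are assumed from the earlier patches in this series):

    BlockDriverState *bs = NULL;

    while ((bs = bdrv_next_all_states(bs))) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        /* non-recursive, ignore_bds_parents=true, no per-node polling:
         * every BDS gets its own iteration step */
        bdrv_do_drained_begin(bs, false, NULL, true, false);
        aio_context_release(aio_context);
    }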
16 | 8 | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
18 | --- | 10 | --- |
19 | include/block/block.h | 1 + | 11 | include/block/block.h | 2 -- |
20 | include/block/block_int.h | 1 + | 12 | include/block/block_int.h | 5 +++++ |
21 | block.c | 34 ++++++++++++++++++++++++--- | 13 | block.c | 32 +++++++++++++++++++++++++++++--- |
22 | block/io.c | 60 ++++++++++++++++++++++++++++++++++++----------- | 14 | block/io.c | 28 ++++++++++++++++++++++++---- |
23 | 4 files changed, 79 insertions(+), 17 deletions(-) | 15 | 4 files changed, 58 insertions(+), 9 deletions(-) |
24 | 16 | ||
25 | diff --git a/include/block/block.h b/include/block/block.h | 17 | diff --git a/include/block/block.h b/include/block/block.h |
26 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/include/block/block.h | 19 | --- a/include/block/block.h |
28 | +++ b/include/block/block.h | 20 | +++ b/include/block/block.h |
29 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_lookup_bs(const char *device, | 21 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs); |
30 | Error **errp); | 22 | /** |
31 | bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); | 23 | * Like bdrv_drained_begin, but recursively begins a quiesced section for |
32 | BlockDriverState *bdrv_next_node(BlockDriverState *bs); | 24 | * exclusive access to all child nodes as well. |
33 | +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs); | 25 | - * |
34 | 26 | - * Graph changes are not allowed during a subtree drain section. | |
35 | typedef struct BdrvNextIterator { | 27 | */ |
36 | enum { | 28 | void bdrv_subtree_drained_begin(BlockDriverState *bs); |
29 | |||
37 | diff --git a/include/block/block_int.h b/include/block/block_int.h | 30 | diff --git a/include/block/block_int.h b/include/block/block_int.h |
38 | index XXXXXXX..XXXXXXX 100644 | 31 | index XXXXXXX..XXXXXXX 100644 |
39 | --- a/include/block/block_int.h | 32 | --- a/include/block/block_int.h |
40 | +++ b/include/block/block_int.h | 33 | +++ b/include/block/block_int.h |
34 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
35 | |||
36 | /* Accessed with atomic ops. */ | ||
37 | int quiesce_counter; | ||
38 | + int recursive_quiesce_counter; | ||
39 | + | ||
40 | unsigned int write_gen; /* Current data generation */ | ||
41 | |||
42 | /* Protected by reqs_lock. */ | ||
41 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | 43 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, |
42 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, | 44 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, |
43 | BdrvRequestFlags flags); | 45 | BdrvRequestFlags flags); |
44 | 46 | ||
45 | +extern unsigned int bdrv_drain_all_count; | 47 | +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); |
46 | void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); | 48 | +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); |
47 | void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); | 49 | + |
48 | 50 | int get_tmp_filename(char *filename, int size); | |
51 | BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, | ||
52 | const char *filename); | ||
49 | diff --git a/block.c b/block.c | 53 | diff --git a/block.c b/block.c |
50 | index XXXXXXX..XXXXXXX 100644 | 54 | index XXXXXXX..XXXXXXX 100644 |
51 | --- a/block.c | 55 | --- a/block.c |
52 | +++ b/block.c | 56 | +++ b/block.c |
53 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_new(void) | 57 | @@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_end(BdrvChild *child) |
54 | 58 | bdrv_drained_end(bs); | |
55 | qemu_co_queue_init(&bs->flush_queue); | 59 | } |
56 | 60 | ||
57 | + for (i = 0; i < bdrv_drain_all_count; i++) { | 61 | +static void bdrv_child_cb_attach(BdrvChild *child) |
58 | + bdrv_drained_begin(bs); | 62 | +{ |
59 | + } | 63 | + BlockDriverState *bs = child->opaque; |
60 | + | 64 | + bdrv_apply_subtree_drain(child, bs); |
61 | QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); | 65 | +} |
62 | 66 | + | |
63 | return bs; | 67 | +static void bdrv_child_cb_detach(BdrvChild *child) |
64 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, | 68 | +{ |
65 | int open_flags, Error **errp) | 69 | + BlockDriverState *bs = child->opaque; |
70 | + bdrv_unapply_subtree_drain(child, bs); | ||
71 | +} | ||
72 | + | ||
73 | static int bdrv_child_cb_inactivate(BdrvChild *child) | ||
66 | { | 74 | { |
67 | Error *local_err = NULL; | 75 | BlockDriverState *bs = child->opaque; |
68 | - int ret; | 76 | @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = { |
69 | + int i, ret; | 77 | .inherit_options = bdrv_inherited_options, |
70 | 78 | .drained_begin = bdrv_child_cb_drained_begin, | |
71 | bdrv_assign_node_name(bs, node_name, &local_err); | 79 | .drained_end = bdrv_child_cb_drained_end, |
72 | if (local_err) { | 80 | + .attach = bdrv_child_cb_attach, |
73 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, | 81 | + .detach = bdrv_child_cb_detach, |
74 | assert(bdrv_min_mem_align(bs) != 0); | 82 | .inactivate = bdrv_child_cb_inactivate, |
75 | assert(is_power_of_2(bs->bl.request_alignment)); | 83 | }; |
76 | 84 | ||
77 | + for (i = 0; i < bs->quiesce_counter; i++) { | 85 | @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = { |
78 | + if (drv->bdrv_co_drain_begin) { | 86 | .inherit_options = bdrv_inherited_fmt_options, |
79 | + drv->bdrv_co_drain_begin(bs); | 87 | .drained_begin = bdrv_child_cb_drained_begin, |
88 | .drained_end = bdrv_child_cb_drained_end, | ||
89 | + .attach = bdrv_child_cb_attach, | ||
90 | + .detach = bdrv_child_cb_detach, | ||
91 | .inactivate = bdrv_child_cb_inactivate, | ||
92 | }; | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static void bdrv_backing_attach(BdrvChild *c) | ||
95 | parent->backing_blocker); | ||
96 | bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, | ||
97 | parent->backing_blocker); | ||
98 | + | ||
99 | + bdrv_child_cb_attach(c); | ||
100 | } | ||
101 | |||
102 | static void bdrv_backing_detach(BdrvChild *c) | ||
103 | @@ -XXX,XX +XXX,XX @@ static void bdrv_backing_detach(BdrvChild *c) | ||
104 | bdrv_op_unblock_all(c->bs, parent->backing_blocker); | ||
105 | error_free(parent->backing_blocker); | ||
106 | parent->backing_blocker = NULL; | ||
107 | + | ||
108 | + bdrv_child_cb_detach(c); | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
113 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); | ||
114 | } | ||
115 | if (old_bs) { | ||
116 | + /* Detach first so that the recursive drain sections coming from @child | ||
117 | + * are already gone and we only end the drain sections that came from | ||
118 | + * elsewhere. */ | ||
119 | + if (child->role->detach) { | ||
120 | + child->role->detach(child); | ||
80 | + } | 121 | + } |
81 | + } | ||
82 | + | ||
83 | return 0; | ||
84 | open_failed: | ||
85 | bs->drv = NULL; | ||
86 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
87 | child->role->detach(child); | ||
88 | } | ||
89 | if (old_bs->quiesce_counter && child->role->drained_end) { | 122 | if (old_bs->quiesce_counter && child->role->drained_end) { |
90 | - for (i = 0; i < old_bs->quiesce_counter; i++) { | 123 | for (i = 0; i < old_bs->quiesce_counter; i++) { |
91 | + int num = old_bs->quiesce_counter; | ||
92 | + if (child->role->parent_is_bds) { | ||
93 | + num -= bdrv_drain_all_count; | ||
94 | + } | ||
95 | + assert(num >= 0); | ||
96 | + for (i = 0; i < num; i++) { | ||
97 | child->role->drained_end(child); | 124 | child->role->drained_end(child); |
98 | } | 125 | } |
99 | } | 126 | } |
127 | - if (child->role->detach) { | ||
128 | - child->role->detach(child); | ||
129 | - } | ||
130 | QLIST_REMOVE(child, next_parent); | ||
131 | } | ||
132 | |||
100 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | 133 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, |
101 | if (new_bs) { | ||
102 | QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); | ||
103 | if (new_bs->quiesce_counter && child->role->drained_begin) { | ||
104 | - for (i = 0; i < new_bs->quiesce_counter; i++) { | ||
105 | + int num = new_bs->quiesce_counter; | ||
106 | + if (child->role->parent_is_bds) { | ||
107 | + num -= bdrv_drain_all_count; | ||
108 | + } | ||
109 | + assert(num >= 0); | ||
110 | + for (i = 0; i < num; i++) { | ||
111 | child->role->drained_begin(child); | ||
112 | } | 134 | } |
113 | } | 135 | } |
114 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs) | 136 | |
115 | return QTAILQ_NEXT(bs, node_list); | 137 | + /* Attach only after starting new drained sections, so that recursive |
116 | } | 138 | + * drain sections coming from @child don't get an extra .drained_begin |
117 | 139 | + * callback. */ | |
118 | +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) | 140 | if (child->role->attach) { |
119 | +{ | 141 | child->role->attach(child); |
120 | + if (!bs) { | 142 | } |
121 | + return QTAILQ_FIRST(&all_bdrv_states); | ||
122 | + } | ||
123 | + return QTAILQ_NEXT(bs, bs_list); | ||
124 | +} | ||
125 | + | ||
126 | const char *bdrv_get_node_name(const BlockDriverState *bs) | ||
127 | { | ||
128 | return bs->node_name; | ||
129 | diff --git a/block/io.c b/block/io.c | 143 | diff --git a/block/io.c b/block/io.c |
130 | index XXXXXXX..XXXXXXX 100644 | 144 | index XXXXXXX..XXXXXXX 100644 |
131 | --- a/block/io.c | 145 | --- a/block/io.c |
132 | +++ b/block/io.c | 146 | +++ b/block/io.c |
133 | @@ -XXX,XX +XXX,XX @@ | 147 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
134 | /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ | 148 | assert(data.done); |
135 | #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) | 149 | } |
136 | 150 | ||
137 | +static AioWait drain_all_aio_wait; | 151 | -static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
138 | + | 152 | - BdrvChild *parent) |
139 | static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, | 153 | +void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
140 | int64_t offset, int bytes, BdrvRequestFlags flags); | 154 | + BdrvChild *parent) |
141 | 155 | { | |
142 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_assert_idle(BlockDriverState *bs) | 156 | BdrvChild *child, *next; |
157 | |||
158 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
159 | bdrv_drain_recurse(bs); | ||
160 | |||
161 | if (recursive) { | ||
162 | + bs->recursive_quiesce_counter++; | ||
163 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
164 | bdrv_do_drained_begin(child->bs, true, child); | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_begin(BlockDriverState *bs) | ||
167 | bdrv_do_drained_begin(bs, true, NULL); | ||
168 | } | ||
169 | |||
170 | -static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
171 | - BdrvChild *parent) | ||
172 | +void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
173 | + BdrvChild *parent) | ||
174 | { | ||
175 | BdrvChild *child, *next; | ||
176 | int old_quiesce_counter; | ||
177 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
143 | } | 178 | } |
144 | } | 179 | |
145 | 180 | if (recursive) { | |
146 | +unsigned int bdrv_drain_all_count = 0; | 181 | + bs->recursive_quiesce_counter--; |
147 | + | 182 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { |
148 | +static bool bdrv_drain_all_poll(void) | 183 | bdrv_do_drained_end(child->bs, true, child); |
149 | +{ | 184 | } |
150 | + BlockDriverState *bs = NULL; | 185 | @@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_end(BlockDriverState *bs) |
151 | + bool result = false; | 186 | bdrv_do_drained_end(bs, true, NULL); |
152 | + | 187 | } |
153 | + /* Execute pending BHs first (may modify the graph) and check everything | 188 | |
154 | + * else only after the BHs have executed. */ | 189 | +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) |
155 | + while (aio_poll(qemu_get_aio_context(), false)); | 190 | +{ |
156 | + | 191 | + int i; |
157 | + /* bdrv_drain_poll() can't make changes to the graph and we are holding the | 192 | + |
158 | + * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ | 193 | + for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { |
159 | + while ((bs = bdrv_next_all_states(bs))) { | 194 | + bdrv_do_drained_begin(child->bs, true, child); |
160 | + AioContext *aio_context = bdrv_get_aio_context(bs); | ||
161 | + aio_context_acquire(aio_context); | ||
162 | + result |= bdrv_drain_poll(bs, false, NULL, true); | ||
163 | + aio_context_release(aio_context); | ||
164 | + } | 195 | + } |
165 | + | 196 | +} |
166 | + return result; | 197 | + |
198 | +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) | ||
199 | +{ | ||
200 | + int i; | ||
201 | + | ||
202 | + for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { | ||
203 | + bdrv_do_drained_end(child->bs, true, child); | ||
204 | + } | ||
167 | +} | 205 | +} |
168 | + | 206 | + |
169 | /* | 207 | /* |
170 | * Wait for pending requests to complete across all BlockDriverStates | 208 | * Wait for pending requests to complete on a single BlockDriverState subtree, |
171 | * | 209 | * and suspend block driver's internal I/O until next request arrives. |
172 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_assert_idle(BlockDriverState *bs) | ||
173 | */ | ||
174 | void bdrv_drain_all_begin(void) | ||
175 | { | ||
176 | - BlockDriverState *bs; | ||
177 | - BdrvNextIterator it; | ||
178 | + BlockDriverState *bs = NULL; | ||
179 | |||
180 | if (qemu_in_coroutine()) { | ||
181 | - bdrv_co_yield_to_drain(NULL, true, false, NULL, false, true); | ||
182 | + bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); | ||
183 | return; | ||
184 | } | ||
185 | |||
186 | - /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread | ||
187 | - * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on | ||
188 | - * nodes in several different AioContexts, so make sure we're in the main | ||
189 | - * context. */ | ||
190 | + /* AIO_WAIT_WHILE() with a NULL context can only be called from the main | ||
191 | + * loop AioContext, so make sure we're in the main context. */ | ||
192 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | ||
193 | + assert(bdrv_drain_all_count < INT_MAX); | ||
194 | + bdrv_drain_all_count++; | ||
195 | |||
196 | - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
197 | + /* Quiesce all nodes, without polling in-flight requests yet. The graph | ||
198 | + * cannot change during this loop. */ | ||
199 | + while ((bs = bdrv_next_all_states(bs))) { | ||
200 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
201 | |||
202 | aio_context_acquire(aio_context); | ||
203 | - bdrv_do_drained_begin(bs, true, NULL, false, true); | ||
204 | + bdrv_do_drained_begin(bs, false, NULL, true, false); | ||
205 | aio_context_release(aio_context); | ||
206 | } | ||
207 | |||
208 | - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
209 | + /* Now poll the in-flight requests */ | ||
210 | + AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll()); | ||
211 | + | ||
212 | + while ((bs = bdrv_next_all_states(bs))) { | ||
213 | bdrv_drain_assert_idle(bs); | ||
214 | } | ||
215 | } | ||
216 | |||
217 | void bdrv_drain_all_end(void) | ||
218 | { | ||
219 | - BlockDriverState *bs; | ||
220 | - BdrvNextIterator it; | ||
221 | + BlockDriverState *bs = NULL; | ||
222 | |||
223 | - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
224 | + while ((bs = bdrv_next_all_states(bs))) { | ||
225 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
226 | |||
227 | aio_context_acquire(aio_context); | ||
228 | - bdrv_do_drained_end(bs, true, NULL, false); | ||
229 | + bdrv_do_drained_end(bs, false, NULL, true); | ||
230 | aio_context_release(aio_context); | ||
231 | } | ||
232 | + | ||
233 | + assert(bdrv_drain_all_count > 0); | ||
234 | + bdrv_drain_all_count--; | ||
235 | } | ||
236 | |||
237 | void bdrv_drain_all(void) | ||
238 | @@ -XXX,XX +XXX,XX @@ void bdrv_inc_in_flight(BlockDriverState *bs) | ||
239 | void bdrv_wakeup(BlockDriverState *bs) | ||
240 | { | ||
241 | aio_wait_kick(bdrv_get_aio_wait(bs)); | ||
242 | + aio_wait_kick(&drain_all_aio_wait); | ||
243 | } | ||
244 | |||
245 | void bdrv_dec_in_flight(BlockDriverState *bs) | ||
246 | -- | 210 | -- |
247 | 2.13.6 | 211 | 2.13.6 |
248 | 212 | ||
1 | This tests both adding and removing a node between bdrv_drain_all_begin() | ||
---|---|---|---|
2 | and bdrv_drain_all_end(), and enables the existing detach test for | ||
3 | drain_all. | ||
4 | |||
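The central invariant being tested (a sketch, relying on the bdrv_new() compensation from the previous patch) is that a node created inside a drain_all section starts out drained once per active section:

    bdrv_drain_all_begin();     /* bdrv_drain_all_count == 1 */

    /* bdrv_new() calls bdrv_drained_begin() once per active drain_all
     * section, so the new node begins life with quiesce_counter == 1: */
    bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
                                &error_abort);

    bdrv_drain_all_end();       /* also ends the section begun for bs_b */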
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | --- | 2 | --- |
7 | tests/test-bdrv-drain.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++-- | 3 | tests/test-bdrv-drain.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ |
8 | 1 file changed, 73 insertions(+), 2 deletions(-) | 4 | 1 file changed, 80 insertions(+) |
9 | 5 | ||
10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | 6 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
11 | index XXXXXXX..XXXXXXX 100644 | 7 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tests/test-bdrv-drain.c | 8 | --- a/tests/test-bdrv-drain.c |
13 | +++ b/tests/test-bdrv-drain.c | 9 | +++ b/tests/test-bdrv-drain.c |
14 | @@ -XXX,XX +XXX,XX @@ static void test_multiparent(void) | 10 | @@ -XXX,XX +XXX,XX @@ static void test_multiparent(void) |
15 | blk_unref(blk_b); | 11 | blk_unref(blk_b); |
16 | } | 12 | } |
17 | 13 | ||
18 | -static void test_graph_change(void) | 14 | +static void test_graph_change(void) |
19 | +static void test_graph_change_drain_subtree(void) | ||
20 | { | ||
21 | BlockBackend *blk_a, *blk_b; | ||
22 | BlockDriverState *bs_a, *bs_b, *backing; | ||
23 | @@ -XXX,XX +XXX,XX @@ static void test_graph_change(void) | ||
24 | blk_unref(blk_b); | ||
25 | } | ||
26 | |||
27 | +static void test_graph_change_drain_all(void) | ||
28 | +{ | 15 | +{ |
29 | + BlockBackend *blk_a, *blk_b; | 16 | + BlockBackend *blk_a, *blk_b; |
30 | + BlockDriverState *bs_a, *bs_b; | 17 | + BlockDriverState *bs_a, *bs_b, *backing; |
31 | + BDRVTestState *a_s, *b_s; | 18 | + BDRVTestState *a_s, *b_s, *backing_s; |
32 | + | 19 | + |
33 | + /* Create node A with a BlockBackend */ | ||
34 | + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | 20 | + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
35 | + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, | 21 | + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, |
36 | + &error_abort); | 22 | + &error_abort); |
37 | + a_s = bs_a->opaque; | 23 | + a_s = bs_a->opaque; |
38 | + blk_insert_bs(blk_a, bs_a, &error_abort); | 24 | + blk_insert_bs(blk_a, bs_a, &error_abort); |
39 | + | 25 | + |
40 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
41 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
42 | + | ||
43 | + /* Call bdrv_drain_all_begin() */ | ||
44 | + bdrv_drain_all_begin(); | ||
45 | + | ||
46 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); | ||
47 | + g_assert_cmpint(a_s->drain_count, ==, 1); | ||
48 | + | ||
49 | + /* Create node B with a BlockBackend */ | ||
50 | + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | 26 | + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
51 | + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, | 27 | + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, |
52 | + &error_abort); | 28 | + &error_abort); |
53 | + b_s = bs_b->opaque; | 29 | + b_s = bs_b->opaque; |
54 | + blk_insert_bs(blk_b, bs_b, &error_abort); | 30 | + blk_insert_bs(blk_b, bs_b, &error_abort); |
55 | + | 31 | + |
56 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); | 32 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); |
57 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | 33 | + backing_s = backing->opaque; |
58 | + g_assert_cmpint(a_s->drain_count, ==, 1); | 34 | + bdrv_set_backing_hd(bs_a, backing, &error_abort); |
59 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
60 | + | 35 | + |
61 | + /* Unref and finally delete node A */ | 36 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); |
37 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
38 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
39 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
40 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
41 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
42 | + | ||
43 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); | ||
44 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); | ||
45 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); | ||
46 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); | ||
47 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); | ||
48 | + | ||
49 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); | ||
50 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); | ||
51 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); | ||
52 | + g_assert_cmpint(backing->quiesce_counter, ==, 5); | ||
53 | + g_assert_cmpint(a_s->drain_count, ==, 5); | ||
54 | + g_assert_cmpint(b_s->drain_count, ==, 5); | ||
55 | + g_assert_cmpint(backing_s->drain_count, ==, 5); | ||
56 | + | ||
57 | + bdrv_set_backing_hd(bs_b, NULL, &error_abort); | ||
58 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 3); | ||
59 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); | ||
60 | + g_assert_cmpint(backing->quiesce_counter, ==, 3); | ||
61 | + g_assert_cmpint(a_s->drain_count, ==, 3); | ||
62 | + g_assert_cmpint(b_s->drain_count, ==, 2); | ||
63 | + g_assert_cmpint(backing_s->drain_count, ==, 3); | ||
64 | + | ||
65 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); | ||
66 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); | ||
67 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); | ||
68 | + g_assert_cmpint(backing->quiesce_counter, ==, 5); | ||
69 | + g_assert_cmpint(a_s->drain_count, ==, 5); | ||
70 | + g_assert_cmpint(b_s->drain_count, ==, 5); | ||
71 | + g_assert_cmpint(backing_s->drain_count, ==, 5); | ||
72 | + | ||
73 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
74 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
75 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
76 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
77 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
78 | + | ||
79 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
80 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
81 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
82 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
83 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
84 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
85 | + | ||
86 | + bdrv_unref(backing); | ||
87 | + bdrv_unref(bs_a); | ||
88 | + bdrv_unref(bs_b); | ||
62 | + blk_unref(blk_a); | 89 | + blk_unref(blk_a); |
63 | + | ||
64 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); | ||
65 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | ||
66 | + g_assert_cmpint(a_s->drain_count, ==, 1); | ||
67 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
68 | + | ||
69 | + bdrv_unref(bs_a); | ||
70 | + | ||
71 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | ||
72 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
73 | + | ||
74 | + /* End the drained section */ | ||
75 | + bdrv_drain_all_end(); | ||
76 | + | ||
77 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
78 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
79 | + | ||
80 | + bdrv_unref(bs_b); | ||
81 | + blk_unref(blk_b); | 90 | + blk_unref(blk_b); |
82 | +} | 91 | +} |
83 | + | 92 | + |
84 | struct test_iothread_data { | 93 | |
85 | BlockDriverState *bs; | 94 | typedef struct TestBlockJob { |
86 | enum drain_type drain_type; | 95 | BlockJob common; |
87 | @@ -XXX,XX +XXX,XX @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, | ||
88 | bdrv_subtree_drained_begin(bs); | ||
89 | bdrv_subtree_drained_end(bs); | ||
90 | break; | ||
91 | + case BDRV_DRAIN_ALL: | ||
92 | + bdrv_drain_all_begin(); | ||
93 | + bdrv_drain_all_end(); | ||
94 | + break; | ||
95 | default: | ||
96 | g_assert_not_reached(); | ||
97 | } | ||
98 | @@ -XXX,XX +XXX,XX @@ static void test_delete_by_drain(void) | ||
99 | do_test_delete_by_drain(false, BDRV_DRAIN); | ||
100 | } | ||
101 | |||
102 | +static void test_detach_by_drain_all(void) | ||
103 | +{ | ||
104 | + do_test_delete_by_drain(true, BDRV_DRAIN_ALL); | ||
105 | +} | ||
106 | + | ||
107 | static void test_detach_by_drain(void) | ||
108 | { | ||
109 | do_test_delete_by_drain(true, BDRV_DRAIN); | ||
110 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 96 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
111 | 97 | ||
112 | g_test_add_func("/bdrv-drain/nested", test_nested); | 98 | g_test_add_func("/bdrv-drain/nested", test_nested); |
113 | g_test_add_func("/bdrv-drain/multiparent", test_multiparent); | 99 | g_test_add_func("/bdrv-drain/multiparent", test_multiparent); |
114 | - g_test_add_func("/bdrv-drain/graph-change", test_graph_change); | 100 | + g_test_add_func("/bdrv-drain/graph-change", test_graph_change); |
115 | + | 101 | |
116 | + g_test_add_func("/bdrv-drain/graph-change/drain_subtree", | 102 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); |
117 | + test_graph_change_drain_subtree); | 103 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); |
118 | + g_test_add_func("/bdrv-drain/graph-change/drain_all", | ||
119 | + test_graph_change_drain_all); | ||
120 | |||
121 | g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); | ||
122 | g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); | ||
123 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
124 | test_blockjob_drain_subtree); | ||
125 | |||
126 | g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); | ||
127 | + g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); | ||
128 | g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); | ||
129 | g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); | ||
130 | g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); | ||
131 | -- | 104 | -- |
132 | 2.13.6 | 105 | 2.13.6 |
133 | 106 | ||
1 | From: Max Reitz <mreitz@redhat.com> | 1 | Since commit bde70715, base is the only node that is reopened in |
---|---|---|---|
2 | commit_start(). This means that the code, which still involves an | ||
3 | explicit BlockReopenQueue, can now be simplified by using bdrv_reopen(). | ||
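A sketch of the simplified call (flag handling exactly as in the hunk below):

    /* Sketch: make the base image writable with a single bdrv_reopen()
     * call instead of building a one-element BlockReopenQueue. */
    orig_base_flags = bdrv_get_flags(base);
    if (!(orig_base_flags & BDRV_O_RDWR)) {
        bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err);
    }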
2 | 4 | ||
3 | When converting mirror's I/O to coroutines, we are going to need a point | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
4 | where these coroutines are created. mirror_perform() is going to be | 6 | Reviewed-by: Fam Zheng <famz@redhat.com> |
5 | that point. | 7 | --- |
8 | block/commit.c | 8 +------- | ||
9 | 1 file changed, 1 insertion(+), 7 deletions(-) | ||
6 | 10 | ||
7 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 11 | diff --git a/block/commit.c b/block/commit.c |
8 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
10 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
11 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
12 | Message-id: 20180613181823.13618-2-mreitz@redhat.com | ||
13 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
14 | --- | ||
15 | block/mirror.c | 51 +++++++++++++++++++++++++++++---------------------- | ||
16 | 1 file changed, 29 insertions(+), 22 deletions(-) | ||
17 | |||
18 | diff --git a/block/mirror.c b/block/mirror.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/mirror.c | 13 | --- a/block/commit.c |
21 | +++ b/block/mirror.c | 14 | +++ b/block/commit.c |
22 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorOp { | 15 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, |
23 | uint64_t bytes; | 16 | const char *filter_node_name, Error **errp) |
24 | } MirrorOp; | ||
25 | |||
26 | +typedef enum MirrorMethod { | ||
27 | + MIRROR_METHOD_COPY, | ||
28 | + MIRROR_METHOD_ZERO, | ||
29 | + MIRROR_METHOD_DISCARD, | ||
30 | +} MirrorMethod; | ||
31 | + | ||
32 | static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, | ||
33 | int error) | ||
34 | { | 17 | { |
35 | @@ -XXX,XX +XXX,XX @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, | 18 | CommitBlockJob *s; |
36 | } | 19 | - BlockReopenQueue *reopen_queue = NULL; |
37 | } | 20 | int orig_base_flags; |
38 | 21 | BlockDriverState *iter; | |
39 | +static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, | 22 | BlockDriverState *commit_top_bs = NULL; |
40 | + unsigned bytes, MirrorMethod mirror_method) | 23 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, |
41 | +{ | 24 | /* convert base to r/w, if necessary */ |
42 | + switch (mirror_method) { | 25 | orig_base_flags = bdrv_get_flags(base); |
43 | + case MIRROR_METHOD_COPY: | 26 | if (!(orig_base_flags & BDRV_O_RDWR)) { |
44 | + return mirror_do_read(s, offset, bytes); | 27 | - reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, |
45 | + case MIRROR_METHOD_ZERO: | 28 | - orig_base_flags | BDRV_O_RDWR); |
46 | + case MIRROR_METHOD_DISCARD: | 29 | - } |
47 | + mirror_do_zero_or_discard(s, offset, bytes, | 30 | - |
48 | + mirror_method == MIRROR_METHOD_DISCARD); | 31 | - if (reopen_queue) { |
49 | + return bytes; | 32 | - bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); |
50 | + default: | 33 | + bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err); |
51 | + abort(); | 34 | if (local_err != NULL) { |
52 | + } | 35 | error_propagate(errp, local_err); |
53 | +} | 36 | goto fail; |
54 | + | ||
55 | static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | ||
56 | { | ||
57 | BlockDriverState *source = s->source; | ||
58 | @@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | ||
59 | int ret; | ||
60 | int64_t io_bytes; | ||
61 | int64_t io_bytes_acct; | ||
62 | - enum MirrorMethod { | ||
63 | - MIRROR_METHOD_COPY, | ||
64 | - MIRROR_METHOD_ZERO, | ||
65 | - MIRROR_METHOD_DISCARD | ||
66 | - } mirror_method = MIRROR_METHOD_COPY; | ||
67 | + MirrorMethod mirror_method = MIRROR_METHOD_COPY; | ||
68 | |||
69 | assert(!(offset % s->granularity)); | ||
70 | ret = bdrv_block_status_above(source, NULL, offset, | ||
71 | @@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | ||
72 | } | ||
73 | |||
74 | io_bytes = mirror_clip_bytes(s, offset, io_bytes); | ||
75 | - switch (mirror_method) { | ||
76 | - case MIRROR_METHOD_COPY: | ||
77 | - io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes); | ||
78 | - break; | ||
79 | - case MIRROR_METHOD_ZERO: | ||
80 | - case MIRROR_METHOD_DISCARD: | ||
81 | - mirror_do_zero_or_discard(s, offset, io_bytes, | ||
82 | - mirror_method == MIRROR_METHOD_DISCARD); | ||
83 | - if (write_zeroes_ok) { | ||
84 | - io_bytes_acct = 0; | ||
85 | - } else { | ||
86 | - io_bytes_acct = io_bytes; | ||
87 | - } | ||
88 | - break; | ||
89 | - default: | ||
90 | - abort(); | ||
91 | + io_bytes = mirror_perform(s, offset, io_bytes, mirror_method); | ||
92 | + if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) { | ||
93 | + io_bytes_acct = 0; | ||
94 | + } else { | ||
95 | + io_bytes_acct = io_bytes; | ||
96 | } | ||
97 | assert(io_bytes); | ||
98 | offset += io_bytes; | ||
99 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) | ||
100 | continue; | ||
101 | } | ||
102 | |||
103 | - mirror_do_zero_or_discard(s, offset, bytes, false); | ||
104 | + mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO); | ||
105 | offset += bytes; | ||
106 | } | ||
107 | |||
108 | -- | 37 | -- |
109 | 2.13.6 | 38 | 2.13.6 |
110 | 39 | ||
1 | From: Max Reitz <mreitz@redhat.com> | 1 | The bdrv_reopen*() implementation doesn't like it if the graph is |
---|---|---|---|
2 | changed between queuing nodes for reopen and actually reopening them | ||
3 | (one of the reasons is that queuing can be recursive). | ||
2 | 4 | ||
3 | This patch makes the mirror code differentiate between simply waiting | 5 | So instead of draining the device only in bdrv_reopen_multiple(), |
4 | for any operation to complete (mirror_wait_for_free_in_flight_slot()) | 6 | require that callers already drained all affected nodes, and assert this |
5 | and specifically waiting for all operations touching a certain range of | 7 | in bdrv_reopen_queue(). |
6 | the virtual disk to complete (mirror_wait_on_conflicts()). | ||
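A sketch of the resulting distinction (both patterns appear in the diff below):

    /* Wait until some in-flight operation completes and frees a slot: */
    while (s->in_flight >= MAX_IN_FLIGHT) {
        mirror_wait_for_free_in_flight_slot(s);
    }

    /* Wait until no operation overlaps [offset, offset + bytes) any more: */
    mirror_wait_on_conflicts(NULL, s, offset, bytes);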
7 | 8 | ||
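The caller-side contract (a sketch; variable names borrowed from the qemu-io hunk below) is now: drain first, then queue and reopen, then end the drained section:

    bdrv_subtree_drained_begin(bs);

    /* bdrv_reopen_queue() now asserts bs->quiesce_counter > 0, so the
     * graph cannot change between queuing and reopening. */
    brq = bdrv_reopen_queue(NULL, bs, opts, flags);
    bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err);

    bdrv_subtree_drained_end(bs);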
8 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | Reviewed-by: Fam Zheng <famz@redhat.com> | 10 | Reviewed-by: Fam Zheng <famz@redhat.com> |
10 | Message-id: 20180613181823.13618-5-mreitz@redhat.com | ||
11 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
12 | --- | 11 | --- |
13 | block/mirror.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++---------- | 12 | block.c | 23 ++++++++++++++++------- |
14 | 1 file changed, 84 insertions(+), 18 deletions(-) | 13 | block/replication.c | 6 ++++++ |
14 | qemu-io-cmds.c | 3 +++ | ||
15 | 3 files changed, 25 insertions(+), 7 deletions(-) | ||
15 | 16 | ||
16 | diff --git a/block/mirror.c b/block/mirror.c | 17 | diff --git a/block.c b/block.c |
17 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/block/mirror.c | 19 | --- a/block.c |
19 | +++ b/block/mirror.c | 20 | +++ b/block.c |
20 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, |
21 | #include "qemu/osdep.h" | 22 | * returns a pointer to bs_queue, which is either the newly allocated |
22 | #include "qemu/cutils.h" | 23 | * bs_queue, or the existing bs_queue being used. |
23 | #include "qemu/coroutine.h" | 24 | * |
24 | +#include "qemu/range.h" | 25 | + * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). |
25 | #include "trace.h" | 26 | */ |
26 | #include "block/blockjob_int.h" | 27 | static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, |
27 | #include "block/block_int.h" | 28 | BlockDriverState *bs, |
28 | @@ -XXX,XX +XXX,XX @@ struct MirrorOp { | 29 | @@ -XXX,XX +XXX,XX @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, |
29 | * mirror_co_discard() before yielding for the first time */ | 30 | BdrvChild *child; |
30 | int64_t *bytes_handled; | 31 | QDict *old_options, *explicit_options; |
31 | 32 | ||
32 | + bool is_pseudo_op; | 33 | + /* Make sure that the caller remembered to use a drained section. This is |
33 | CoQueue waiting_requests; | 34 | + * important to avoid graph changes between the recursive queuing here and |
34 | 35 | + * bdrv_reopen_multiple(). */ | |
35 | QTAILQ_ENTRY(MirrorOp) next; | 36 | + assert(bs->quiesce_counter > 0); |
36 | @@ -XXX,XX +XXX,XX @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, | 37 | + |
38 | if (bs_queue == NULL) { | ||
39 | bs_queue = g_new0(BlockReopenQueue, 1); | ||
40 | QSIMPLEQ_INIT(bs_queue); | ||
41 | @@ -XXX,XX +XXX,XX @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, | ||
42 | * If all devices prepare successfully, then the changes are committed | ||
43 | * to all devices. | ||
44 | * | ||
45 | + * All affected nodes must be drained between bdrv_reopen_queue() and | ||
46 | + * bdrv_reopen_multiple(). | ||
47 | */ | ||
48 | int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp) | ||
49 | { | ||
50 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er | ||
51 | |||
52 | assert(bs_queue != NULL); | ||
53 | |||
54 | - aio_context_release(ctx); | ||
55 | - bdrv_drain_all_begin(); | ||
56 | - aio_context_acquire(ctx); | ||
57 | - | ||
58 | QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { | ||
59 | + assert(bs_entry->state.bs->quiesce_counter > 0); | ||
60 | if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { | ||
61 | error_propagate(errp, local_err); | ||
62 | goto cleanup; | ||
63 | @@ -XXX,XX +XXX,XX @@ cleanup: | ||
37 | } | 64 | } |
38 | } | 65 | g_free(bs_queue); |
39 | 66 | ||
40 | +static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self, | 67 | - bdrv_drain_all_end(); |
41 | + MirrorBlockJob *s, | 68 | - |
42 | + uint64_t offset, | ||
43 | + uint64_t bytes) | ||
44 | +{ | ||
45 | + uint64_t self_start_chunk = offset / s->granularity; | ||
46 | + uint64_t self_end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity); | ||
47 | + uint64_t self_nb_chunks = self_end_chunk - self_start_chunk; | ||
48 | + | ||
49 | + while (find_next_bit(s->in_flight_bitmap, self_end_chunk, | ||
50 | + self_start_chunk) < self_end_chunk && | ||
51 | + s->ret >= 0) | ||
52 | + { | ||
53 | + MirrorOp *op; | ||
54 | + | ||
55 | + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { | ||
56 | + uint64_t op_start_chunk = op->offset / s->granularity; | ||
57 | + uint64_t op_nb_chunks = DIV_ROUND_UP(op->offset + op->bytes, | ||
58 | + s->granularity) - | ||
59 | + op_start_chunk; | ||
60 | + | ||
61 | + if (op == self) { | ||
62 | + continue; | ||
63 | + } | ||
64 | + | ||
65 | + if (ranges_overlap(self_start_chunk, self_nb_chunks, | ||
66 | + op_start_chunk, op_nb_chunks)) | ||
67 | + { | ||
68 | + qemu_co_queue_wait(&op->waiting_requests, NULL); | ||
69 | + break; | ||
70 | + } | ||
71 | + } | ||
72 | + } | ||
73 | +} | ||
74 | + | ||
75 | static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret) | ||
76 | { | ||
77 | MirrorBlockJob *s = op->s; | ||
78 | @@ -XXX,XX +XXX,XX @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, | ||
79 | return ret; | 69 | return ret; |
80 | } | 70 | } |
81 | 71 | ||
82 | -static inline void mirror_wait_for_io(MirrorBlockJob *s) | 72 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) |
83 | +static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) | ||
84 | { | 73 | { |
85 | MirrorOp *op; | 74 | int ret = -1; |
86 | 75 | Error *local_err = NULL; | |
87 | - op = QTAILQ_FIRST(&s->ops_in_flight); | 76 | - BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); |
88 | - assert(op); | 77 | + BlockReopenQueue *queue; |
89 | - qemu_co_queue_wait(&op->waiting_requests, NULL); | 78 | |
90 | + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { | 79 | + bdrv_subtree_drained_begin(bs); |
91 | + /* Do not wait on pseudo ops, because it may in turn wait on | 80 | + |
92 | + * some other operation to start, which may in fact be the | 81 | + queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); |
93 | + * caller of this function. Since there is only one pseudo op | 82 | ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err); |
94 | + * at any given time, we will always find some real operation | 83 | if (local_err != NULL) { |
95 | + * to wait on. */ | 84 | error_propagate(errp, local_err); |
96 | + if (!op->is_pseudo_op) { | 85 | } |
97 | + qemu_co_queue_wait(&op->waiting_requests, NULL); | 86 | + |
98 | + return; | 87 | + bdrv_subtree_drained_end(bs); |
99 | + } | 88 | + |
100 | + } | 89 | return ret; |
101 | + abort(); | ||
102 | } | 90 | } |
103 | 91 | ||
104 | /* Perform a mirror copy operation. | 92 | diff --git a/block/replication.c b/block/replication.c |
105 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_co_read(void *opaque) | 93 | index XXXXXXX..XXXXXXX 100644 |
106 | 94 | --- a/block/replication.c | |
107 | while (s->buf_free_count < nb_chunks) { | 95 | +++ b/block/replication.c |
108 | trace_mirror_yield_in_flight(s, op->offset, s->in_flight); | 96 | @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, |
109 | - mirror_wait_for_io(s); | 97 | new_secondary_flags = s->orig_secondary_flags; |
110 | + mirror_wait_for_free_in_flight_slot(s); | ||
111 | } | 98 | } |
112 | 99 | ||
113 | /* Now make a QEMUIOVector taking enough granularity-sized chunks | 100 | + bdrv_subtree_drained_begin(s->hidden_disk->bs); |
114 | @@ -XXX,XX +XXX,XX @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, | 101 | + bdrv_subtree_drained_begin(s->secondary_disk->bs); |
115 | static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | 102 | + |
116 | { | 103 | if (orig_hidden_flags != new_hidden_flags) { |
117 | BlockDriverState *source = s->source; | 104 | reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL, |
118 | - int64_t offset, first_chunk; | 105 | new_hidden_flags); |
119 | - uint64_t delay_ns = 0; | 106 | @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, |
120 | + MirrorOp *pseudo_op; | 107 | reopen_queue, &local_err); |
121 | + int64_t offset; | 108 | error_propagate(errp, local_err); |
122 | + uint64_t delay_ns = 0, ret = 0; | ||
123 | /* At least the first dirty chunk is mirrored in one iteration. */ | ||
124 | int nb_chunks = 1; | ||
125 | bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); | ||
126 | @@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | ||
127 | } | 109 | } |
128 | bdrv_dirty_bitmap_unlock(s->dirty_bitmap); | ||
129 | |||
130 | - first_chunk = offset / s->granularity; | ||
131 | - while (test_bit(first_chunk, s->in_flight_bitmap)) { | ||
132 | - trace_mirror_yield_in_flight(s, offset, s->in_flight); | ||
133 | - mirror_wait_for_io(s); | ||
134 | - } | ||
135 | + mirror_wait_on_conflicts(NULL, s, offset, 1); | ||
136 | |||
137 | job_pause_point(&s->common.job); | ||
138 | |||
139 | @@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | ||
140 | nb_chunks * s->granularity); | ||
141 | bdrv_dirty_bitmap_unlock(s->dirty_bitmap); | ||
142 | |||
143 | + /* Before claiming an area in the in-flight bitmap, we have to | ||
144 | + * create a MirrorOp for it so that conflicting requests can wait | ||
145 | + * for it. mirror_perform() will create the real MirrorOps later, | ||
146 | + * for now we just create a pseudo operation that will wake up all | ||
147 | + * conflicting requests once all real operations have been | ||
148 | + * launched. */ | ||
149 | + pseudo_op = g_new(MirrorOp, 1); | ||
150 | + *pseudo_op = (MirrorOp){ | ||
151 | + .offset = offset, | ||
152 | + .bytes = nb_chunks * s->granularity, | ||
153 | + .is_pseudo_op = true, | ||
154 | + }; | ||
155 | + qemu_co_queue_init(&pseudo_op->waiting_requests); | ||
156 | + QTAILQ_INSERT_TAIL(&s->ops_in_flight, pseudo_op, next); | ||
157 | + | 110 | + |
158 | bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks); | 111 | + bdrv_subtree_drained_end(s->hidden_disk->bs); |
159 | while (nb_chunks > 0 && offset < s->bdev_length) { | 112 | + bdrv_subtree_drained_end(s->secondary_disk->bs); |
160 | int ret; | 113 | } |
161 | @@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | 114 | |
162 | 115 | static void backup_job_cleanup(BlockDriverState *bs) | |
163 | while (s->in_flight >= MAX_IN_FLIGHT) { | 116 | diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c |
164 | trace_mirror_yield_in_flight(s, offset, s->in_flight); | 117 | index XXXXXXX..XXXXXXX 100644 |
165 | - mirror_wait_for_io(s); | 118 | --- a/qemu-io-cmds.c |
166 | + mirror_wait_for_free_in_flight_slot(s); | 119 | +++ b/qemu-io-cmds.c |
167 | } | 120 | @@ -XXX,XX +XXX,XX @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) |
168 | 121 | opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL; | |
169 | if (s->ret < 0) { | 122 | qemu_opts_reset(&reopen_opts); |
170 | - return 0; | 123 | |
171 | + ret = 0; | 124 | + bdrv_subtree_drained_begin(bs); |
172 | + goto fail; | 125 | brq = bdrv_reopen_queue(NULL, bs, opts, flags); |
173 | } | 126 | bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err); |
174 | 127 | + bdrv_subtree_drained_end(bs); | |
175 | io_bytes = mirror_clip_bytes(s, offset, io_bytes); | ||
176 | @@ -XXX,XX +XXX,XX @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | ||
177 | nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity); | ||
178 | delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct); | ||
179 | } | ||
180 | - return delay_ns; | ||
181 | + | 128 | + |
182 | + ret = delay_ns; | 129 | if (local_err) { |
183 | +fail: | 130 | error_report_err(local_err); |
184 | + QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next); | 131 | } else { |
185 | + qemu_co_queue_restart_all(&pseudo_op->waiting_requests); | ||
186 | + g_free(pseudo_op); | ||
187 | + | ||
188 | + return ret; | ||
189 | } | ||
190 | |||
191 | static void mirror_free_init(MirrorBlockJob *s) | ||
192 | @@ -XXX,XX +XXX,XX @@ static void mirror_free_init(MirrorBlockJob *s) | ||
193 | static void mirror_wait_for_all_io(MirrorBlockJob *s) | ||
194 | { | ||
195 | while (s->in_flight > 0) { | ||
196 | - mirror_wait_for_io(s); | ||
197 | + mirror_wait_for_free_in_flight_slot(s); | ||
198 | } | ||
199 | } | ||
200 | |||
201 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) | ||
202 | if (s->in_flight >= MAX_IN_FLIGHT) { | ||
203 | trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, | ||
204 | s->in_flight); | ||
205 | - mirror_wait_for_io(s); | ||
206 | + mirror_wait_for_free_in_flight_slot(s); | ||
207 | continue; | ||
208 | } | ||
209 | |||
210 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_run(void *opaque) | ||
211 | if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || | ||
212 | (cnt == 0 && s->in_flight > 0)) { | ||
213 | trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); | ||
214 | - mirror_wait_for_io(s); | ||
215 | + mirror_wait_for_free_in_flight_slot(s); | ||
216 | continue; | ||
217 | } else if (cnt != 0) { | ||
218 | delay_ns = mirror_iteration(s); | ||
219 | -- | 132 | -- |
220 | 2.13.6 | 133 | 2.13.6 |
221 | 134 | ||